From 88eb084d18c6124acccfe06edfc161dfa11bb34b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@google.com>
Date: Tue, 16 Jul 2024 16:32:09 +0000
Subject: [PATCH] Revert "Merge 5.10.220 into android12-5.10-lts"

This reverts commit 87a7f35a248737adec8257a65ec4cb6ee9523f0b, reversing
changes made to 640645c85ba551dc98a3cd56f51be40c707e10fb.

5.10.220 is a bunch of vfs and nfs changes that are not needed in
Android systems, so revert the whole lot all at once, except for the
version number bump.

Change-Id: If28dc2231f27d326d3730716f23545dd0a2cdc75
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
---
 Documentation/filesystems/files.rst          |    8 +-
 Documentation/filesystems/locking.rst        |   10 +-
 Documentation/filesystems/nfs/exporting.rst  |   78 -
 arch/powerpc/platforms/cell/spufs/coredump.c |    2 +-
 crypto/algboss.c                             |    4 +-
 fs/Kconfig                                   |    6 +-
 fs/autofs/dev-ioctl.c                        |    5 +-
 fs/cachefiles/namei.c                        |    9 +-
 fs/cifs/connect.c                            |    2 +-
 fs/coredump.c                                |    5 +-
 fs/ecryptfs/inode.c                          |   10 +-
 fs/exec.c                                    |   29 +-
 fs/exportfs/expfs.c                          |   40 +-
 fs/file.c                                    |  177 +-
 fs/init.c                                    |    6 +-
 fs/lockd/clnt4xdr.c                          |    9 +-
 fs/lockd/clntproc.c                          |    3 +
 fs/lockd/host.c                              |    4 +-
 fs/lockd/svc.c                               |  260 +-
 fs/lockd/svc4proc.c                          |   70 +-
 fs/lockd/svclock.c                           |   67 +-
 fs/lockd/svcproc.c                           |   62 +-
 fs/lockd/svcsubs.c                           |  123 +-
 fs/lockd/svcxdr.h                            |  142 -
 fs/lockd/xdr.c                               |  448 ++-
 fs/lockd/xdr4.c                              |  462 +--
 fs/locks.c                                   |  102 +-
 fs/namei.c                                   |   21 +-
 fs/nfs/blocklayout/blocklayout.c             |    2 +-
 fs/nfs/blocklayout/dev.c                     |    2 +-
 fs/nfs/callback.c                            |  111 +-
 fs/nfs/callback_xdr.c                        |   33 +-
 fs/nfs/dir.c                                 |    2 +-
 fs/nfs/export.c                              |   17 -
 fs/nfs/file.c                                |    3 -
 fs/nfs/filelayout/filelayout.c               |    4 +-
 fs/nfs/filelayout/filelayoutdev.c            |    2 +-
 fs/nfs/flexfilelayout/flexfilelayout.c       |    4 +-
 fs/nfs/flexfilelayout/flexfilelayoutdev.c    |    2 +-
 fs/nfs/nfs42xdr.c                            |    2 +-
 fs/nfs/nfs4state.c                           |    2 +-
 fs/nfs/nfs4xdr.c                             |    6 +-
 fs/nfs/pagelist.c                            |    3 +
 fs/nfs/super.c                               |    8 -
 fs/nfs/write.c                               |    3 +
 fs/nfs_common/Makefile                       |    2 +-
 fs/nfs_common/nfs_ssc.c                      |    2 +
 fs/nfs_common/nfsacl.c                       |  123 -
 fs/nfsd/Kconfig                              |   40 +-
 fs/nfsd/Makefile                             |    8 +-
 fs/nfsd/acl.h                                |    6 +-
 fs/nfsd/blocklayout.c                        |    1 -
 fs/nfsd/blocklayoutxdr.c                     |    1 -
 fs/nfsd/cache.h                              |    2 +-
 fs/nfsd/export.c                             |   74 +-
 fs/nfsd/export.h                             |   16 +-
 fs/nfsd/filecache.c                          | 1217 +++---
 fs/nfsd/filecache.h                          |   23 +-
 fs/nfsd/flexfilelayout.c                     |    3 +-
 fs/nfsd/lockd.c                              |   10 +-
 fs/nfsd/netns.h                              |   63 +-
 fs/nfsd/nfs2acl.c                            |  214 +-
 fs/nfsd/nfs3acl.c                            |  140 +-
 fs/nfsd/nfs3proc.c                           |  402 +-
 fs/nfsd/nfs3xdr.c                            | 1801 ++++-----
 fs/nfsd/nfs4acl.c                            |   45 +-
 fs/nfsd/nfs4callback.c                       |  168 +-
 fs/nfsd/nfs4idmap.c                          |    9 +-
 fs/nfsd/nfs4layouts.c                        |    4 +-
 fs/nfsd/nfs4proc.c                           | 1111 ++----
 fs/nfsd/nfs4recover.c                        |   20 +-
 fs/nfsd/nfs4state.c                          | 1715 +++-----
 fs/nfsd/nfs4xdr.c                            | 3771 +++++++++---------
 fs/nfsd/nfscache.c                           |  115 +-
 fs/nfsd/nfsctl.c                             |  169 +-
 fs/nfsd/nfsd.h                               |   50 +-
 fs/nfsd/nfsfh.c                              |  291 +-
 fs/nfsd/nfsfh.h                              |  179 +-
 fs/nfsd/nfsproc.c                            |  262 +-
 fs/nfsd/nfssvc.c                             |  356 +-
 fs/nfsd/nfsxdr.c                             |  842 ++--
 fs/nfsd/state.h                              |   69 +-
 fs/nfsd/stats.c                              |  124 +-
 fs/nfsd/stats.h                              |   98 +-
 fs/nfsd/trace.c                              |    1 -
 fs/nfsd/trace.h                              |  898 +----
 fs/nfsd/vfs.c                                |  933 +++--
 fs/nfsd/vfs.h                                |   62 +-
 fs/nfsd/xdr.h                                |   68 +-
 fs/nfsd/xdr3.h                               |  116 +-
 fs/nfsd/xdr4.h                               |  127 +-
 fs/nfsd/xdr4cb.h                             |    6 -
 fs/notify/dnotify/dnotify.c                  |   17 +-
 fs/notify/fanotify/fanotify.c                |  487 +--
 fs/notify/fanotify/fanotify.h                |  252 +-
 fs/notify/fanotify/fanotify_user.c           |  886 +---
 fs/notify/fdinfo.c                           |   19 +-
 fs/notify/fsnotify.c                         |  183 +-
 fs/notify/fsnotify.h                         |   19 +-
 fs/notify/group.c                            |   38 +-
 fs/notify/inotify/inotify.h                  |   11 +-
 fs/notify/inotify/inotify_fsnotify.c         |   12 +-
 fs/notify/inotify/inotify_user.c             |   87 +-
 fs/notify/mark.c                             |  172 +-
 fs/notify/notification.c                     |   72 +-
 fs/open.c                                    |   49 +-
 fs/overlayfs/overlayfs.h                     |    9 +-
 fs/proc/fd.c                                 |   48 +-
 fs/udf/file.c                                |    2 +-
 fs/verity/enable.c                           |    2 +-
 include/linux/dnotify.h                      |    2 +-
 include/linux/errno.h                        |    1 -
 include/linux/exportfs.h                     |   15 -
 include/linux/fanotify.h                     |   74 +-
 include/linux/fdtable.h                      |   37 +-
 include/linux/fs.h                           |   54 +-
 include/linux/fsnotify.h                     |   77 +-
 include/linux/fsnotify_backend.h             |  372 +-
 include/linux/iversion.h                     |   13 -
 include/linux/kallsyms.h                     |   17 +-
 include/linux/kthread.h                      |    1 -
 include/linux/lockd/bind.h                   |    3 +-
 include/linux/lockd/lockd.h                  |   17 +-
 include/linux/lockd/xdr.h                    |   35 +-
 include/linux/lockd/xdr4.h                   |   33 +-
 include/linux/module.h                       |   24 +-
 include/linux/nfs.h                          |    8 +
 include/linux/nfs4.h                         |   21 +-
 include/linux/nfs_ssc.h                      |   14 -
 include/linux/nfsacl.h                       |    6 -
 include/linux/pid.h                          |    1 -
 include/linux/sched/user.h                   |    3 +
 include/linux/sunrpc/msg_prot.h              |    3 +
 include/linux/sunrpc/svc.h                   |  151 +-
 include/linux/sunrpc/svc_rdma.h              |    4 +-
 include/linux/sunrpc/svc_xprt.h              |   16 +-
 include/linux/sunrpc/svcauth.h               |    4 +-
 include/linux/sunrpc/svcsock.h               |    7 +-
 include/linux/sunrpc/xdr.h                   |  153 +-
 include/linux/syscalls.h                     |   12 +
 include/linux/sysctl.h                       |    2 -
 include/linux/user_namespace.h               |    4 -
 include/trace/events/sunrpc.h                |   26 +-
 include/uapi/linux/fanotify.h                |   42 -
 include/uapi/linux/nfs3.h                    |    6 -
 include/uapi/linux/nfsd/nfsfh.h              |  105 +
 kernel/audit_fsnotify.c                      |    8 +-
 kernel/audit_tree.c                          |    2 +-
 kernel/audit_watch.c                         |    5 +-
 kernel/bpf/inode.c                           |    2 +-
 kernel/bpf/syscall.c                         |   20 +-
 kernel/bpf/task_iter.c                       |    2 +-
 kernel/fork.c                                |   12 +-
 kernel/kallsyms.c                            |    8 +-
 kernel/kcmp.c                                |   29 +-
 kernel/kthread.c                             |   23 +-
 kernel/livepatch/core.c                      |    7 +-
 kernel/module.c                              |   24 +-
 kernel/pid.c                                 |   15 +-
 kernel/sys.c                                 |    2 +-
 kernel/sysctl.c                              |   54 +-
 kernel/trace/trace_kprobe.c                  |    4 +-
 kernel/ucount.c                              |    4 -
 mm/madvise.c                                 |    2 +-
 mm/memcontrol.c                              |    2 +-
 mm/mincore.c                                 |    2 +-
 net/bluetooth/bnep/core.c                    |    2 +-
 net/bluetooth/cmtp/core.c                    |    2 +-
 net/bluetooth/hidp/core.c                    |    2 +-
 net/sunrpc/auth_gss/gss_rpc_xdr.c            |    2 +-
 net/sunrpc/auth_gss/svcauth_gss.c            |   47 +-
 net/sunrpc/sched.c                           |    1 -
 net/sunrpc/svc.c                             |  314 +-
 net/sunrpc/svc_xprt.c                        |  104 +-
 net/sunrpc/svcauth.c                         |    8 +-
 net/sunrpc/svcauth_unix.c                    |   18 +-
 net/sunrpc/svcsock.c                         |   32 +-
 net/sunrpc/xdr.c                             |  112 +-
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c   |    2 +-
 net/sunrpc/xprtrdma/svc_rdma_sendto.c        |   32 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c     |    2 +-
 net/unix/af_unix.c                           |    2 +-
 tools/objtool/check.c                        |    3 +-
 183 files changed, 8839 insertions(+), 13928 deletions(-)
 delete mode 100644 fs/lockd/svcxdr.h
 create mode 100644 include/uapi/linux/nfsd/nfsfh.h

diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst
index bcf84459917f..cbf8e57376bf 100644
--- a/Documentation/filesystems/files.rst
+++ b/Documentation/filesystems/files.rst
@@ -62,7 +62,7 @@ the fdtable structure -
    be held.
 
 4. To look up the file structure given an fd, a reader
-   must use either lookup_fd_rcu() or files_lookup_fd_rcu() APIs. These
+   must use either fcheck() or fcheck_files() APIs. These
    take care of barrier requirements due to lock-free lookup.
 
    An example::
@@ -70,7 +70,7 @@ the fdtable structure -
 	struct file *file;
 
 	rcu_read_lock();
-	file = lookup_fd_rcu(fd);
+	file = fcheck(fd);
 	if (file) {
 		...
 	}
@@ -84,7 +84,7 @@ the fdtable structure -
    on ->f_count::
 
 	rcu_read_lock();
-	file = files_lookup_fd_rcu(files, fd);
+	file = fcheck_files(files, fd);
 	if (file) {
 		if (atomic_long_inc_not_zero(&file->f_count))
 			*fput_needed = 1;
@@ -104,7 +104,7 @@ the fdtable structure -
    lock-free, they must be installed using rcu_assign_pointer()
    API. If they are looked up lock-free, rcu_dereference()
    must be used. However it is advisable to use files_fdtable()
-   and lookup_fd_rcu()/files_lookup_fd_rcu() which take care of these issues.
+   and fcheck()/fcheck_files() which take care of these issues.
 
 7. While updating, the fdtable pointer must be looked up while
    holding files->file_lock. If ->file_lock is dropped, then
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 5db6dec0b423..18d93fc7dc46 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -433,21 +433,17 @@ prototypes::
 	void (*lm_break)(struct file_lock *); /* break_lease callback */
 	int (*lm_change)(struct file_lock **, int);
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
-        bool (*lm_lock_expirable)(struct file_lock *);
-        void (*lm_expire_lock)(void);
 
 locking rules:
 
 ======================	=============	=================	=========
-ops			   flc_lock  	blocked_lock_lock	may block
+ops			inode->i_lock	blocked_lock_lock	may block
 ======================	=============	=================	=========
-lm_notify:		no      	yes			no
+lm_notify:		yes		yes			no
 lm_grant:		no		no			no
 lm_break:		yes		no			no
 lm_change		yes		no			no
-lm_breaker_owns_lease:	yes     	no			no
-lm_lock_expirable	yes		no			no
-lm_expire_lock		no		no			yes
+lm_breaker_owns_lease:	no		no			no
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
index 6f59a364f84c..33d588a01ace 100644
--- a/Documentation/filesystems/nfs/exporting.rst
+++ b/Documentation/filesystems/nfs/exporting.rst
@@ -154,11 +154,6 @@ struct which has the following members:
     to find potential names, and matches inode numbers to find the correct
     match.
 
-  flags
-    Some filesystems may need to be handled differently than others. The
-    export_operations struct also includes a flags field that allows the
-    filesystem to communicate such information to nfsd. See the Export
-    Operations Flags section below for more explanation.
 
 A filehandle fragment consists of an array of 1 or more 4byte words,
 together with a one byte "type".
@@ -168,76 +163,3 @@ generated by encode_fh, in which case it will have been padded with
 nuls.  Rather, the encode_fh routine should choose a "type" which
 indicates the decode_fh how much of the filehandle is valid, and how
 it should be interpreted.
-
-Export Operations Flags
------------------------
-In addition to the operation vector pointers, struct export_operations also
-contains a "flags" field that allows the filesystem to communicate to nfsd
-that it may want to do things differently when dealing with it. The
-following flags are defined:
-
-  EXPORT_OP_NOWCC - disable NFSv3 WCC attributes on this filesystem
-    RFC 1813 recommends that servers always send weak cache consistency
-    (WCC) data to the client after each operation. The server should
-    atomically collect attributes about the inode, do an operation on it,
-    and then collect the attributes afterward. This allows the client to
-    skip issuing GETATTRs in some situations but means that the server
-    is calling vfs_getattr for almost all RPCs. On some filesystems
-    (particularly those that are clustered or networked) this is expensive
-    and atomicity is difficult to guarantee. This flag indicates to nfsd
-    that it should skip providing WCC attributes to the client in NFSv3
-    replies when doing operations on this filesystem. Consider enabling
-    this on filesystems that have an expensive ->getattr inode operation,
-    or when atomicity between pre and post operation attribute collection
-    is impossible to guarantee.
-
-  EXPORT_OP_NOSUBTREECHK - disallow subtree checking on this fs
-    Many NFS operations deal with filehandles, which the server must then
-    vet to ensure that they live inside of an exported tree. When the
-    export consists of an entire filesystem, this is trivial. nfsd can just
-    ensure that the filehandle live on the filesystem. When only part of a
-    filesystem is exported however, then nfsd must walk the ancestors of the
-    inode to ensure that it's within an exported subtree. This is an
-    expensive operation and not all filesystems can support it properly.
-    This flag exempts the filesystem from subtree checking and causes
-    exportfs to get back an error if it tries to enable subtree checking
-    on it.
-
-  EXPORT_OP_CLOSE_BEFORE_UNLINK - always close cached files before unlinking
-    On some exportable filesystems (such as NFS) unlinking a file that
-    is still open can cause a fair bit of extra work. For instance,
-    the NFS client will do a "sillyrename" to ensure that the file
-    sticks around while it's still open. When reexporting, that open
-    file is held by nfsd so we usually end up doing a sillyrename, and
-    then immediately deleting the sillyrenamed file just afterward when
-    the link count actually goes to zero. Sometimes this delete can race
-    with other operations (for instance an rmdir of the parent directory).
-    This flag causes nfsd to close any open files for this inode _before_
-    calling into the vfs to do an unlink or a rename that would replace
-    an existing file.
-
-  EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote
-    PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to
-    write to one bdi (the final bdi) in order to free up writes queued
-    to another bdi (the client bdi). Such threads get a private balance
-    of dirty pages so that dirty pages for the client bdi do not imact
-    the daemon writing to the final bdi. For filesystems whose durable
-    storage is not local (such as exported NFS filesystems), this
-    constraint has negative consequences. EXPORT_OP_REMOTE_FS enables
-    an export to disable writeback throttling.
-
-  EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically
-    EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem
-    cannot provide the semantics required by the "atomic" boolean in
-    NFSv4's change_info4. This boolean indicates to a client whether the
-    returned before and after change attributes were obtained atomically
-    with the respect to the requested metadata operation (UNLINK,
-    OPEN/CREATE, MKDIR, etc).
-
-  EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2)
-    On most filesystems, inodes can remain under writeback after the
-    file is closed. NFSD relies on client activity or local flusher
-    threads to handle writeback. Certain filesystems, such as NFS, flush
-    all of an inode's dirty data on last close. Exports that behave this
-    way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
-    waiting for writeback when closing such files.
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 60b5583e9eaf..026c181a98c5 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -74,7 +74,7 @@ static struct spu_context *coredump_next_context(int *fd)
 	*fd = n - 1;
 
 	rcu_read_lock();
-	file = lookup_fd_rcu(*fd);
+	file = fcheck(*fd);
 	ctx = SPUFS_I(file_inode(file))->i_ctx;
 	get_spu_context(ctx);
 	rcu_read_unlock();
diff --git a/crypto/algboss.c b/crypto/algboss.c
index b87f907bb142..5ebccbd6b74e 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -74,7 +74,7 @@ out:
 	complete_all(&param->larval->completion);
 	crypto_alg_put(&param->larval->alg);
 	kfree(param);
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 }
 
 static int cryptomgr_schedule_probe(struct crypto_larval *larval)
@@ -209,7 +209,7 @@ skiptest:
 	crypto_alg_tested(param->driver, err);
 
 	kfree(param);
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 }
 
 static int cryptomgr_schedule_test(struct crypto_alg *alg)
diff --git a/fs/Kconfig b/fs/Kconfig
index d34b8227c772..a6a721108d1c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -321,7 +321,7 @@ config LOCKD
 
 config LOCKD_V4
 	bool
-	depends on NFSD || NFS_V3
+	depends on NFSD_V3 || NFS_V3
 	depends on FILE_LOCKING
 	default y
 
@@ -334,10 +334,6 @@ config NFS_COMMON
 	depends on NFSD || NFS_FS || LOCKD
 	default y
 
-config NFS_V4_2_SSC_HELPER
-	bool
-	default y if NFS_V4_2
-
 source "net/sunrpc/Kconfig"
 source "fs/ceph/Kconfig"
 source "fs/cifs/Kconfig"
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 5bf781ea6d67..322b7dfb4ea0 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -4,10 +4,9 @@
  * Copyright 2008 Ian Kent <raven@themaw.net>
  */
 
-#include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <linux/compat.h>
-#include <linux/fdtable.h>
+#include <linux/syscalls.h>
 #include <linux/magic.h>
 #include <linux/nospec.h>
 
@@ -290,7 +289,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp,
 				       struct autofs_sb_info *sbi,
 				       struct autofs_dev_ioctl *param)
 {
-	return close_fd(param->ioctlfd);
+	return ksys_close(param->ioctlfd);
 }
 
 /*
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 7b987de0babe..ecc8ecbbfa5a 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -412,14 +412,9 @@ try_again:
 	if (ret < 0) {
 		cachefiles_io_error(cache, "Rename security error %d", ret);
 	} else {
-		struct renamedata rd = {
-			.old_dir	= d_inode(dir),
-			.old_dentry	= rep,
-			.new_dir	= d_inode(cache->graveyard),
-			.new_dentry	= grave,
-		};
 		trace_cachefiles_rename(object, rep, grave, why);
-		ret = vfs_rename(&rd);
+		ret = vfs_rename(d_inode(dir), rep,
+				 d_inode(cache->graveyard), grave, NULL, 0);
 		if (ret != 0 && ret != -ENOMEM)
 			cachefiles_io_error(cache,
 					    "Rename failed with error %d", ret);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a3c0e6a4e484..164b98540716 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1242,7 +1242,7 @@ next_pdu:
 	}
 
 	memalloc_noreclaim_restore(noreclaim_flag);
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 }
 
 /* extract the host portion of the UNC string */
diff --git a/fs/coredump.c b/fs/coredump.c
index ca4802d14158..7c5edadf5208 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -587,6 +587,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 	int ispipe;
 	size_t *argv = NULL;
 	int argc = 0;
+	struct files_struct *displaced;
 	/* require nonrelative corefile path and be extra careful */
 	bool need_suid_safe = false;
 	bool core_dumped = false;
@@ -792,9 +793,11 @@ void do_coredump(const kernel_siginfo_t *siginfo)
 	}
 
 	/* get us an unshared descriptor table; almost always a no-op */
-	retval = unshare_files();
+	retval = unshare_files(&displaced);
 	if (retval)
 		goto close_fail;
+	if (displaced)
+		put_files_struct(displaced);
 	if (!dump_interrupted()) {
 		/*
 		 * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index cd1a60a319b8..7777bb6f66d2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -598,7 +598,6 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct dentry *lower_new_dir_dentry;
 	struct dentry *trap;
 	struct inode *target_inode;
-	struct renamedata rd = {};
 
 	if (flags)
 		return -EINVAL;
@@ -628,12 +627,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		rc = -ENOTEMPTY;
 		goto out_lock;
 	}
-
-	rd.old_dir	= d_inode(lower_old_dir_dentry);
-	rd.old_dentry	= lower_old_dentry;
-	rd.new_dir	= d_inode(lower_new_dir_dentry);
-	rd.new_dentry	= lower_new_dentry;
-	rc = vfs_rename(&rd);
+	rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+			d_inode(lower_new_dir_dentry), lower_new_dentry,
+			NULL, 0);
 	if (rc)
 		goto out_lock;
 	if (target_inode)
diff --git a/fs/exec.c b/fs/exec.c
index 398ccf06d799..4edc932a7dce 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1266,11 +1266,6 @@ int begin_new_exec(struct linux_binprm * bprm)
 	if (retval)
 		goto out;
 
-	/* Ensure the files table is not shared. */
-	retval = unshare_files();
-	if (retval)
-		goto out;
-
 	/*
 	 * Must be called _before_ exec_mmap() as bprm->mm is
 	 * not visibile until then. This also enables the update
@@ -1796,6 +1791,7 @@ static int bprm_execve(struct linux_binprm *bprm,
 		       int fd, struct filename *filename, int flags)
 {
 	struct file *file;
+	struct files_struct *displaced;
 	int retval;
 
 	/*
@@ -1803,10 +1799,14 @@ static int bprm_execve(struct linux_binprm *bprm,
 	 */
 	io_uring_task_cancel();
 
-	retval = prepare_bprm_creds(bprm);
+	retval = unshare_files(&displaced);
 	if (retval)
 		return retval;
 
+	retval = prepare_bprm_creds(bprm);
+	if (retval)
+		goto out_files;
+
 	check_unsafe_exec(bprm);
 	current->in_execve = 1;
 
@@ -1820,14 +1820,11 @@ static int bprm_execve(struct linux_binprm *bprm,
 	bprm->file = file;
 	/*
 	 * Record that a name derived from an O_CLOEXEC fd will be
-	 * inaccessible after exec.  This allows the code in exec to
-	 * choose to fail when the executable is not mmaped into the
-	 * interpreter and an open file descriptor is not passed to
-	 * the interpreter.  This makes for a better user experience
-	 * than having the interpreter start and then immediately fail
-	 * when it finds the executable is inaccessible.
+	 * inaccessible after exec. Relies on having exclusive access to
+	 * current->files (due to unshare_files above).
 	 */
-	if (bprm->fdpath && get_close_on_exec(fd))
+	if (bprm->fdpath &&
+	    close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
 		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
 
 	/* Set the unchanging part of bprm->cred */
@@ -1845,6 +1842,8 @@ static int bprm_execve(struct linux_binprm *bprm,
 	rseq_execve(current);
 	acct_update_integrals(current);
 	task_numa_free(current, false);
+	if (displaced)
+		put_files_struct(displaced);
 	return retval;
 
 out:
@@ -1861,6 +1860,10 @@ out_unmark:
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
 
+out_files:
+	if (displaced)
+		reset_files_struct(displaced);
+
 	return retval;
 }
 
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 8c28bd1c9ed9..2dd55b172d57 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -18,7 +18,7 @@
 #include <linux/sched.h>
 #include <linux/cred.h>
 
-#define dprintk(fmt, args...) pr_debug(fmt, ##args)
+#define dprintk(fmt, args...) do{}while(0)
 
 
 static int get_name(const struct path *path, char *name, struct dentry *child);
@@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
 	inode_unlock(dentry->d_inode);
 
 	if (IS_ERR(parent)) {
-		dprintk("get_parent of %lu failed, err %ld\n",
-			dentry->d_inode->i_ino, PTR_ERR(parent));
+		dprintk("%s: get_parent of %ld failed, err %d\n",
+			__func__, dentry->d_inode->i_ino, PTR_ERR(parent));
 		return parent;
 	}
 
@@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
 	dprintk("%s: found name: %s\n", __func__, nbuf);
 	tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf));
 	if (IS_ERR(tmp)) {
-		dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
+		dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
 		err = PTR_ERR(tmp);
 		goto out_err;
 	}
@@ -417,11 +417,9 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 }
 EXPORT_SYMBOL_GPL(exportfs_encode_fh);
 
-struct dentry *
-exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
-		       int fileid_type,
-		       int (*acceptable)(void *, struct dentry *),
-		       void *context)
+struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
+		int fh_len, int fileid_type,
+		int (*acceptable)(void *, struct dentry *), void *context)
 {
 	const struct export_operations *nop = mnt->mnt_sb->s_export_op;
 	struct dentry *result, *alias;
@@ -434,8 +432,10 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
 	if (!nop || !nop->fh_to_dentry)
 		return ERR_PTR(-ESTALE);
 	result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
+	if (PTR_ERR(result) == -ENOMEM)
+		return ERR_CAST(result);
 	if (IS_ERR_OR_NULL(result))
-		return result;
+		return ERR_PTR(-ESTALE);
 
 	/*
 	 * If no acceptance criteria was specified by caller, a disconnected
@@ -561,26 +561,10 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
 
  err_result:
 	dput(result);
+	if (err != -ENOMEM)
+		err = -ESTALE;
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw);
-
-struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
-				  int fh_len, int fileid_type,
-				  int (*acceptable)(void *, struct dentry *),
-				  void *context)
-{
-	struct dentry *ret;
-
-	ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
-				     acceptable, context);
-	if (IS_ERR_OR_NULL(ret)) {
-		if (ret == ERR_PTR(-ENOMEM))
-			return ret;
-		return ERR_PTR(-ESTALE);
-	}
-	return ret;
-}
 EXPORT_SYMBOL_GPL(exportfs_decode_fh);
 
 MODULE_LICENSE("GPL");
diff --git a/fs/file.c b/fs/file.c
index fdb84a64724b..d6bc73960e4a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -175,7 +175,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
 	spin_unlock(&files->file_lock);
 	new_fdt = alloc_fdtable(nr);
 
-	/* make sure all fd_install() have seen resize_in_progress
+	/* make sure all __fd_install() have seen resize_in_progress
 	 * or have finished their rcu_read_lock_sched() section.
 	 */
 	if (atomic_read(&files->count) > 1)
@@ -198,7 +198,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
 	rcu_assign_pointer(files->fdt, new_fdt);
 	if (cur_fdt != &files->fdtab)
 		call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
-	/* coupled with smp_rmb() in fd_install() */
+	/* coupled with smp_rmb() in __fd_install() */
 	smp_wmb();
 	return 1;
 }
@@ -466,6 +466,18 @@ void put_files_struct(struct files_struct *files)
 	}
 }
 
+void reset_files_struct(struct files_struct *files)
+{
+	struct task_struct *tsk = current;
+	struct files_struct *old;
+
+	old = tsk->files;
+	task_lock(tsk);
+	tsk->files = files;
+	task_unlock(tsk);
+	put_files_struct(old);
+}
+
 void exit_files(struct task_struct *tsk)
 {
 	struct files_struct * files = tsk->files;
@@ -509,9 +521,9 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
 /*
  * allocate a file descriptor, mark it busy.
  */
-static int alloc_fd(unsigned start, unsigned end, unsigned flags)
+int __alloc_fd(struct files_struct *files,
+	       unsigned start, unsigned end, unsigned flags)
 {
-	struct files_struct *files = current->files;
 	unsigned int fd;
 	int error;
 	struct fdtable *fdt;
@@ -567,9 +579,14 @@ out:
 	return error;
 }
 
+static int alloc_fd(unsigned start, unsigned flags)
+{
+	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+}
+
 int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
 {
-	return alloc_fd(0, nofile, flags);
+	return __alloc_fd(current->files, 0, nofile, flags);
 }
 
 int get_unused_fd_flags(unsigned flags)
@@ -608,13 +625,17 @@ EXPORT_SYMBOL(put_unused_fd);
  * It should never happen - if we allow dup2() do it, _really_ bad things
  * will follow.
  *
- * This consumes the "file" refcount, so callers should treat it
- * as if they had called fput(file).
+ * NOTE: __fd_install() variant is really, really low-level; don't
+ * use it unless you are forced to by truly lousy API shoved down
+ * your throat.  'files' *MUST* be either current->files or obtained
+ * by get_files_struct(current) done by whoever had given it to you,
+ * or really bad things will happen.  Normally you want to use
+ * fd_install() instead.
  */
 
-void fd_install(unsigned int fd, struct file *file)
+void __fd_install(struct files_struct *files, unsigned int fd,
+		struct file *file)
 {
-	struct files_struct *files = current->files;
 	struct fdtable *fdt;
 
 	rcu_read_lock_sched();
@@ -636,6 +657,15 @@ void fd_install(unsigned int fd, struct file *file)
 	rcu_read_unlock_sched();
 }
 
+/*
+ * This consumes the "file" refcount, so callers should treat it
+ * as if they had called fput(file).
+ */
+void fd_install(unsigned int fd, struct file *file)
+{
+	__fd_install(current->files, fd, file);
+}
+
 EXPORT_SYMBOL(fd_install);
 
 static struct file *pick_file(struct files_struct *files, unsigned fd)
@@ -659,9 +689,11 @@ out_unlock:
 	return file;
 }
 
-int close_fd(unsigned fd)
+/*
+ * The same warnings as for __alloc_fd()/__fd_install() apply here...
+ */
+int __close_fd(struct files_struct *files, unsigned fd)
 {
-	struct files_struct *files = current->files;
 	struct file *file;
 
 	file = pick_file(files, fd);
@@ -670,7 +702,7 @@ int close_fd(unsigned fd)
 
 	return filp_close(file, files);
 }
-EXPORT_SYMBOL(close_fd); /* for ksys_close() */
+EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
 
 /**
  * __close_range() - Close all file descriptors in a given range.
@@ -829,28 +861,68 @@ void do_close_on_exec(struct files_struct *files)
 	spin_unlock(&files->file_lock);
 }
 
+static inline struct file *__fget_files_rcu(struct files_struct *files,
+	unsigned int fd, fmode_t mask, unsigned int refs)
+{
+	for (;;) {
+		struct file *file;
+		struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+		struct file __rcu **fdentry;
+
+		if (unlikely(fd >= fdt->max_fds))
+			return NULL;
+
+		fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
+		file = rcu_dereference_raw(*fdentry);
+		if (unlikely(!file))
+			return NULL;
+
+		if (unlikely(file->f_mode & mask))
+			return NULL;
+
+		/*
+		 * Ok, we have a file pointer. However, because we do
+		 * this all locklessly under RCU, we may be racing with
+		 * that file being closed.
+		 *
+		 * Such a race can take two forms:
+		 *
+		 *  (a) the file ref already went down to zero,
+		 *      and get_file_rcu_many() fails. Just try
+		 *      again:
+		 */
+		if (unlikely(!get_file_rcu_many(file, refs)))
+			continue;
+
+		/*
+		 *  (b) the file table entry has changed under us.
+		 *       Note that we don't need to re-check the 'fdt->fd'
+		 *       pointer having changed, because it always goes
+		 *       hand-in-hand with 'fdt'.
+		 *
+		 * If so, we need to put our refs and try again.
+		 */
+		if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
+		    unlikely(rcu_dereference_raw(*fdentry) != file)) {
+			fput_many(file, refs);
+			continue;
+		}
+
+		/*
+		 * Ok, we have a ref to the file, and checked that it
+		 * still exists.
+		 */
+		return file;
+	}
+}
+
 static struct file *__fget_files(struct files_struct *files, unsigned int fd,
 				 fmode_t mask, unsigned int refs)
 {
 	struct file *file;
 
 	rcu_read_lock();
-loop:
-	file = files_lookup_fd_rcu(files, fd);
-	if (file) {
-		/* File object ref couldn't be taken.
-		 * dup2() atomicity guarantee is the reason
-		 * we loop to catch the new file (or NULL pointer)
-		 */
-		if (file->f_mode & mask)
-			file = NULL;
-		else if (!get_file_rcu_many(file, refs))
-			goto loop;
-		else if (files_lookup_fd_raw(files, fd) != file) {
-			fput_many(file, refs);
-			goto loop;
-		}
-	}
+	file = __fget_files_rcu(files, fd, mask, refs);
 	rcu_read_unlock();
 
 	return file;
@@ -891,42 +963,6 @@ struct file *fget_task(struct task_struct *task, unsigned int fd)
 	return file;
 }
 
-struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd)
-{
-	/* Must be called with rcu_read_lock held */
-	struct files_struct *files;
-	struct file *file = NULL;
-
-	task_lock(task);
-	files = task->files;
-	if (files)
-		file = files_lookup_fd_rcu(files, fd);
-	task_unlock(task);
-
-	return file;
-}
-
-struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd)
-{
-	/* Must be called with rcu_read_lock held */
-	struct files_struct *files;
-	unsigned int fd = *ret_fd;
-	struct file *file = NULL;
-
-	task_lock(task);
-	files = task->files;
-	if (files) {
-		for (; fd < files_fdtable(files)->max_fds; fd++) {
-			file = files_lookup_fd_rcu(files, fd);
-			if (file)
-				break;
-		}
-	}
-	task_unlock(task);
-	*ret_fd = fd;
-	return file;
-}
-
 /*
  * Lightweight file lookup - no refcnt increment if fd table isn't shared.
  *
@@ -949,7 +985,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
 	struct file *file;
 
 	if (atomic_read(&files->count) == 1) {
-		file = files_lookup_fd_raw(files, fd);
+		file = __fcheck_files(files, fd);
 		if (!file || unlikely(file->f_mode & mask))
 			return 0;
 		return (unsigned long)file;
@@ -1085,7 +1121,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
 	struct files_struct *files = current->files;
 
 	if (!file)
-		return close_fd(fd);
+		return __close_fd(files, fd);
 
 	if (fd >= rlimit(RLIMIT_NOFILE))
 		return -EBADF;
@@ -1174,7 +1210,7 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 
 	spin_lock(&files->file_lock);
 	err = expand_files(files, newfd);
-	file = files_lookup_fd_locked(files, oldfd);
+	file = fcheck(oldfd);
 	if (unlikely(!file))
 		goto Ebadf;
 	if (unlikely(err < 0)) {
@@ -1203,7 +1239,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
 		int retval = oldfd;
 
 		rcu_read_lock();
-		if (!files_lookup_fd_rcu(files, oldfd))
+		if (!fcheck_files(files, oldfd))
 			retval = -EBADF;
 		rcu_read_unlock();
 		return retval;
@@ -1228,11 +1264,10 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
 
 int f_dupfd(unsigned int from, struct file *file, unsigned flags)
 {
-	unsigned long nofile = rlimit(RLIMIT_NOFILE);
 	int err;
-	if (from >= nofile)
+	if (from >= rlimit(RLIMIT_NOFILE))
 		return -EINVAL;
-	err = alloc_fd(from, nofile, flags);
+	err = alloc_fd(from, flags);
 	if (err >= 0) {
 		get_file(file);
 		fd_install(err, file);
diff --git a/fs/init.c b/fs/init.c
index 02723bea8499..e9c320a48cf1 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -49,7 +49,7 @@ int __init init_chdir(const char *filename)
 	error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
 	if (error)
 		return error;
-	error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &path);
 	path_put(&path);
@@ -64,7 +64,7 @@ int __init init_chroot(const char *filename)
 	error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
 	if (error)
 		return error;
-	error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 	error = -EPERM;
@@ -118,7 +118,7 @@ int __init init_eaccess(const char *filename)
 	error = kern_path(filename, LOOKUP_FOLLOW, &path);
 	if (error)
 		return error;
-	error = path_permission(&path, MAY_ACCESS);
+	error = inode_permission(d_inode(path.dentry), MAY_ACCESS);
 	path_put(&path);
 	return error;
 }
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 8161667c976f..7df6324ccb8a 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -261,6 +261,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
 	u32 exclusive;
 	int error;
 	__be32 *p;
+	s64 end;	/* 64-bit: NLMv4 offset/length are hyper; s32 truncated the range end */
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(fl);
@@ -284,7 +285,13 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
 	fl->fl_type  = exclusive != 0 ? F_WRLCK : F_RDLCK;
 	p = xdr_decode_hyper(p, &l_offset);
 	xdr_decode_hyper(p, &l_len);
-	nlm4svc_set_file_lock_range(fl, l_offset, l_len);
+	end = l_offset + l_len - 1;
+
+	fl->fl_start = (loff_t)l_offset;
+	if (l_len == 0 || end < 0)
+		fl->fl_end = OFFSET_MAX;
+	else
+		fl->fl_end = (loff_t)end;
 	error = 0;
 out:
 	return error;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 99fffc9cb958..b11f2afa84f1 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -794,6 +794,9 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 		goto retry_cancel;
 	}
 
+	dprintk("lockd: cancel status %u (task %u)\n",
+			status, task->tk_pid);
+
 	switch (status) {
 	case NLM_LCK_GRANTED:
 	case NLM_LCK_DENIED_GRACE_PERIOD:
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index cdc8e12cdac4..771c289f6df7 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -163,8 +163,8 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
 	host->h_nsmhandle  = nsm;
 	host->h_addrbuf    = nsm->sm_addrbuf;
 	host->net	   = ni->net;
-	host->h_cred	   = get_cred(ni->cred);
-	strscpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
+	host->h_cred	   = get_cred(ni->cred);
+	strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
 
 out:
 	return host;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5579e67da17d..1a639e34847d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -54,9 +54,13 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops);
 
 static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int		nlmsvc_users;
-static struct svc_serv		*nlmsvc_serv;
+static struct task_struct	*nlmsvc_task;
+static struct svc_rqst		*nlmsvc_rqst;
 unsigned long			nlmsvc_timeout;
 
+static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
+
 unsigned int lockd_net_id;
 
 /*
@@ -180,10 +184,6 @@ lockd(void *vrqstp)
 	nlm_shutdown_hosts();
 	cancel_delayed_work_sync(&ln->grace_period_end);
 	locks_end_grace(&ln->lockd_manager);
-
-	dprintk("lockd_down: service stopped\n");
-
-	svc_exit_thread(rqstp);
 	return 0;
 }
 
@@ -196,8 +196,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
 
 	xprt = svc_find_xprt(serv, name, net, family, 0);
 	if (xprt == NULL)
-		return svc_xprt_create(serv, name, net, family, port,
-				       SVC_SOCK_DEFAULTS, cred);
+		return svc_create_xprt(serv, name, net, family, port,
+						SVC_SOCK_DEFAULTS, cred);
 	svc_xprt_put(xprt);
 	return 0;
 }
@@ -247,8 +247,7 @@ out_err:
 	if (warned++ == 0)
 		printk(KERN_WARNING
 			"lockd_up: makesock failed, error=%d\n", err);
-	svc_xprt_destroy_all(serv, net);
-	svc_rpcb_cleanup(serv, net);
+	svc_shutdown_net(serv, net);
 	return err;
 }
 
@@ -286,12 +285,13 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
 			nlm_shutdown_hosts_net(net);
 			cancel_delayed_work_sync(&ln->grace_period_end);
 			locks_end_grace(&ln->lockd_manager);
-			svc_xprt_destroy_all(serv, net);
-			svc_rpcb_cleanup(serv, net);
+			svc_shutdown_net(serv, net);
+			dprintk("%s: per-net data destroyed; net=%x\n",
+				__func__, net->ns.inum);
 		}
 	} else {
-		pr_err("%s: no users! net=%x\n",
-			__func__, net->ns.inum);
+		pr_err("%s: no users! task=%p, net=%x\n",
+			__func__, nlmsvc_task, net->ns.inum);
 		BUG();
 	}
 }
@@ -302,16 +302,20 @@ static int lockd_inetaddr_event(struct notifier_block *this,
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
 	struct sockaddr_in sin;
 
-	if (event != NETDEV_DOWN)
+	if ((event != NETDEV_DOWN) ||
+	    !atomic_inc_not_zero(&nlm_ntf_refcnt))
 		goto out;
 
-	if (nlmsvc_serv) {
+	if (nlmsvc_rqst) {
 		dprintk("lockd_inetaddr_event: removed %pI4\n",
 			&ifa->ifa_local);
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = ifa->ifa_local;
-		svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin);
+		svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
+			(struct sockaddr *)&sin);
 	}
+	atomic_dec(&nlm_ntf_refcnt);
+	wake_up(&nlm_ntf_wq);
 
 out:
 	return NOTIFY_DONE;
@@ -328,17 +332,21 @@ static int lockd_inet6addr_event(struct notifier_block *this,
 	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
 	struct sockaddr_in6 sin6;
 
-	if (event != NETDEV_DOWN)
+	if ((event != NETDEV_DOWN) ||
+	    !atomic_inc_not_zero(&nlm_ntf_refcnt))
 		goto out;
 
-	if (nlmsvc_serv) {
+	if (nlmsvc_rqst) {
 		dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_addr = ifa->addr;
 		if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
 			sin6.sin6_scope_id = ifa->idev->dev->ifindex;
-		svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6);
+		svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
+			(struct sockaddr *)&sin6);
 	}
+	atomic_dec(&nlm_ntf_refcnt);
+	wake_up(&nlm_ntf_wq);
 
 out:
 	return NOTIFY_DONE;
@@ -349,14 +357,86 @@ static struct notifier_block lockd_inet6addr_notifier = {
 };
 #endif
 
-static int lockd_get(void)
+static void lockd_unregister_notifiers(void)
+{
+	unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
+#if IS_ENABLED(CONFIG_IPV6)
+	unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+#endif
+	wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0);
+}
+
+static void lockd_svc_exit_thread(void)
+{
+	atomic_dec(&nlm_ntf_refcnt);
+	lockd_unregister_notifiers();
+	svc_exit_thread(nlmsvc_rqst);
+}
+
+static int lockd_start_svc(struct svc_serv *serv)
 {
-	struct svc_serv *serv;
 	int error;
 
-	if (nlmsvc_serv) {
-		nlmsvc_users++;
+	if (nlmsvc_rqst)
 		return 0;
+
+	/*
+	 * Create the kernel thread and wait for it to start.
+	 */
+	nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
+	if (IS_ERR(nlmsvc_rqst)) {
+		error = PTR_ERR(nlmsvc_rqst);
+		printk(KERN_WARNING
+			"lockd_up: svc_rqst allocation failed, error=%d\n",
+			error);
+		lockd_unregister_notifiers();
+		goto out_rqst;
+	}
+
+	atomic_inc(&nlm_ntf_refcnt);
+	svc_sock_update_bufs(serv);
+	serv->sv_maxconn = nlm_max_connections;
+
+	nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
+	if (IS_ERR(nlmsvc_task)) {
+		error = PTR_ERR(nlmsvc_task);
+		printk(KERN_WARNING
+			"lockd_up: kthread_run failed, error=%d\n", error);
+		goto out_task;
+	}
+	nlmsvc_rqst->rq_task = nlmsvc_task;
+	wake_up_process(nlmsvc_task);
+
+	dprintk("lockd_up: service started\n");
+	return 0;
+
+out_task:
+	lockd_svc_exit_thread();
+	nlmsvc_task = NULL;
+out_rqst:
+	nlmsvc_rqst = NULL;
+	return error;
+}
+
+static const struct svc_serv_ops lockd_sv_ops = {
+	.svo_shutdown		= svc_rpcb_cleanup,
+	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
+};
+
+static struct svc_serv *lockd_create_svc(void)
+{
+	struct svc_serv *serv;
+
+	/*
+	 * Check whether we're already up and running.
+	 */
+	if (nlmsvc_rqst) {
+		/*
+		 * Note: increase service usage, because later in case of error
+		 * svc_destroy() will be called.
+		 */
+		svc_get(nlmsvc_rqst->rq_server);
+		return nlmsvc_rqst->rq_server;
 	}
 
 	/*
@@ -371,44 +451,17 @@ static int lockd_get(void)
 		nlm_timeout = LOCKD_DFLT_TIMEO;
 	nlmsvc_timeout = nlm_timeout * HZ;
 
-	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd);
+	serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
 	if (!serv) {
 		printk(KERN_WARNING "lockd_up: create service failed\n");
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
-
-	serv->sv_maxconn = nlm_max_connections;
-	error = svc_set_num_threads(serv, NULL, 1);
-	/* The thread now holds the only reference */
-	svc_put(serv);
-	if (error < 0)
-		return error;
-
-	nlmsvc_serv = serv;
 	register_inetaddr_notifier(&lockd_inetaddr_notifier);
 #if IS_ENABLED(CONFIG_IPV6)
 	register_inet6addr_notifier(&lockd_inet6addr_notifier);
 #endif
 	dprintk("lockd_up: service created\n");
-	nlmsvc_users++;
-	return 0;
-}
-
-static void lockd_put(void)
-{
-	if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n"))
-		return;
-	if (--nlmsvc_users)
-		return;
-
-	unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
-#if IS_ENABLED(CONFIG_IPV6)
-	unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
-#endif
-
-	svc_set_num_threads(nlmsvc_serv, NULL, 0);
-	nlmsvc_serv = NULL;
-	dprintk("lockd_down: service destroyed\n");
+	return serv;
 }
 
 /*
@@ -416,21 +469,36 @@ static void lockd_put(void)
  */
 int lockd_up(struct net *net, const struct cred *cred)
 {
+	struct svc_serv *serv;
 	int error;
 
 	mutex_lock(&nlmsvc_mutex);
 
-	error = lockd_get();
-	if (error)
-		goto err;
-
-	error = lockd_up_net(nlmsvc_serv, net, cred);
-	if (error < 0) {
-		lockd_put();
-		goto err;
+	serv = lockd_create_svc();
+	if (IS_ERR(serv)) {
+		error = PTR_ERR(serv);
+		goto err_create;
 	}
 
-err:
+	error = lockd_up_net(serv, net, cred);
+	if (error < 0) {
+		lockd_unregister_notifiers();
+		goto err_put;
+	}
+
+	error = lockd_start_svc(serv);
+	if (error < 0) {
+		lockd_down_net(serv, net);
+		goto err_put;
+	}
+	nlmsvc_users++;
+	/*
+	 * Note: svc_serv structures have an initial use count of 1,
+	 * so we exit through here on both success and failure.
+	 */
+err_put:
+	svc_destroy(serv);
+err_create:
 	mutex_unlock(&nlmsvc_mutex);
 	return error;
 }
@@ -443,8 +511,27 @@ void
 lockd_down(struct net *net)
 {
 	mutex_lock(&nlmsvc_mutex);
-	lockd_down_net(nlmsvc_serv, net);
-	lockd_put();
+	lockd_down_net(nlmsvc_rqst->rq_server, net);
+	if (nlmsvc_users) {
+		if (--nlmsvc_users)
+			goto out;
+	} else {
+		printk(KERN_ERR "lockd_down: no users! task=%p\n",
+			nlmsvc_task);
+		BUG();
+	}
+
+	if (!nlmsvc_task) {
+		printk(KERN_ERR "lockd_down: no lockd running.\n");
+		BUG();
+	}
+	kthread_stop(nlmsvc_task);
+	dprintk("lockd_down: service stopped\n");
+	lockd_svc_exit_thread();
+	dprintk("lockd_down: service destroyed\n");
+	nlmsvc_task = NULL;
+	nlmsvc_rqst = NULL;
+out:
 	mutex_unlock(&nlmsvc_mutex);
 }
 EXPORT_SYMBOL_GPL(lockd_down);
@@ -497,7 +584,7 @@ static struct ctl_table nlm_sysctls[] = {
 		.data		= &nsm_use_hostnames,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dobool,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "nsm_local_state",
@@ -562,7 +649,6 @@ static int lockd_authenticate(struct svc_rqst *rqstp)
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 		case RPC_AUTH_UNIX:
-			rqstp->rq_auth_stat = rpc_auth_ok;
 			if (rqstp->rq_proc == 0)
 				return SVC_OK;
 			if (is_callback(rqstp->rq_proc)) {
@@ -573,7 +659,6 @@ static int lockd_authenticate(struct svc_rqst *rqstp)
 			}
 			return svc_set_client(rqstp);
 	}
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
 	return SVC_DENIED;
 }
 
@@ -681,44 +766,6 @@ static void __exit exit_nlm(void)
 module_init(init_nlm);
 module_exit(exit_nlm);
 
-/**
- * nlmsvc_dispatch - Process an NLM Request
- * @rqstp: incoming request
- * @statp: pointer to location of accept_stat field in RPC Reply buffer
- *
- * Return values:
- *  %0: Processing complete; do not send a Reply
- *  %1: Processing complete; send Reply in rqstp->rq_res
- */
-static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
-{
-	const struct svc_procedure *procp = rqstp->rq_procinfo;
-
-	svcxdr_init_decode(rqstp);
-	if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream))
-		goto out_decode_err;
-
-	*statp = procp->pc_func(rqstp);
-	if (*statp == rpc_drop_reply)
-		return 0;
-	if (*statp != rpc_success)
-		return 1;
-
-	svcxdr_init_encode(rqstp);
-	if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream))
-		goto out_encode_err;
-
-	return 1;
-
-out_decode_err:
-	*statp = rpc_garbage_args;
-	return 1;
-
-out_encode_err:
-	*statp = rpc_system_err;
-	return 1;
-}
-
 /*
  * Define NLM program and procedures
  */
@@ -728,7 +775,6 @@ static const struct svc_version	nlmsvc_version1 = {
 	.vs_nproc	= 17,
 	.vs_proc	= nlmsvc_procedures,
 	.vs_count	= nlmsvc_version1_count,
-	.vs_dispatch	= nlmsvc_dispatch,
 	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 static unsigned int nlmsvc_version3_count[24];
@@ -737,7 +783,6 @@ static const struct svc_version	nlmsvc_version3 = {
 	.vs_nproc	= 24,
 	.vs_proc	= nlmsvc_procedures,
 	.vs_count	= nlmsvc_version3_count,
-	.vs_dispatch	= nlmsvc_dispatch,
 	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
@@ -747,7 +792,6 @@ static const struct svc_version	nlmsvc_version4 = {
 	.vs_nproc	= 24,
 	.vs_proc	= nlmsvc_procedures4,
 	.vs_count	= nlmsvc_version4_count,
-	.vs_dispatch	= nlmsvc_dispatch,
 	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b72023a6b4c1..fa41dda39925 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -32,10 +32,6 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	if (!nlmsvc_ops)
 		return nlm_lck_denied_nolocks;
 
-	if (lock->lock_start > OFFSET_MAX ||
-	    (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start))))
-		return nlm4_fbig;
-
 	/* Obtain host handle */
 	if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
 	 || (argp->monitor && nsm_monitor(host) < 0))
@@ -44,21 +40,13 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain file pointer. Not used by FREE_ALL call. */
 	if (filp != NULL) {
-		int mode = lock_to_openmode(&lock->fl);
-
-		error = nlm_lookup_file(rqstp, &file, lock);
-		if (error)
+		if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
 			goto no_locks;
 		*filp = file;
 
 		/* Set up the missing parts of the file_lock structure */
-		lock->fl.fl_flags = FL_POSIX;
-		lock->fl.fl_file  = file->f_file[mode];
+		lock->fl.fl_file  = file->f_file;
 		lock->fl.fl_pid = current->tgid;
-		lock->fl.fl_start = (loff_t)lock->lock_start;
-		lock->fl.fl_end = lock->lock_len ?
-				   (loff_t)(lock->lock_start + lock->lock_len - 1) :
-				   OFFSET_MAX;
 		lock->fl.fl_lmops = &nlmsvc_lock_operations;
 		nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
 		if (!lock->fl.fl_owner) {
@@ -96,7 +84,6 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
-	struct nlm_lockowner *test_owner;
 	__be32 rc = rpc_success;
 
 	dprintk("lockd: TEST4        called\n");
@@ -106,7 +93,6 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
-	test_owner = argp->lock.fl.fl_owner;
 	/* Now check for conflicting locks */
 	resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
 	if (resp->status == nlm_drop_reply)
@@ -114,7 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	else
 		dprintk("lockd: TEST4        status %d\n", ntohl(resp->status));
 
-	nlmsvc_put_lockowner(test_owner);
+	nlmsvc_release_lockowner(&argp->lock);
 	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
@@ -280,6 +266,8 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp)
  */
 static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
+	dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
+			-task->tk_status);
 }
 
 static void nlm4svc_callback_release(void *data)
@@ -522,239 +510,191 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "NULL",
 	},
 	[NLMPROC_TEST] = {
 		.pc_func = nlm4svc_proc_test,
 		.pc_decode = nlm4svc_decode_testargs,
 		.pc_encode = nlm4svc_encode_testres,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St+2+No+Rg,
-		.pc_name = "TEST",
 	},
 	[NLMPROC_LOCK] = {
 		.pc_func = nlm4svc_proc_lock,
 		.pc_decode = nlm4svc_decode_lockargs,
 		.pc_encode = nlm4svc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "LOCK",
 	},
 	[NLMPROC_CANCEL] = {
 		.pc_func = nlm4svc_proc_cancel,
 		.pc_decode = nlm4svc_decode_cancargs,
 		.pc_encode = nlm4svc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "CANCEL",
 	},
 	[NLMPROC_UNLOCK] = {
 		.pc_func = nlm4svc_proc_unlock,
 		.pc_decode = nlm4svc_decode_unlockargs,
 		.pc_encode = nlm4svc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "UNLOCK",
 	},
 	[NLMPROC_GRANTED] = {
 		.pc_func = nlm4svc_proc_granted,
 		.pc_decode = nlm4svc_decode_testargs,
 		.pc_encode = nlm4svc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "GRANTED",
 	},
 	[NLMPROC_TEST_MSG] = {
 		.pc_func = nlm4svc_proc_test_msg,
 		.pc_decode = nlm4svc_decode_testargs,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "TEST_MSG",
 	},
 	[NLMPROC_LOCK_MSG] = {
 		.pc_func = nlm4svc_proc_lock_msg,
 		.pc_decode = nlm4svc_decode_lockargs,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "LOCK_MSG",
 	},
 	[NLMPROC_CANCEL_MSG] = {
 		.pc_func = nlm4svc_proc_cancel_msg,
 		.pc_decode = nlm4svc_decode_cancargs,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "CANCEL_MSG",
 	},
 	[NLMPROC_UNLOCK_MSG] = {
 		.pc_func = nlm4svc_proc_unlock_msg,
 		.pc_decode = nlm4svc_decode_unlockargs,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNLOCK_MSG",
 	},
 	[NLMPROC_GRANTED_MSG] = {
 		.pc_func = nlm4svc_proc_granted_msg,
 		.pc_decode = nlm4svc_decode_testargs,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "GRANTED_MSG",
 	},
 	[NLMPROC_TEST_RES] = {
 		.pc_func = nlm4svc_proc_null,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "TEST_RES",
 	},
 	[NLMPROC_LOCK_RES] = {
 		.pc_func = nlm4svc_proc_null,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "LOCK_RES",
 	},
 	[NLMPROC_CANCEL_RES] = {
 		.pc_func = nlm4svc_proc_null,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "CANCEL_RES",
 	},
 	[NLMPROC_UNLOCK_RES] = {
 		.pc_func = nlm4svc_proc_null,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNLOCK_RES",
 	},
 	[NLMPROC_GRANTED_RES] = {
 		.pc_func = nlm4svc_proc_granted_res,
 		.pc_decode = nlm4svc_decode_res,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "GRANTED_RES",
 	},
 	[NLMPROC_NSM_NOTIFY] = {
 		.pc_func = nlm4svc_proc_sm_notify,
 		.pc_decode = nlm4svc_decode_reboot,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_reboot),
-		.pc_argzero = sizeof(struct nlm_reboot),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "SM_NOTIFY",
 	},
 	[17] = {
 		.pc_func = nlm4svc_proc_unused,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = 0,
-		.pc_name = "UNUSED",
 	},
 	[18] = {
 		.pc_func = nlm4svc_proc_unused,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = 0,
-		.pc_name = "UNUSED",
 	},
 	[19] = {
 		.pc_func = nlm4svc_proc_unused,
 		.pc_decode = nlm4svc_decode_void,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = 0,
-		.pc_name = "UNUSED",
 	},
 	[NLMPROC_SHARE] = {
 		.pc_func = nlm4svc_proc_share,
 		.pc_decode = nlm4svc_decode_shareargs,
 		.pc_encode = nlm4svc_encode_shareres,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St+1,
-		.pc_name = "SHARE",
 	},
 	[NLMPROC_UNSHARE] = {
 		.pc_func = nlm4svc_proc_unshare,
 		.pc_decode = nlm4svc_decode_shareargs,
 		.pc_encode = nlm4svc_encode_shareres,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St+1,
-		.pc_name = "UNSHARE",
 	},
 	[NLMPROC_NM_LOCK] = {
 		.pc_func = nlm4svc_proc_nm_lock,
 		.pc_decode = nlm4svc_decode_lockargs,
 		.pc_encode = nlm4svc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "NM_LOCK",
 	},
 	[NLMPROC_FREE_ALL] = {
 		.pc_func = nlm4svc_proc_free_all,
 		.pc_decode = nlm4svc_decode_notify,
 		.pc_encode = nlm4svc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "FREE_ALL",
 	},
 };
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4e30f3c50970..273a81971ed5 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -31,7 +31,6 @@
 #include <linux/lockd/nlm.h>
 #include <linux/lockd/lockd.h>
 #include <linux/kthread.h>
-#include <linux/exportfs.h>
 
 #define NLMDBG_FACILITY		NLMDBG_SVCLOCK
 
@@ -340,7 +339,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
 	return lockowner;
 }
 
-void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
 {
 	if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
 		return;
@@ -470,27 +469,18 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	    struct nlm_host *host, struct nlm_lock *lock, int wait,
 	    struct nlm_cookie *cookie, int reclaim)
 {
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	struct inode		*inode = nlmsvc_file_inode(file);
-#endif
 	struct nlm_block	*block = NULL;
 	int			error;
-	int			mode;
-	int			async_block = 0;
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
-				inode->i_sb->s_id, inode->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_type, lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end,
 				wait);
 
-	if (nlmsvc_file_file(file)->f_op->lock) {
-		async_block = wait;
-		wait = 0;
-	}
-
 	/* Lock file against concurrent access */
 	mutex_lock(&file->f_mutex);
 	/* Get existing block (in case client is busy-waiting)
@@ -534,8 +524,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 
 	if (!wait)
 		lock->fl.fl_flags &= ~FL_SLEEP;
-	mode = lock_to_openmode(&lock->fl);
-	error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
+	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
 	lock->fl.fl_flags &= ~FL_SLEEP;
 
 	dprintk("lockd: vfs_lock_file returned %d\n", error);
@@ -551,7 +540,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			 */
 			if (wait)
 				break;
-			ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
+			ret = nlm_lck_denied;
 			goto out;
 		case FILE_LOCK_DEFERRED:
 			if (wait)
@@ -588,12 +577,12 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 		struct nlm_lock *conflock, struct nlm_cookie *cookie)
 {
 	int			error;
-	int			mode;
 	__be32			ret;
+	struct nlm_lockowner	*test_owner;
 
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
-				nlmsvc_file_inode(file)->i_sb->s_id,
-				nlmsvc_file_inode(file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_type,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -603,8 +592,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 		goto out;
 	}
 
-	mode = lock_to_openmode(&lock->fl);
-	error = vfs_test_lock(file->f_file[mode], &lock->fl);
+	/* If there's a conflicting lock, remember to clean up the test lock */
+	test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
+
+	error = vfs_test_lock(file->f_file, &lock->fl);
 	if (error) {
 		/* We can't currently deal with deferred test requests */
 		if (error == FILE_LOCK_DEFERRED)
@@ -631,6 +622,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	conflock->fl.fl_end = lock->fl.fl_end;
 	locks_release_private(&lock->fl);
 
+	/* Clean up the test lock */
+	lock->fl.fl_owner = NULL;
+	nlmsvc_put_lockowner(test_owner);
+
 	ret = nlm_lck_denied;
 out:
 	return ret;
@@ -646,11 +641,11 @@ out:
 __be32
 nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 {
-	int	error = 0;
+	int	error;
 
 	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				nlmsvc_file_inode(file)->i_sb->s_id,
-				nlmsvc_file_inode(file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -659,14 +654,7 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 	nlmsvc_cancel_blocked(net, file, lock);
 
 	lock->fl.fl_type = F_UNLCK;
-	lock->fl.fl_file = file->f_file[O_RDONLY];
-	if (lock->fl.fl_file)
-		error = vfs_lock_file(lock->fl.fl_file, F_SETLK,
-					&lock->fl, NULL);
-	lock->fl.fl_file = file->f_file[O_WRONLY];
-	if (lock->fl.fl_file)
-		error |= vfs_lock_file(lock->fl.fl_file, F_SETLK,
-					&lock->fl, NULL);
+	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
 
 	return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
 }
@@ -683,11 +671,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 {
 	struct nlm_block	*block;
 	int status = 0;
-	int mode;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				nlmsvc_file_inode(file)->i_sb->s_id,
-				nlmsvc_file_inode(file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -699,10 +686,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 	block = nlmsvc_lookup_block(file, lock);
 	mutex_unlock(&file->f_mutex);
 	if (block != NULL) {
-		struct file_lock *fl = &block->b_call->a_args.lock.fl;
-
-		mode = lock_to_openmode(fl);
-		vfs_cancel_lock(block->b_file->f_file[mode], fl);
+		vfs_cancel_lock(block->b_file->f_file,
+				&block->b_call->a_args.lock.fl);
 		status = nlmsvc_unlink_block(block);
 		nlmsvc_release_block(block);
 	}
@@ -818,7 +803,6 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 {
 	struct nlm_file		*file = block->b_file;
 	struct nlm_lock		*lock = &block->b_call->a_args.lock;
-	int			mode;
 	int			error;
 	loff_t			fl_start, fl_end;
 
@@ -844,8 +828,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	lock->fl.fl_flags |= FL_SLEEP;
 	fl_start = lock->fl.fl_start;
 	fl_end = lock->fl.fl_end;
-	mode = lock_to_openmode(&lock->fl);
-	error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
+	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
 	lock->fl.fl_flags &= ~FL_SLEEP;
 	lock->fl.fl_start = fl_start;
 	lock->fl.fl_end = fl_end;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 32784f508c81..50855f2c1f4b 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -55,7 +55,6 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 	struct nlm_host		*host = NULL;
 	struct nlm_file		*file = NULL;
 	struct nlm_lock		*lock = &argp->lock;
-	int			mode;
 	__be32			error = 0;
 
 	/* nfsd callbacks must have been installed for this procedure */
@@ -70,15 +69,13 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
 
 	/* Obtain file pointer. Not used by FREE_ALL call. */
 	if (filp != NULL) {
-		error = cast_status(nlm_lookup_file(rqstp, &file, lock));
+		error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
 		if (error != 0)
 			goto no_locks;
 		*filp = file;
 
 		/* Set up the missing parts of the file_lock structure */
-		mode = lock_to_openmode(&lock->fl);
-		lock->fl.fl_flags = FL_POSIX;
-		lock->fl.fl_file  = file->f_file[mode];
+		lock->fl.fl_file  = file->f_file;
 		lock->fl.fl_pid = current->tgid;
 		lock->fl.fl_lmops = &nlmsvc_lock_operations;
 		nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
@@ -117,7 +114,6 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
-	struct nlm_lockowner *test_owner;
 	__be32 rc = rpc_success;
 
 	dprintk("lockd: TEST          called\n");
@@ -127,8 +123,6 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 	if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
-	test_owner = argp->lock.fl.fl_owner;
-
 	/* Now check for conflicting locks */
 	resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
 	if (resp->status == nlm_drop_reply)
@@ -137,7 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 		dprintk("lockd: TEST          status %d vers %d\n",
 			ntohl(resp->status), rqstp->rq_vers);
 
-	nlmsvc_put_lockowner(test_owner);
+	nlmsvc_release_lockowner(&argp->lock);
 	nlmsvc_release_host(host);
 	nlm_release_file(file);
 	return rc;
@@ -305,6 +299,8 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp)
  */
 static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
+	dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
+			-task->tk_status);
 }
 
 void nlmsvc_release_call(struct nlm_rqst *call)
@@ -556,239 +552,191 @@ const struct svc_procedure nlmsvc_procedures[24] = {
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "NULL",
 	},
 	[NLMPROC_TEST] = {
 		.pc_func = nlmsvc_proc_test,
 		.pc_decode = nlmsvc_decode_testargs,
 		.pc_encode = nlmsvc_encode_testres,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St+2+No+Rg,
-		.pc_name = "TEST",
 	},
 	[NLMPROC_LOCK] = {
 		.pc_func = nlmsvc_proc_lock,
 		.pc_decode = nlmsvc_decode_lockargs,
 		.pc_encode = nlmsvc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "LOCK",
 	},
 	[NLMPROC_CANCEL] = {
 		.pc_func = nlmsvc_proc_cancel,
 		.pc_decode = nlmsvc_decode_cancargs,
 		.pc_encode = nlmsvc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "CANCEL",
 	},
 	[NLMPROC_UNLOCK] = {
 		.pc_func = nlmsvc_proc_unlock,
 		.pc_decode = nlmsvc_decode_unlockargs,
 		.pc_encode = nlmsvc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "UNLOCK",
 	},
 	[NLMPROC_GRANTED] = {
 		.pc_func = nlmsvc_proc_granted,
 		.pc_decode = nlmsvc_decode_testargs,
 		.pc_encode = nlmsvc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "GRANTED",
 	},
 	[NLMPROC_TEST_MSG] = {
 		.pc_func = nlmsvc_proc_test_msg,
 		.pc_decode = nlmsvc_decode_testargs,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "TEST_MSG",
 	},
 	[NLMPROC_LOCK_MSG] = {
 		.pc_func = nlmsvc_proc_lock_msg,
 		.pc_decode = nlmsvc_decode_lockargs,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "LOCK_MSG",
 	},
 	[NLMPROC_CANCEL_MSG] = {
 		.pc_func = nlmsvc_proc_cancel_msg,
 		.pc_decode = nlmsvc_decode_cancargs,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "CANCEL_MSG",
 	},
 	[NLMPROC_UNLOCK_MSG] = {
 		.pc_func = nlmsvc_proc_unlock_msg,
 		.pc_decode = nlmsvc_decode_unlockargs,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNLOCK_MSG",
 	},
 	[NLMPROC_GRANTED_MSG] = {
 		.pc_func = nlmsvc_proc_granted_msg,
 		.pc_decode = nlmsvc_decode_testargs,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "GRANTED_MSG",
 	},
 	[NLMPROC_TEST_RES] = {
 		.pc_func = nlmsvc_proc_null,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "TEST_RES",
 	},
 	[NLMPROC_LOCK_RES] = {
 		.pc_func = nlmsvc_proc_null,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "LOCK_RES",
 	},
 	[NLMPROC_CANCEL_RES] = {
 		.pc_func = nlmsvc_proc_null,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "CANCEL_RES",
 	},
 	[NLMPROC_UNLOCK_RES] = {
 		.pc_func = nlmsvc_proc_null,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNLOCK_RES",
 	},
 	[NLMPROC_GRANTED_RES] = {
 		.pc_func = nlmsvc_proc_granted_res,
 		.pc_decode = nlmsvc_decode_res,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_res),
-		.pc_argzero = sizeof(struct nlm_res),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "GRANTED_RES",
 	},
 	[NLMPROC_NSM_NOTIFY] = {
 		.pc_func = nlmsvc_proc_sm_notify,
 		.pc_decode = nlmsvc_decode_reboot,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_reboot),
-		.pc_argzero = sizeof(struct nlm_reboot),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "SM_NOTIFY",
 	},
 	[17] = {
 		.pc_func = nlmsvc_proc_unused,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNUSED",
 	},
 	[18] = {
 		.pc_func = nlmsvc_proc_unused,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNUSED",
 	},
 	[19] = {
 		.pc_func = nlmsvc_proc_unused,
 		.pc_decode = nlmsvc_decode_void,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_void),
-		.pc_argzero = sizeof(struct nlm_void),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = St,
-		.pc_name = "UNUSED",
 	},
 	[NLMPROC_SHARE] = {
 		.pc_func = nlmsvc_proc_share,
 		.pc_decode = nlmsvc_decode_shareargs,
 		.pc_encode = nlmsvc_encode_shareres,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St+1,
-		.pc_name = "SHARE",
 	},
 	[NLMPROC_UNSHARE] = {
 		.pc_func = nlmsvc_proc_unshare,
 		.pc_decode = nlmsvc_decode_shareargs,
 		.pc_encode = nlmsvc_encode_shareres,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St+1,
-		.pc_name = "UNSHARE",
 	},
 	[NLMPROC_NM_LOCK] = {
 		.pc_func = nlmsvc_proc_nm_lock,
 		.pc_decode = nlmsvc_decode_lockargs,
 		.pc_encode = nlmsvc_encode_res,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_res),
 		.pc_xdrressize = Ck+St,
-		.pc_name = "NM_LOCK",
 	},
 	[NLMPROC_FREE_ALL] = {
 		.pc_func = nlmsvc_proc_free_all,
 		.pc_decode = nlmsvc_decode_notify,
 		.pc_encode = nlmsvc_encode_void,
 		.pc_argsize = sizeof(struct nlm_args),
-		.pc_argzero = sizeof(struct nlm_args),
 		.pc_ressize = sizeof(struct nlm_void),
 		.pc_xdrressize = 0,
-		.pc_name = "FREE_ALL",
 	},
 };
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index e3b6229e7ae5..028fc152da22 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 
 static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
 {
-	struct inode *inode = nlmsvc_file_inode(file);
+	struct inode *inode = locks_inode(file->f_file);
 
 	dprintk("lockd: %s %s/%ld\n",
 		msg, inode->i_sb->s_id, inode->i_ino);
@@ -71,75 +71,56 @@ static inline unsigned int file_hash(struct nfs_fh *f)
 	return tmp & (FILE_NRHASH - 1);
 }
 
-int lock_to_openmode(struct file_lock *lock)
-{
-	return (lock->fl_type == F_WRLCK) ? O_WRONLY : O_RDONLY;
-}
-
-/*
- * Open the file. Note that if we're reexporting, for example,
- * this could block the lockd thread for a while.
- *
- * We have to make sure we have the right credential to open
- * the file.
- */
-static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
-			   struct nlm_file *file, int mode)
-{
-	struct file **fp = &file->f_file[mode];
-	__be32	nfserr;
-
-	if (*fp)
-		return 0;
-	nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
-	if (nfserr)
-		dprintk("lockd: open failed (error %d)\n", nfserr);
-	return nfserr;
-}
-
 /*
  * Lookup file info. If it doesn't exist, create a file info struct
  * and open a (VFS) file for the given inode.
+ *
+ * FIXME:
+ * Note that we open the file O_RDONLY even when creating write locks.
+ * This is not quite right, but for now, we assume the client performs
+ * the proper R/W checking.
  */
 __be32
 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
-					struct nlm_lock *lock)
+					struct nfs_fh *f)
 {
 	struct nlm_file	*file;
 	unsigned int	hash;
 	__be32		nfserr;
-	int		mode;
 
-	nlm_debug_print_fh("nlm_lookup_file", &lock->fh);
+	nlm_debug_print_fh("nlm_lookup_file", f);
 
-	hash = file_hash(&lock->fh);
-	mode = lock_to_openmode(&lock->fl);
+	hash = file_hash(f);
 
 	/* Lock file table */
 	mutex_lock(&nlm_file_mutex);
 
 	hlist_for_each_entry(file, &nlm_files[hash], f_list)
-		if (!nfs_compare_fh(&file->f_handle, &lock->fh)) {
-			mutex_lock(&file->f_mutex);
-			nfserr = nlm_do_fopen(rqstp, file, mode);
-			mutex_unlock(&file->f_mutex);
+		if (!nfs_compare_fh(&file->f_handle, f))
 			goto found;
-		}
-	nlm_debug_print_fh("creating file for", &lock->fh);
+
+	nlm_debug_print_fh("creating file for", f);
 
 	nfserr = nlm_lck_denied_nolocks;
 	file = kzalloc(sizeof(*file), GFP_KERNEL);
 	if (!file)
-		goto out_free;
+		goto out_unlock;
 
-	memcpy(&file->f_handle, &lock->fh, sizeof(struct nfs_fh));
+	memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
 	mutex_init(&file->f_mutex);
 	INIT_HLIST_NODE(&file->f_list);
 	INIT_LIST_HEAD(&file->f_blocks);
 
-	nfserr = nlm_do_fopen(rqstp, file, mode);
-	if (nfserr)
-		goto out_unlock;
+	/* Open the file. Note that this must not sleep for too long, else
+	 * we would lock up lockd:-) So no NFS re-exports, folks.
+	 *
+	 * We have to make sure we have the right credential to open
+	 * the file.
+	 */
+	if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
+		dprintk("lockd: open failed (error %d)\n", nfserr);
+		goto out_free;
+	}
 
 	hlist_add_head(&file->f_list, &nlm_files[hash]);
 
@@ -147,6 +128,7 @@ found:
 	dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
 	*result = file;
 	file->f_count++;
+	nfserr = 0;
 
 out_unlock:
 	mutex_unlock(&nlm_file_mutex);
@@ -166,40 +148,13 @@ nlm_delete_file(struct nlm_file *file)
 	nlm_debug_print_file("closing file", file);
 	if (!hlist_unhashed(&file->f_list)) {
 		hlist_del(&file->f_list);
-		if (file->f_file[O_RDONLY])
-			nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
-		if (file->f_file[O_WRONLY])
-			nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
+		nlmsvc_ops->fclose(file->f_file);
 		kfree(file);
 	} else {
 		printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
 	}
 }
 
-static int nlm_unlock_files(struct nlm_file *file, const struct file_lock *fl)
-{
-	struct file_lock lock;
-
-	locks_init_lock(&lock);
-	lock.fl_type  = F_UNLCK;
-	lock.fl_start = 0;
-	lock.fl_end   = OFFSET_MAX;
-	lock.fl_owner = fl->fl_owner;
-	lock.fl_pid   = fl->fl_pid;
-	lock.fl_flags = FL_POSIX;
-
-	lock.fl_file = file->f_file[O_RDONLY];
-	if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL))
-		goto out_err;
-	lock.fl_file = file->f_file[O_WRONLY];
-	if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL))
-		goto out_err;
-	return 0;
-out_err:
-	pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__);
-	return 1;
-}
-
 /*
  * Loop over all locks on the given file and perform the specified
  * action.
@@ -210,7 +165,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
 {
 	struct inode	 *inode = nlmsvc_file_inode(file);
 	struct file_lock *fl;
-	struct file_lock_context *flctx = locks_inode_context(inode);
+	struct file_lock_context *flctx = inode->i_flctx;
 	struct nlm_host	 *lockhost;
 
 	if (!flctx || list_empty_careful(&flctx->flc_posix))
@@ -227,10 +182,17 @@ again:
 
 		lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
 		if (match(lockhost, host)) {
+			struct file_lock lock = *fl;
 
 			spin_unlock(&flctx->flc_lock);
-			if (nlm_unlock_files(file, fl))
+			lock.fl_type  = F_UNLCK;
+			lock.fl_start = 0;
+			lock.fl_end   = OFFSET_MAX;
+			if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
+				printk("lockd: unlock failure in %s:%d\n",
+						__FILE__, __LINE__);
 				return 1;
+			}
 			goto again;
 		}
 	}
@@ -265,7 +227,7 @@ nlm_file_inuse(struct nlm_file *file)
 {
 	struct inode	 *inode = nlmsvc_file_inode(file);
 	struct file_lock *fl;
-	struct file_lock_context *flctx = locks_inode_context(inode);
+	struct file_lock_context *flctx = inode->i_flctx;
 
 	if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
 		return 1;
@@ -284,14 +246,6 @@ nlm_file_inuse(struct nlm_file *file)
 	return 0;
 }
 
-static void nlm_close_files(struct nlm_file *file)
-{
-	if (file->f_file[O_RDONLY])
-		nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
-	if (file->f_file[O_WRONLY])
-		nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
-}
-
 /*
  * Loop over all files in the file table.
  */
@@ -322,7 +276,7 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
 			if (list_empty(&file->f_blocks) && !file->f_locks
 			 && !file->f_shares && !file->f_count) {
 				hlist_del(&file->f_list);
-				nlm_close_files(file);
+				nlmsvc_ops->fclose(file->f_file);
 				kfree(file);
 			}
 		}
@@ -456,13 +410,12 @@ nlmsvc_invalidate_all(void)
 	nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
 }
 
-
 static int
 nlmsvc_match_sb(void *datap, struct nlm_file *file)
 {
 	struct super_block *sb = datap;
 
-	return sb == nlmsvc_file_inode(file)->i_sb;
+	return sb == locks_inode(file->f_file)->i_sb;
 }
 
 /**
diff --git a/fs/lockd/svcxdr.h b/fs/lockd/svcxdr.h
deleted file mode 100644
index 4f1a451da5ba..000000000000
--- a/fs/lockd/svcxdr.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Encode/decode NLM basic data types
- *
- * Basic NLMv3 XDR data types are not defined in an IETF standards
- * document.  X/Open has a description of these data types that
- * is useful.  See Chapter 10 of "Protocols for Interworking:
- * XNFS, Version 3W".
- *
- * Basic NLMv4 XDR data types are defined in Appendix II.1.4 of
- * RFC 1813: "NFS Version 3 Protocol Specification".
- *
- * Author: Chuck Lever <chuck.lever@oracle.com>
- *
- * Copyright (c) 2020, Oracle and/or its affiliates.
- */
-
-#ifndef _LOCKD_SVCXDR_H_
-#define _LOCKD_SVCXDR_H_
-
-static inline bool
-svcxdr_decode_stats(struct xdr_stream *xdr, __be32 *status)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(xdr, XDR_UNIT);
-	if (!p)
-		return false;
-	*status = *p;
-
-	return true;
-}
-
-static inline bool
-svcxdr_encode_stats(struct xdr_stream *xdr, __be32 status)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT);
-	if (!p)
-		return false;
-	*p = status;
-
-	return true;
-}
-
-static inline bool
-svcxdr_decode_string(struct xdr_stream *xdr, char **data, unsigned int *data_len)
-{
-	__be32 *p;
-	u32 len;
-
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len > NLM_MAXSTRLEN)
-		return false;
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	*data_len = len;
-	*data = (char *)p;
-
-	return true;
-}
-
-/*
- * NLM cookies are defined by specification to be a variable-length
- * XDR opaque no longer than 1024 bytes. However, this implementation
- * limits their length to 32 bytes, and treats zero-length cookies
- * specially.
- */
-static inline bool
-svcxdr_decode_cookie(struct xdr_stream *xdr, struct nlm_cookie *cookie)
-{
-	__be32 *p;
-	u32 len;
-
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len > NLM_MAXCOOKIELEN)
-		return false;
-	if (!len)
-		goto out_hpux;
-
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	cookie->len = len;
-	memcpy(cookie->data, p, len);
-
-	return true;
-
-	/* apparently HPUX can return empty cookies */
-out_hpux:
-	cookie->len = 4;
-	memset(cookie->data, 0, 4);
-	return true;
-}
-
-static inline bool
-svcxdr_encode_cookie(struct xdr_stream *xdr, const struct nlm_cookie *cookie)
-{
-	__be32 *p;
-
-	if (xdr_stream_encode_u32(xdr, cookie->len) < 0)
-		return false;
-	p = xdr_reserve_space(xdr, cookie->len);
-	if (!p)
-		return false;
-	memcpy(p, cookie->data, cookie->len);
-
-	return true;
-}
-
-static inline bool
-svcxdr_decode_owner(struct xdr_stream *xdr, struct xdr_netobj *obj)
-{
-	__be32 *p;
-	u32 len;
-
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len > XDR_MAX_NETOBJ)
-		return false;
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	obj->len = len;
-	obj->data = (u8 *)p;
-
-	return true;
-}
-
-static inline bool
-svcxdr_encode_owner(struct xdr_stream *xdr, const struct xdr_netobj *obj)
-{
-	if (obj->len > XDR_MAX_NETOBJ)
-		return false;
-	return xdr_stream_encode_opaque(xdr, obj->data, obj->len) > 0;
-}
-
-#endif /* _LOCKD_SVCXDR_H_ */
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 2fb5748dae0c..982629f7b120 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -19,7 +19,7 @@
 
 #include <uapi/linux/nfs2.h>
 
-#include "svcxdr.h"
+#define NLMDBG_FACILITY		NLMDBG_XDR
 
 
 static inline loff_t
@@ -42,313 +42,311 @@ loff_t_to_s32(loff_t offset)
 }
 
 /*
- * NLM file handles are defined by specification to be a variable-length
- * XDR opaque no longer than 1024 bytes. However, this implementation
- * constrains their length to exactly the length of an NFSv2 file
- * handle.
+ * XDR functions for basic NLM types
  */
-static bool
-svcxdr_decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
+static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c)
 {
-	__be32 *p;
-	u32 len;
+	unsigned int	len;
 
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len != NFS2_FHSIZE)
-		return false;
-
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	fh->size = NFS2_FHSIZE;
-	memcpy(fh->data, p, len);
-	memset(fh->data + NFS2_FHSIZE, 0, sizeof(fh->data) - NFS2_FHSIZE);
-
-	return true;
+	len = ntohl(*p++);
+	
+	if(len==0)
+	{
+		c->len=4;
+		memset(c->data, 0, 4);	/* hockeypux brain damage */
+	}
+	else if(len<=NLM_MAXCOOKIELEN)
+	{
+		c->len=len;
+		memcpy(c->data, p, len);
+		p+=XDR_QUADLEN(len);
+	}
+	else 
+	{
+		dprintk("lockd: bad cookie size %d (only cookies under "
+			"%d bytes are supported.)\n",
+				len, NLM_MAXCOOKIELEN);
+		return NULL;
+	}
+	return p;
 }
 
-static bool
-svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
+static inline __be32 *
+nlm_encode_cookie(__be32 *p, struct nlm_cookie *c)
 {
-	struct file_lock *fl = &lock->fl;
-	s32 start, len, end;
+	*p++ = htonl(c->len);
+	memcpy(p, c->data, c->len);
+	p+=XDR_QUADLEN(c->len);
+	return p;
+}
 
-	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return false;
-	if (!svcxdr_decode_fhandle(xdr, &lock->fh))
-		return false;
-	if (!svcxdr_decode_owner(xdr, &lock->oh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &lock->svid) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &start) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
+static __be32 *
+nlm_decode_fh(__be32 *p, struct nfs_fh *f)
+{
+	unsigned int	len;
+
+	if ((len = ntohl(*p++)) != NFS2_FHSIZE) {
+		dprintk("lockd: bad fhandle size %d (should be %d)\n",
+			len, NFS2_FHSIZE);
+		return NULL;
+	}
+	f->size = NFS2_FHSIZE;
+	memset(f->data, 0, sizeof(f->data));
+	memcpy(f->data, p, NFS2_FHSIZE);
+	return p + XDR_QUADLEN(NFS2_FHSIZE);
+}
+
+/*
+ * Encode and decode owner handle
+ */
+static inline __be32 *
+nlm_decode_oh(__be32 *p, struct xdr_netobj *oh)
+{
+	return xdr_decode_netobj(p, oh);
+}
+
+static inline __be32 *
+nlm_encode_oh(__be32 *p, struct xdr_netobj *oh)
+{
+	return xdr_encode_netobj(p, oh);
+}
+
+static __be32 *
+nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
+{
+	struct file_lock	*fl = &lock->fl;
+	s32			start, len, end;
+
+	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
+					    &lock->len,
+					    NLM_MAXSTRLEN))
+	 || !(p = nlm_decode_fh(p, &lock->fh))
+	 || !(p = nlm_decode_oh(p, &lock->oh)))
+		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_flags = FL_POSIX;
-	fl->fl_type  = F_RDLCK;
+	fl->fl_type  = F_RDLCK;		/* as good as anything else */
+	start = ntohl(*p++);
+	len = ntohl(*p++);
 	end = start + len - 1;
+
 	fl->fl_start = s32_to_loff_t(start);
+
 	if (len == 0 || end < 0)
 		fl->fl_end = OFFSET_MAX;
 	else
 		fl->fl_end = s32_to_loff_t(end);
-
-	return true;
+	return p;
 }
 
-static bool
-svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock)
+/*
+ * Encode result of a TEST/TEST_MSG call
+ */
+static __be32 *
+nlm_encode_testres(__be32 *p, struct nlm_res *resp)
 {
-	const struct file_lock *fl = &lock->fl;
-	s32 start, len;
+	s32		start, len;
 
-	/* exclusive */
-	if (xdr_stream_encode_bool(xdr, fl->fl_type != F_RDLCK) < 0)
-		return false;
-	if (xdr_stream_encode_u32(xdr, lock->svid) < 0)
-		return false;
-	if (!svcxdr_encode_owner(xdr, &lock->oh))
-		return false;
-	start = loff_t_to_s32(fl->fl_start);
-	if (fl->fl_end == OFFSET_MAX)
-		len = 0;
-	else
-		len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
-	if (xdr_stream_encode_u32(xdr, start) < 0)
-		return false;
-	if (xdr_stream_encode_u32(xdr, len) < 0)
-		return false;
+	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
+		return NULL;
+	*p++ = resp->status;
 
-	return true;
-}
+	if (resp->status == nlm_lck_denied) {
+		struct file_lock	*fl = &resp->lock.fl;
 
-static bool
-svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
-{
-	if (!svcxdr_encode_stats(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nlm_lck_denied:
-		if (!svcxdr_encode_holder(xdr, &resp->lock))
-			return false;
+		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
+		*p++ = htonl(resp->lock.svid);
+
+		/* Encode owner handle. */
+		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
+			return NULL;
+
+		start = loff_t_to_s32(fl->fl_start);
+		if (fl->fl_end == OFFSET_MAX)
+			len = 0;
+		else
+			len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
+
+		*p++ = htonl(start);
+		*p++ = htonl(len);
 	}
 
-	return true;
+	return p;
 }
 
 
 /*
- * Decode Call arguments
+ * First, the server side XDR functions
  */
-
-bool
-nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	return true;
-}
-
-bool
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
-	u32 exclusive;
+	u32	exclusive;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
+		return 0;
+
+	exclusive = ntohl(*p++);
+	if (!(p = nlm_decode_lock(p, &argp->lock)))
+		return 0;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	if (!(p = nlm_encode_testres(p, resp)))
+		return 0;
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
-	u32 exclusive;
+	u32	exclusive;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return false;
-	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
+		return 0;
+	argp->block  = ntohl(*p++);
+	exclusive    = ntohl(*p++);
+	if (!(p = nlm_decode_lock(p, &argp->lock)))
+		return 0;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
-	if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return false;
+	argp->reclaim = ntohl(*p++);
+	argp->state   = ntohl(*p++);
 	argp->monitor = 1;		/* monitor client by default */
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
-	u32 exclusive;
+	u32	exclusive;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return false;
-	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
+		return 0;
+	argp->block = ntohl(*p++);
+	exclusive = ntohl(*p++);
+	if (!(p = nlm_decode_lock(p, &argp->lock)))
+		return 0;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
-
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm_decode_cookie(p, &argp->cookie))
+	 || !(p = nlm_decode_lock(p, &argp->lock)))
+		return 0;
 	argp->lock.fl.fl_type = F_UNLCK;
-
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	struct nlm_res *resp = rqstp->rq_argp;
-
-	if (!svcxdr_decode_cookie(xdr, &resp->cookie))
-		return false;
-	if (!svcxdr_decode_stats(xdr, &resp->status))
-		return false;
-
-	return true;
-}
-
-bool
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	struct nlm_reboot *argp = rqstp->rq_argp;
-	__be32 *p;
-	u32 len;
-
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len > SM_MAXSTRLEN)
-		return false;
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	argp->len = len;
-	argp->mon = (char *)p;
-	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return false;
-	p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
-	if (!p)
-		return false;
-	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
-
-	return true;
-}
-
-bool
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->svid = ~(u32)0;
+	lock->svid = ~(u32) 0;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return false;
-	if (!svcxdr_decode_fhandle(xdr, &lock->fh))
-		return false;
-	if (!svcxdr_decode_owner(xdr, &lock->oh))
-		return false;
-	/* XXX: Range checks are missing in the original code */
-	if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
-		return false;
-
-	return true;
+	if (!(p = nlm_decode_cookie(p, &argp->cookie))
+	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
+					    &lock->len, NLM_MAXSTRLEN))
+	 || !(p = nlm_decode_fh(p, &lock->fh))
+	 || !(p = nlm_decode_oh(p, &lock->oh)))
+		return 0;
+	argp->fsm_mode = ntohl(*p++);
+	argp->fsm_access = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+	*p++ = xdr_zero;		/* sequence argument */
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
-	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return false;
-
-	return true;
+	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
+					    &lock->len, NLM_MAXSTRLEN)))
+		return 0;
+	argp->state = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
 }
 
-
-/*
- * Encode Reply results
- */
-
-bool
-nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
-	return true;
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
+	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
+		return 0;
+	argp->state = ntohl(*p++);
+	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
+	p += XDR_QUADLEN(SM_PRIV_SIZE);
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nlm_res *resp = rqstp->rq_resp;
+	struct nlm_res *resp = rqstp->rq_argp;
 
-	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
-		svcxdr_encode_testrply(xdr, resp);
+	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
+		return 0;
+	resp->status = *p++;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nlm_res *resp = rqstp->rq_resp;
-
-	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
-		svcxdr_encode_stats(xdr, resp->status);
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nlm_res *resp = rqstp->rq_resp;
-
-	if (!svcxdr_encode_cookie(xdr, &resp->cookie))
-		return false;
-	if (!svcxdr_encode_stats(xdr, resp->status))
-		return false;
-	/* sequence */
-	if (xdr_stream_encode_u32(xdr, 0) < 0)
-		return false;
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 5fcbf30cd275..5fa9f48a9dba 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -18,7 +18,14 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
-#include "svcxdr.h"
+#define NLMDBG_FACILITY		NLMDBG_XDR
+
+static inline loff_t
+s64_to_loff_t(__s64 offset)
+{
+	return (loff_t)offset;
+}
+
 
 static inline s64
 loff_t_to_s64(loff_t offset)
@@ -33,317 +40,310 @@ loff_t_to_s64(loff_t offset)
 	return res;
 }
 
-void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len)
+/*
+ * XDR functions for basic NLM types
+ */
+static __be32 *
+nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c)
 {
-	s64 end = off + len - 1;
+	unsigned int	len;
 
-	fl->fl_start = off;
-	if (len == 0 || end < 0)
-		fl->fl_end = OFFSET_MAX;
-	else
-		fl->fl_end = end;
+	len = ntohl(*p++);
+	
+	if(len==0)
+	{
+		c->len=4;
+		memset(c->data, 0, 4);	/* hockeypux brain damage */
+	}
+	else if(len<=NLM_MAXCOOKIELEN)
+	{
+		c->len=len;
+		memcpy(c->data, p, len);
+		p+=XDR_QUADLEN(len);
+	}
+	else 
+	{
+		dprintk("lockd: bad cookie size %d (only cookies under "
+			"%d bytes are supported.)\n",
+				len, NLM_MAXCOOKIELEN);
+		return NULL;
+	}
+	return p;
+}
+
+static __be32 *
+nlm4_encode_cookie(__be32 *p, struct nlm_cookie *c)
+{
+	*p++ = htonl(c->len);
+	memcpy(p, c->data, c->len);
+	p+=XDR_QUADLEN(c->len);
+	return p;
+}
+
+static __be32 *
+nlm4_decode_fh(__be32 *p, struct nfs_fh *f)
+{
+	memset(f->data, 0, sizeof(f->data));
+	f->size = ntohl(*p++);
+	if (f->size > NFS_MAXFHSIZE) {
+		dprintk("lockd: bad fhandle size %d (should be <=%d)\n",
+			f->size, NFS_MAXFHSIZE);
+		return NULL;
+	}
+      	memcpy(f->data, p, f->size);
+	return p + XDR_QUADLEN(f->size);
 }
 
 /*
- * NLM file handles are defined by specification to be a variable-length
- * XDR opaque no longer than 1024 bytes. However, this implementation
- * limits their length to the size of an NFSv3 file handle.
+ * Encode and decode owner handle
  */
-static bool
-svcxdr_decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
+static __be32 *
+nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh)
 {
-	__be32 *p;
-	u32 len;
-
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len > NFS_MAXFHSIZE)
-		return false;
-
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	fh->size = len;
-	memcpy(fh->data, p, len);
-	memset(fh->data + len, 0, sizeof(fh->data) - len);
-
-	return true;
+	return xdr_decode_netobj(p, oh);
 }
 
-static bool
-svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
+static __be32 *
+nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
 {
-	struct file_lock *fl = &lock->fl;
+	struct file_lock	*fl = &lock->fl;
+	__u64			len, start;
+	__s64			end;
 
-	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return false;
-	if (!svcxdr_decode_fhandle(xdr, &lock->fh))
-		return false;
-	if (!svcxdr_decode_owner(xdr, &lock->oh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &lock->svid) < 0)
-		return false;
-	if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0)
-		return false;
-	if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0)
-		return false;
+	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
+					    &lock->len, NLM_MAXSTRLEN))
+	 || !(p = nlm4_decode_fh(p, &lock->fh))
+	 || !(p = nlm4_decode_oh(p, &lock->oh)))
+		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_flags = FL_POSIX;
-	fl->fl_type  = F_RDLCK;
-	nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len);
-	return true;
-}
+	fl->fl_type  = F_RDLCK;		/* as good as anything else */
+	p = xdr_decode_hyper(p, &start);
+	p = xdr_decode_hyper(p, &len);
+	end = start + len - 1;
 
-static bool
-svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock)
-{
-	const struct file_lock *fl = &lock->fl;
-	s64 start, len;
+	fl->fl_start = s64_to_loff_t(start);
 
-	/* exclusive */
-	if (xdr_stream_encode_bool(xdr, fl->fl_type != F_RDLCK) < 0)
-		return false;
-	if (xdr_stream_encode_u32(xdr, lock->svid) < 0)
-		return false;
-	if (!svcxdr_encode_owner(xdr, &lock->oh))
-		return false;
-	start = loff_t_to_s64(fl->fl_start);
-	if (fl->fl_end == OFFSET_MAX)
-		len = 0;
+	if (len == 0 || end < 0)
+		fl->fl_end = OFFSET_MAX;
 	else
-		len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
-	if (xdr_stream_encode_u64(xdr, start) < 0)
-		return false;
-	if (xdr_stream_encode_u64(xdr, len) < 0)
-		return false;
-
-	return true;
+		fl->fl_end = s64_to_loff_t(end);
+	return p;
 }
 
-static bool
-svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
+/*
+ * Encode result of a TEST/TEST_MSG call
+ */
+static __be32 *
+nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
 {
-	if (!svcxdr_encode_stats(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nlm_lck_denied:
-		if (!svcxdr_encode_holder(xdr, &resp->lock))
-			return false;
+	s64		start, len;
+
+	dprintk("xdr: before encode_testres (p %p resp %p)\n", p, resp);
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return NULL;
+	*p++ = resp->status;
+
+	if (resp->status == nlm_lck_denied) {
+		struct file_lock	*fl = &resp->lock.fl;
+
+		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
+		*p++ = htonl(resp->lock.svid);
+
+		/* Encode owner handle. */
+		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
+			return NULL;
+
+		start = loff_t_to_s64(fl->fl_start);
+		if (fl->fl_end == OFFSET_MAX)
+			len = 0;
+		else
+			len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
+		
+		p = xdr_encode_hyper(p, start);
+		p = xdr_encode_hyper(p, len);
+		dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+			resp->status, (int)resp->lock.svid, fl->fl_type,
+			(long long)fl->fl_start,  (long long)fl->fl_end);
 	}
 
-	return true;
+	dprintk("xdr: after encode_testres (p %p resp %p)\n", p, resp);
+	return p;
 }
 
 
 /*
- * Decode Call arguments
+ * First, the server side XDR functions
  */
-
-bool
-nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	return true;
-}
-
-bool
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
-	u32 exclusive;
+	u32	exclusive;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
+		return 0;
+
+	exclusive = ntohl(*p++);
+	if (!(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	if (!(p = nlm4_encode_testres(p, resp)))
+		return 0;
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
-	u32 exclusive;
+	u32	exclusive;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return false;
-	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
+		return 0;
+	argp->block  = ntohl(*p++);
+	exclusive    = ntohl(*p++);
+	if (!(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
-	if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return false;
+	argp->reclaim = ntohl(*p++);
+	argp->state   = ntohl(*p++);
 	argp->monitor = 1;		/* monitor client by default */
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
-	u32 exclusive;
+	u32	exclusive;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return false;
-	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
+		return 0;
+	argp->block = ntohl(*p++);
+	exclusive = ntohl(*p++);
+	if (!(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
-
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return false;
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
+	 || !(p = nlm4_decode_lock(p, &argp->lock)))
+		return 0;
 	argp->lock.fl.fl_type = F_UNLCK;
-
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	struct nlm_res *resp = rqstp->rq_argp;
-
-	if (!svcxdr_decode_cookie(xdr, &resp->cookie))
-		return false;
-	if (!svcxdr_decode_stats(xdr, &resp->status))
-		return false;
-
-	return true;
-}
-
-bool
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	struct nlm_reboot *argp = rqstp->rq_argp;
-	__be32 *p;
-	u32 len;
-
-	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return false;
-	if (len > SM_MAXSTRLEN)
-		return false;
-	p = xdr_inline_decode(xdr, len);
-	if (!p)
-		return false;
-	argp->len = len;
-	argp->mon = (char *)p;
-	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return false;
-	p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
-	if (!p)
-		return false;
-	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
-
-	return true;
-}
-
-bool
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->svid = ~(u32)0;
+	lock->svid = ~(u32) 0;
 
-	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return false;
-	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return false;
-	if (!svcxdr_decode_fhandle(xdr, &lock->fh))
-		return false;
-	if (!svcxdr_decode_owner(xdr, &lock->oh))
-		return false;
-	/* XXX: Range checks are missing in the original code */
-	if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
-		return false;
-
-	return true;
+	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
+	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
+					    &lock->len, NLM_MAXSTRLEN))
+	 || !(p = nlm4_decode_fh(p, &lock->fh))
+	 || !(p = nlm4_decode_oh(p, &lock->oh)))
+		return 0;
+	argp->fsm_mode = ntohl(*p++);
+	argp->fsm_access = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+	*p++ = xdr_zero;		/* sequence argument */
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
+		return 0;
+	*p++ = resp->status;
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
-	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return false;
-
-	return true;
+	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
+					    &lock->len, NLM_MAXSTRLEN)))
+		return 0;
+	argp->state = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
 }
 
-
-/*
- * Encode Reply results
- */
-
-bool
-nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
-	return true;
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
+	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
+		return 0;
+	argp->state = ntohl(*p++);
+	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
+	p += XDR_QUADLEN(SM_PRIV_SIZE);
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nlm_res *resp = rqstp->rq_resp;
+	struct nlm_res *resp = rqstp->rq_argp;
 
-	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
-		svcxdr_encode_testrply(xdr, resp);
+	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
+		return 0;
+	resp->status = *p++;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nlm_res *resp = rqstp->rq_resp;
-
-	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
-		svcxdr_encode_stats(xdr, resp->status);
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nlm_res *resp = rqstp->rq_resp;
-
-	if (!svcxdr_encode_cookie(xdr, &resp->cookie))
-		return false;
-	if (!svcxdr_encode_stats(xdr, resp->status))
-		return false;
-	/* sequence */
-	if (xdr_stream_encode_u32(xdr, 0) < 0)
-		return false;
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/locks.c b/fs/locks.c
index b0753c8871fb..cbb5701ce9f3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -251,7 +251,7 @@ locks_get_lock_context(struct inode *inode, int type)
 	struct file_lock_context *ctx;
 
 	/* paired with cmpxchg() below */
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (likely(ctx) || type == F_UNLCK)
 		goto out;
 
@@ -270,7 +270,7 @@ locks_get_lock_context(struct inode *inode, int type)
 	 */
 	if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
 		kmem_cache_free(flctx_cache, ctx);
-		ctx = locks_inode_context(inode);
+		ctx = smp_load_acquire(&inode->i_flctx);
 	}
 out:
 	trace_locks_get_lock_context(inode, type, ctx);
@@ -323,7 +323,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
 void
 locks_free_lock_context(struct inode *inode)
 {
-	struct file_lock_context *ctx = locks_inode_context(inode);
+	struct file_lock_context *ctx = inode->i_flctx;
 
 	if (unlikely(ctx)) {
 		locks_check_ctx_lists(inode);
@@ -376,34 +376,6 @@ void locks_release_private(struct file_lock *fl)
 }
 EXPORT_SYMBOL_GPL(locks_release_private);
 
-/**
- * locks_owner_has_blockers - Check for blocking lock requests
- * @flctx: file lock context
- * @owner: lock owner
- *
- * Return values:
- *   %true: @owner has at least one blocker
- *   %false: @owner has no blockers
- */
-bool locks_owner_has_blockers(struct file_lock_context *flctx,
-		fl_owner_t owner)
-{
-	struct file_lock *fl;
-
-	spin_lock(&flctx->flc_lock);
-	list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
-		if (fl->fl_owner != owner)
-			continue;
-		if (!list_empty(&fl->fl_blocked_requests)) {
-			spin_unlock(&flctx->flc_lock);
-			return true;
-		}
-	}
-	spin_unlock(&flctx->flc_lock);
-	return false;
-}
-EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
-
 /* Free a lock which is not in use. */
 void locks_free_lock(struct file_lock *fl)
 {
@@ -982,32 +954,19 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 	struct file_lock *cfl;
 	struct file_lock_context *ctx;
 	struct inode *inode = locks_inode(filp);
-	void *owner;
-	void (*func)(void);
 
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
 		fl->fl_type = F_UNLCK;
 		return;
 	}
 
-retry:
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
-		if (!posix_locks_conflict(fl, cfl))
-			continue;
-		if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
-			&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
-			owner = cfl->fl_lmops->lm_mod_owner;
-			func = cfl->fl_lmops->lm_expire_lock;
-			__module_get(owner);
-			spin_unlock(&ctx->flc_lock);
-			(*func)();
-			module_put(owner);
-			goto retry;
+		if (posix_locks_conflict(fl, cfl)) {
+			locks_copy_conflock(fl, cfl);
+			goto out;
 		}
-		locks_copy_conflock(fl, cfl);
-		goto out;
 	}
 	fl->fl_type = F_UNLCK;
 out:
@@ -1181,8 +1140,6 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 	int error;
 	bool added = false;
 	LIST_HEAD(dispose);
-	void *owner;
-	void (*func)(void);
 
 	ctx = locks_get_lock_context(inode, request->fl_type);
 	if (!ctx)
@@ -1201,7 +1158,6 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 		new_fl2 = locks_alloc_lock();
 	}
 
-retry:
 	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	/*
@@ -1213,17 +1169,6 @@ retry:
 		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
 			if (!posix_locks_conflict(request, fl))
 				continue;
-			if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
-				&& (*fl->fl_lmops->lm_lock_expirable)(fl)) {
-				owner = fl->fl_lmops->lm_mod_owner;
-				func = fl->fl_lmops->lm_expire_lock;
-				__module_get(owner);
-				spin_unlock(&ctx->flc_lock);
-				percpu_up_read(&file_rwsem);
-				(*func)();
-				module_put(owner);
-				goto retry;
-			}
 			if (conflock)
 				locks_copy_conflock(conflock, fl);
 			error = -EAGAIN;
@@ -1674,7 +1619,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 	new_fl->fl_flags = type;
 
 	/* typically we will check that ctx is non-NULL before calling */
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx) {
 		WARN_ON_ONCE(1);
 		goto free_lock;
@@ -1779,7 +1724,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
 	struct file_lock_context *ctx;
 	struct file_lock *fl;
 
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
 		spin_lock(&ctx->flc_lock);
 		fl = list_first_entry_or_null(&ctx->flc_lease,
@@ -1825,7 +1770,7 @@ int fcntl_getlease(struct file *filp)
 	int type = F_UNLCK;
 	LIST_HEAD(dispose);
 
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
 		percpu_down_read(&file_rwsem);
 		spin_lock(&ctx->flc_lock);
@@ -1863,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags)
 
 	if (flags & FL_LAYOUT)
 		return 0;
-	if (flags & FL_DELEG)
-		/* We leave these checks to the caller */
-		return 0;
 
 	if (arg == F_RDLCK)
 		return inode_is_open_for_write(inode) ? -EAGAIN : 0;
@@ -2014,7 +1956,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 	struct file_lock_context *ctx;
 	LIST_HEAD(dispose);
 
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx) {
 		trace_generic_delete_lease(inode, NULL);
 		return error;
@@ -2594,15 +2536,14 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	 */
 	if (!error && file_lock->fl_type != F_UNLCK &&
 	    !(file_lock->fl_flags & FL_OFDLCK)) {
-		struct files_struct *files = current->files;
 		/*
 		 * We need that spin_lock here - it prevents reordering between
 		 * update of i_flctx->flc_posix and check for it done in
 		 * close(). rcu_read_lock() wouldn't do.
 		 */
-		spin_lock(&files->file_lock);
-		f = files_lookup_fd_locked(files, fd);
-		spin_unlock(&files->file_lock);
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
 		if (f != filp) {
 			file_lock->fl_type = F_UNLCK;
 			error = do_lock_file_wait(filp, cmd, file_lock);
@@ -2726,15 +2667,14 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	 */
 	if (!error && file_lock->fl_type != F_UNLCK &&
 	    !(file_lock->fl_flags & FL_OFDLCK)) {
-		struct files_struct *files = current->files;
 		/*
 		 * We need that spin_lock here - it prevents reordering between
 		 * update of i_flctx->flc_posix and check for it done in
 		 * close(). rcu_read_lock() wouldn't do.
 		 */
-		spin_lock(&files->file_lock);
-		f = files_lookup_fd_locked(files, fd);
-		spin_unlock(&files->file_lock);
+		spin_lock(&current->files->file_lock);
+		f = fcheck(fd);
+		spin_unlock(&current->files->file_lock);
 		if (f != filp) {
 			file_lock->fl_type = F_UNLCK;
 			error = do_lock_file_wait(filp, cmd, file_lock);
@@ -2765,7 +2705,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	ctx = locks_inode_context(inode);
+	ctx =  smp_load_acquire(&inode->i_flctx);
 	if (!ctx || list_empty(&ctx->flc_posix))
 		return;
 
@@ -2838,7 +2778,7 @@ void locks_remove_file(struct file *filp)
 {
 	struct file_lock_context *ctx;
 
-	ctx = locks_inode_context(locks_inode(filp));
+	ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
 	if (!ctx)
 		return;
 
@@ -2885,7 +2825,7 @@ bool vfs_inode_has_locks(struct inode *inode)
 	struct file_lock_context *ctx;
 	bool ret;
 
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx)
 		return false;
 
@@ -3030,7 +2970,7 @@ void show_fd_locks(struct seq_file *f,
 	struct file_lock_context *ctx;
 	int id = 0;
 
-	ctx = locks_inode_context(inode);
+	ctx = smp_load_acquire(&inode->i_flctx);
 	if (!ctx)
 		return;
 
diff --git a/fs/namei.c b/fs/namei.c
index 6b85ad8a1555..8cea84ecbf56 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4361,14 +4361,11 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  *	   ->i_mutex on parents, which works but leads to some truly excessive
  *	   locking].
  */
-int vfs_rename(struct renamedata *rd)
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+	       struct inode *new_dir, struct dentry *new_dentry,
+	       struct inode **delegated_inode, unsigned int flags)
 {
 	int error;
-	struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
-	struct dentry *old_dentry = rd->old_dentry;
-	struct dentry *new_dentry = rd->new_dentry;
-	struct inode **delegated_inode = rd->delegated_inode;
-	unsigned int flags = rd->flags;
 	bool is_dir = d_is_dir(old_dentry);
 	struct inode *source = old_dentry->d_inode;
 	struct inode *target = new_dentry->d_inode;
@@ -4516,7 +4513,6 @@ EXPORT_SYMBOL_NS(vfs_rename, ANDROID_GKI_VFS_EXPORT_ONLY);
 int do_renameat2(int olddfd, struct filename *from, int newdfd,
 		 struct filename *to, unsigned int flags)
 {
-	struct renamedata rd;
 	struct dentry *old_dentry, *new_dentry;
 	struct dentry *trap;
 	struct path old_path, new_path;
@@ -4620,14 +4616,9 @@ retry_deleg:
 				     &new_path, new_dentry, flags);
 	if (error)
 		goto exit5;
-
-	rd.old_dir	   = old_path.dentry->d_inode;
-	rd.old_dentry	   = old_dentry;
-	rd.new_dir	   = new_path.dentry->d_inode;
-	rd.new_dentry	   = new_dentry;
-	rd.delegated_inode = &delegated_inode;
-	rd.flags	   = flags;
-	error = vfs_rename(&rd);
+	error = vfs_rename(old_path.dentry->d_inode, old_dentry,
+			   new_path.dentry->d_inode, new_dentry,
+			   &delegated_inode, flags);
 exit5:
 	dput(new_dentry);
 exit4:
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index a9e563145e0c..73000aa2d220 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -699,7 +699,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
 
 	xdr_init_decode_pages(&xdr, &buf,
 			lgr->layoutp->pages, lgr->layoutp->len);
-	xdr_set_scratch_page(&xdr, scratch);
+	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
 
 	status = -EIO;
 	p = xdr_inline_decode(&xdr, 4);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 16412d6636e8..6e3a14fdff9c 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 		goto out;
 
 	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
-	xdr_set_scratch_page(&xdr, scratch);
+	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
 
 	p = xdr_inline_decode(&xdr, sizeof(__be32));
 	if (!p)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 8fe143cad4a2..7817ad94a6ba 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/bc_xprt.h>
 
@@ -44,18 +45,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
 	int ret;
 	struct nfs_net *nn = net_generic(net, nfs_net_id);
 
-	ret = svc_xprt_create(serv, "tcp", net, PF_INET,
-			      nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
-			      cred);
+	ret = svc_create_xprt(serv, "tcp", net, PF_INET,
+				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+				cred);
 	if (ret <= 0)
 		goto out_err;
 	nn->nfs_callback_tcpport = ret;
 	dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
 		nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
 
-	ret = svc_xprt_create(serv, "tcp", net, PF_INET6,
-			      nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
-			      cred);
+	ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
+				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+				cred);
 	if (ret > 0) {
 		nn->nfs_callback_tcpport6 = ret;
 		dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
@@ -80,6 +81,9 @@ nfs4_callback_svc(void *vrqstp)
 	set_freezable();
 
 	while (!kthread_freezable_should_stop(NULL)) {
+
+		if (signal_pending(current))
+			flush_signals(current);
 		/*
 		 * Listen for a request on the socket
 		 */
@@ -88,8 +92,8 @@ nfs4_callback_svc(void *vrqstp)
 			continue;
 		svc_process(rqstp);
 	}
-
 	svc_exit_thread(rqstp);
+	module_put_and_exit(0);
 	return 0;
 }
 
@@ -109,7 +113,11 @@ nfs41_callback_svc(void *vrqstp)
 	set_freezable();
 
 	while (!kthread_freezable_should_stop(NULL)) {
-		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
+
+		if (signal_pending(current))
+			flush_signals(current);
+
+		prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
 		spin_lock_bh(&serv->sv_cb_lock);
 		if (!list_empty(&serv->sv_cb_list)) {
 			req = list_first_entry(&serv->sv_cb_list,
@@ -124,12 +132,12 @@ nfs41_callback_svc(void *vrqstp)
 		} else {
 			spin_unlock_bh(&serv->sv_cb_lock);
 			if (!kthread_should_stop())
-				freezable_schedule();
+				schedule();
 			finish_wait(&serv->sv_cb_waitq, &wq);
 		}
 	}
-
 	svc_exit_thread(rqstp);
+	module_put_and_exit(0);
 	return 0;
 }
 
@@ -161,12 +169,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
 	if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS)
 		nrservs = NFS4_MIN_NR_CALLBACK_THREADS;
 
-	if (serv->sv_nrthreads == nrservs)
+	if (serv->sv_nrthreads-1 == nrservs)
 		return 0;
 
-	ret = svc_set_num_threads(serv, NULL, nrservs);
+	ret = serv->sv_ops->svo_setup(serv, NULL, nrservs);
 	if (ret) {
-		svc_set_num_threads(serv, NULL, 0);
+		serv->sv_ops->svo_setup(serv, NULL, 0);
 		return ret;
 	}
 	dprintk("nfs_callback_up: service started\n");
@@ -181,7 +189,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
 		return;
 
 	dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
-	svc_xprt_destroy_all(serv, net);
+	svc_shutdown_net(serv, net);
 }
 
 static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
@@ -224,17 +232,59 @@ err_bind:
 	return ret;
 }
 
+static const struct svc_serv_ops nfs40_cb_sv_ops = {
+	.svo_function		= nfs4_callback_svc,
+	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
+	.svo_setup		= svc_set_num_threads_sync,
+	.svo_module		= THIS_MODULE,
+};
+#if defined(CONFIG_NFS_V4_1)
+static const struct svc_serv_ops nfs41_cb_sv_ops = {
+	.svo_function		= nfs41_callback_svc,
+	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
+	.svo_setup		= svc_set_num_threads_sync,
+	.svo_module		= THIS_MODULE,
+};
+
+static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+	[0] = &nfs40_cb_sv_ops,
+	[1] = &nfs41_cb_sv_ops,
+};
+#else
+static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+	[0] = &nfs40_cb_sv_ops,
+	[1] = NULL,
+};
+#endif
+
 static struct svc_serv *nfs_callback_create_svc(int minorversion)
 {
 	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
-	int (*threadfn)(void *data);
+	const struct svc_serv_ops *sv_ops;
 	struct svc_serv *serv;
 
 	/*
 	 * Check whether we're already up and running.
 	 */
-	if (cb_info->serv)
-		return svc_get(cb_info->serv);
+	if (cb_info->serv) {
+		/*
+		 * Note: increase service usage, because later in case of error
+		 * svc_destroy() will be called.
+		 */
+		svc_get(cb_info->serv);
+		return cb_info->serv;
+	}
+
+	switch (minorversion) {
+	case 0:
+		sv_ops = nfs4_cb_sv_ops[0];
+		break;
+	default:
+		sv_ops = nfs4_cb_sv_ops[1];
+	}
+
+	if (sv_ops == NULL)
+		return ERR_PTR(-ENOTSUPP);
 
 	/*
 	 * Sanity check: if there's no task,
@@ -244,16 +294,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
 		printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
 			cb_info->users);
 
-	threadfn = nfs4_callback_svc;
-#if defined(CONFIG_NFS_V4_1)
-	if (minorversion)
-		threadfn = nfs41_callback_svc;
-#else
-	if (minorversion)
-		return ERR_PTR(-ENOTSUPP);
-#endif
-	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
-			  threadfn);
+	serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
 	if (!serv) {
 		printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
 		return ERR_PTR(-ENOMEM);
@@ -294,10 +335,16 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
 		goto err_start;
 
 	cb_info->users++;
+	/*
+	 * svc_create creates the svc_serv with sv_nrthreads == 1, and then
+	 * svc_prepare_thread increments that. So we need to call svc_destroy
+	 * on both success and failure so that the refcount is 1 when the
+	 * thread exits.
+	 */
 err_net:
 	if (!cb_info->users)
 		cb_info->serv = NULL;
-	svc_put(serv);
+	svc_destroy(serv);
 err_create:
 	mutex_unlock(&nfs_callback_mutex);
 	return ret;
@@ -322,8 +369,8 @@ void nfs_callback_down(int minorversion, struct net *net)
 	cb_info->users--;
 	if (cb_info->users == 0) {
 		svc_get(serv);
-		svc_set_num_threads(serv, NULL, 0);
-		svc_put(serv);
+		serv->sv_ops->svo_setup(serv, NULL, 0);
+		svc_destroy(serv);
 		dprintk("nfs_callback_down: service destroyed\n");
 		cb_info->serv = NULL;
 	}
@@ -382,8 +429,6 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
  */
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
-
 	switch (rqstp->rq_authop->flavour) {
 	case RPC_AUTH_NULL:
 		if (rqstp->rq_proc != CB_NULL)
@@ -394,8 +439,6 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 		 if (svc_is_backchannel(rqstp))
 			return SVC_DENIED;
 	}
-
-	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index db69fc267c9a..ca8a4aa351dc 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -63,13 +63,14 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 	return htonl(NFS4_OK);
 }
 
-/*
- * svc_process_common() looks for an XDR encoder to know when
- * not to drop a Reply.
- */
-static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
-	return true;
+	return xdr_argsize_check(rqstp, p);
+}
+
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
+{
+	return xdr_ressize_check(rqstp, p);
 }
 
 static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
@@ -983,17 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 
 out_invalidcred:
 	pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
-	return rpc_success;
-}
-
-static int
-nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp)
-{
-	const struct svc_procedure *procp = rqstp->rq_procinfo;
-
-	*statp = procp->pc_func(rqstp);
-	return 1;
+	return svc_return_autherr(rqstp, rpc_autherr_badcred);
 }
 
 /*
@@ -1062,18 +1053,16 @@ static struct callback_op callback_ops[] = {
 static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
+		.pc_decode = nfs4_decode_void,
 		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
-		.pc_name = "NULL",
 	},
 	[CB_COMPOUND] = {
 		.pc_func = nfs4_callback_compound,
 		.pc_encode = nfs4_encode_void,
 		.pc_argsize = 256,
-		.pc_argzero = 256,
 		.pc_ressize = 256,
 		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
-		.pc_name = "COMPOUND",
 	}
 };
 
@@ -1084,7 +1073,7 @@ const struct svc_version nfs4_callback_version1 = {
 	.vs_proc = nfs4_callback_procedures1,
 	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
-	.vs_dispatch = nfs_callback_dispatch,
+	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
@@ -1096,7 +1085,7 @@ const struct svc_version nfs4_callback_version4 = {
 	.vs_proc = nfs4_callback_procedures1,
 	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
-	.vs_dispatch = nfs_callback_dispatch,
+	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 935029632d5f..9f88ca7b2001 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -576,7 +576,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 		goto out_nopages;
 
 	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
-	xdr_set_scratch_page(&stream, scratch);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	do {
 		if (entry->label)
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 993be63ab301..3430d6891e89 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -167,25 +167,8 @@ out:
 	return parent;
 }
 
-static u64 nfs_fetch_iversion(struct inode *inode)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-
-	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
-						   NFS_INO_REVAL_PAGECACHE))
-		__nfs_revalidate_inode(server, inode);
-	return inode_peek_iversion_raw(inode);
-}
-
 const struct export_operations nfs_export_ops = {
 	.encode_fh = nfs_encode_fh,
 	.fh_to_dentry = nfs_fh_to_dentry,
 	.get_parent = nfs_get_parent,
-	.fetch_iversion = nfs_fetch_iversion,
-	.flags = EXPORT_OP_NOWCC		|
-		 EXPORT_OP_NOSUBTREECHK		|
-		 EXPORT_OP_CLOSE_BEFORE_UNLINK	|
-		 EXPORT_OP_REMOTE_FS		|
-		 EXPORT_OP_NOATOMIC_ATTR	|
-		 EXPORT_OP_FLUSH_ON_CLOSE,
 };
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d35aae47b062..7be1a7f7fcb2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -798,9 +798,6 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 
 	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 
-	if (fl->fl_flags & FL_RECLAIM)
-		return -ENOGRACE;
-
 	/* No mandatory locks over NFS */
 	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
 		goto out_err;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 2ed8b6885b09..deecfb50dd7e 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -293,6 +293,8 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;
 
+	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
 	    task->tk_status == 0) {
 		nfs41_sequence_done(task, &hdr->res.seq_res);
@@ -664,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 		return -ENOMEM;
 
 	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
-	xdr_set_scratch_page(&stream, scratch);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
 	 * num_fh (4) */
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 86c3f7e69ec4..d913e818858f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 		goto out_err;
 
 	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
-	xdr_set_scratch_page(&stream, scratch);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* Get the stripe count (number of stripe index) */
 	p = xdr_inline_decode(&stream, 4);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index a263bfec4244..e4f2820ba5a5 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 
 	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
 			      lgr->layoutp->len);
-	xdr_set_scratch_page(&stream, scratch);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* stripe unit and mirror_array_cnt */
 	rc = -EIO;
@@ -1419,6 +1419,8 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;
 
+	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
+
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
 	    task->tk_status == 0) {
 		nfs4_sequence_done(task, &hdr->res.seq_res);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index bfa7202ca7be..1f12297109b4 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 	INIT_LIST_HEAD(&dsaddrs);
 
 	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
-	xdr_set_scratch_page(&stream, scratch);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* multipath count */
 	p = xdr_inline_decode(&stream, 4);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index df5bee2f505c..f2248d9d4db5 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -1536,7 +1536,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_set_scratch_page(xdr, res->scratch);
+	xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE);
 
 	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d8fc5d72a161..afb617a4a7e4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2757,7 +2757,7 @@ again:
 		goto again;
 
 	nfs_put_client(clp);
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e5c6cb770ad..f1e599553f2b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6404,8 +6404,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	struct compound_hdr hdr;
 	int status;
 
-	if (res->acl_scratch != NULL)
-		xdr_set_scratch_page(xdr, res->acl_scratch);
+	if (res->acl_scratch != NULL) {
+		void *p = page_address(res->acl_scratch);
+		xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+	}
 	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d79a3b6cb070..17fef6eb490c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -870,6 +870,9 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata)
 	struct nfs_pgio_header *hdr = calldata;
 	struct inode *inode = hdr->inode;
 
+	dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+		task->tk_pid, task->tk_status);
+
 	if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
 		return;
 	if (task->tk_status < 0)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1ffce9076060..b3fcc27b9564 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -86,11 +86,9 @@ const struct super_operations nfs_sops = {
 };
 EXPORT_SYMBOL_GPL(nfs_sops);
 
-#ifdef CONFIG_NFS_V4_2
 static const struct nfs_ssc_client_ops nfs_ssc_clnt_ops_tbl = {
 	.sco_sb_deactive = nfs_sb_deactive,
 };
-#endif
 
 #if IS_ENABLED(CONFIG_NFS_V4)
 static int __init register_nfs4_fs(void)
@@ -113,7 +111,6 @@ static void unregister_nfs4_fs(void)
 }
 #endif
 
-#ifdef CONFIG_NFS_V4_2
 static void nfs_ssc_register_ops(void)
 {
 	nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
@@ -123,7 +120,6 @@ static void nfs_ssc_unregister_ops(void)
 {
 	nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
 }
-#endif /* CONFIG_NFS_V4_2 */
 
 static struct shrinker acl_shrinker = {
 	.count_objects	= nfs_access_cache_count,
@@ -152,9 +148,7 @@ int __init register_nfs_fs(void)
 	ret = register_shrinker(&acl_shrinker);
 	if (ret < 0)
 		goto error_3;
-#ifdef CONFIG_NFS_V4_2
 	nfs_ssc_register_ops();
-#endif
 	return 0;
 error_3:
 	nfs_unregister_sysctl();
@@ -174,9 +168,7 @@ void __exit unregister_nfs_fs(void)
 	unregister_shrinker(&acl_shrinker);
 	nfs_unregister_sysctl();
 	unregister_nfs4_fs();
-#ifdef CONFIG_NFS_V4_2
 	nfs_ssc_unregister_ops();
-#endif
 	unregister_filesystem(&nfs_fs_type);
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2bde35921f2b..4cf060691979 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1809,6 +1809,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_commit_data	*data = calldata;
 
+        dprintk("NFS: %5u nfs_commit_done (status %d)\n",
+                                task->tk_pid, task->tk_status);
+
 	/* Call the NFS version-specific code */
 	NFS_PROTO(data->inode)->commit_done(task, data);
 	trace_nfs_commit_done(task, data);
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile
index 119c75ab9fd0..fa82f5aaa6d9 100644
--- a/fs/nfs_common/Makefile
+++ b/fs/nfs_common/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
 nfs_acl-objs := nfsacl.o
 
 obj-$(CONFIG_GRACE_PERIOD) += grace.o
-obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o
+obj-$(CONFIG_GRACE_PERIOD) += nfs_ssc.o
diff --git a/fs/nfs_common/nfs_ssc.c b/fs/nfs_common/nfs_ssc.c
index 7c1509e968c8..f43bbb373913 100644
--- a/fs/nfs_common/nfs_ssc.c
+++ b/fs/nfs_common/nfs_ssc.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
+ * fs/nfs_common/nfs_ssc.c
+ *
  * Helper for knfsd's SSC to access ops in NFS client modules
  *
  * Author: Dai Ngo <dai.ngo@oracle.com>
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 5a5bd85d08f8..d056ad2fdefd 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -136,77 +136,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(nfsacl_encode);
 
-/**
- * nfs_stream_encode_acl - Encode an NFSv3 ACL
- *
- * @xdr: an xdr_stream positioned to receive an encoded ACL
- * @inode: inode of file whose ACL this is
- * @acl: posix_acl to encode
- * @encode_entries: whether to encode ACEs as well
- * @typeflag: ACL type: NFS_ACL_DEFAULT or zero
- *
- * Return values:
- *   %false: The ACL could not be encoded
- *   %true: @xdr is advanced to the next available position
- */
-bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode,
-			   struct posix_acl *acl, int encode_entries,
-			   int typeflag)
-{
-	const size_t elem_size = XDR_UNIT * 3;
-	u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
-	struct nfsacl_encode_desc nfsacl_desc = {
-		.desc = {
-			.elem_size = elem_size,
-			.array_len = encode_entries ? entries : 0,
-			.xcode = xdr_nfsace_encode,
-		},
-		.acl = acl,
-		.typeflag = typeflag,
-		.uid = inode->i_uid,
-		.gid = inode->i_gid,
-	};
-	struct nfsacl_simple_acl aclbuf;
-	unsigned int base;
-	int err;
-
-	if (entries > NFS_ACL_MAX_ENTRIES)
-		return false;
-	if (xdr_stream_encode_u32(xdr, entries) < 0)
-		return false;
-
-	if (encode_entries && acl && acl->a_count == 3) {
-		struct posix_acl *acl2 = &aclbuf.acl;
-
-		/* Avoid the use of posix_acl_alloc().  nfsacl_encode() is
-		 * invoked in contexts where a memory allocation failure is
-		 * fatal.  Fortunately this fake ACL is small enough to
-		 * construct on the stack. */
-		posix_acl_init(acl2, 4);
-
-		/* Insert entries in canonical order: other orders seem
-		 to confuse Solaris VxFS. */
-		acl2->a_entries[0] = acl->a_entries[0];  /* ACL_USER_OBJ */
-		acl2->a_entries[1] = acl->a_entries[1];  /* ACL_GROUP_OBJ */
-		acl2->a_entries[2] = acl->a_entries[1];  /* ACL_MASK */
-		acl2->a_entries[2].e_tag = ACL_MASK;
-		acl2->a_entries[3] = acl->a_entries[2];  /* ACL_OTHER */
-		nfsacl_desc.acl = acl2;
-	}
-
-	base = xdr_stream_pos(xdr);
-	if (!xdr_reserve_space(xdr, XDR_UNIT +
-			       elem_size * nfsacl_desc.desc.array_len))
-		return false;
-	err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc);
-	if (err)
-		return false;
-
-	return true;
-}
-EXPORT_SYMBOL_GPL(nfs_stream_encode_acl);
-
-
 struct nfsacl_decode_desc {
 	struct xdr_array2_desc desc;
 	unsigned int count;
@@ -366,55 +295,3 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
 		   nfsacl_desc.desc.array_len;
 }
 EXPORT_SYMBOL_GPL(nfsacl_decode);
-
-/**
- * nfs_stream_decode_acl - Decode an NFSv3 ACL
- *
- * @xdr: an xdr_stream positioned at an encoded ACL
- * @aclcnt: OUT: count of ACEs in decoded posix_acl
- * @pacl: OUT: a dynamically-allocated buffer containing the decoded posix_acl
- *
- * Return values:
- *   %false: The encoded ACL is not valid
- *   %true: @pacl contains a decoded ACL, and @xdr is advanced
- *
- * On a successful return, caller must release *pacl using posix_acl_release().
- */
-bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt,
-			   struct posix_acl **pacl)
-{
-	const size_t elem_size = XDR_UNIT * 3;
-	struct nfsacl_decode_desc nfsacl_desc = {
-		.desc = {
-			.elem_size = elem_size,
-			.xcode = pacl ? xdr_nfsace_decode : NULL,
-		},
-	};
-	unsigned int base;
-	u32 entries;
-
-	if (xdr_stream_decode_u32(xdr, &entries) < 0)
-		return false;
-	if (entries > NFS_ACL_MAX_ENTRIES)
-		return false;
-
-	base = xdr_stream_pos(xdr);
-	if (!xdr_inline_decode(xdr, XDR_UNIT + elem_size * entries))
-		return false;
-	nfsacl_desc.desc.array_maxlen = entries;
-	if (xdr_decode_array2(xdr->buf, base, &nfsacl_desc.desc))
-		return false;
-
-	if (pacl) {
-		if (entries != nfsacl_desc.desc.array_len ||
-		    posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) {
-			posix_acl_release(nfsacl_desc.acl);
-			return false;
-		}
-		*pacl = nfsacl_desc.acl;
-	}
-	if (aclcnt)
-		*aclcnt = entries;
-	return true;
-}
-EXPORT_SYMBOL_GPL(nfs_stream_decode_acl);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 6d2d498a5957..248f1459c039 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -8,7 +8,6 @@ config NFSD
 	select SUNRPC
 	select EXPORTFS
 	select NFS_ACL_SUPPORT if NFSD_V2_ACL
-	select NFS_ACL_SUPPORT if NFSD_V3_ACL
 	depends on MULTIUSER
 	help
 	  Choose Y here if you want to allow other computers to access
@@ -27,29 +26,28 @@ config NFSD
 
 	  Below you can choose which versions of the NFS protocol are
 	  available to clients mounting the NFS server on this system.
-	  Support for NFS version 3 (RFC 1813) is always available when
+	  Support for NFS version 2 (RFC 1094) is always available when
 	  CONFIG_NFSD is selected.
 
 	  If unsure, say N.
 
-config NFSD_V2
-	bool "NFS server support for NFS version 2 (DEPRECATED)"
-	depends on NFSD
-	default n
-	help
-	  NFSv2 (RFC 1094) was the first publicly-released version of NFS.
-	  Unless you are hosting ancient (1990's era) NFS clients, you don't
-	  need this.
-
-	  If unsure, say N.
-
 config NFSD_V2_ACL
-	bool "NFS server support for the NFSv2 ACL protocol extension"
-	depends on NFSD_V2
+	bool
+	depends on NFSD
+
+config NFSD_V3
+	bool "NFS server support for NFS version 3"
+	depends on NFSD
+	help
+	  This option enables support in your system's NFS server for
+	  version 3 of the NFS protocol (RFC 1813).
+
+	  If unsure, say Y.
 
 config NFSD_V3_ACL
 	bool "NFS server support for the NFSv3 ACL protocol extension"
-	depends on NFSD
+	depends on NFSD_V3
+	select NFSD_V2_ACL
 	help
 	  Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
 	  never became an official part of the NFS version 3 protocol.
@@ -72,13 +70,13 @@ config NFSD_V3_ACL
 config NFSD_V4
 	bool "NFS server support for NFS version 4"
 	depends on NFSD && PROC_FS
+	select NFSD_V3
 	select FS_POSIX_ACL
 	select SUNRPC_GSS
 	select CRYPTO
 	select CRYPTO_MD5
 	select CRYPTO_SHA256
 	select GRACE_PERIOD
-	select NFS_V4_2_SSC_HELPER if NFS_V4_2
 	help
 	  This option enables support in your system's NFS server for
 	  version 4 of the NFS protocol (RFC 3530).
@@ -100,7 +98,7 @@ config NFSD_BLOCKLAYOUT
 	help
 	  This option enables support for the exporting pNFS block layouts
 	  in the kernel's NFS server. The pNFS block layout enables NFS
-	  clients to directly perform I/O to block devices accessible to both
+	  clients to directly perform I/O to block devices accessible to both
 	  the server and the clients.  See RFC 5663 for more details.
 
 	  If unsure, say N.
@@ -114,7 +112,7 @@ config NFSD_SCSILAYOUT
 	help
 	  This option enables support for the exporting pNFS SCSI layouts
 	  in the kernel's NFS server. The pNFS SCSI layout enables NFS
-	  clients to directly perform I/O to SCSI devices accessible to both
+	  clients to directly perform I/O to SCSI devices accessible to both
 	  the server and the clients.  See draft-ietf-nfsv4-scsi-layout for
 	  more details.
 
@@ -128,7 +126,7 @@ config NFSD_FLEXFILELAYOUT
 	  This option enables support for the exporting pNFS Flex File
 	  layouts in the kernel's NFS server. The pNFS Flex File  layout
 	  enables NFS clients to directly perform I/O to NFSv3 devices
-	  accessible to both the server and the clients.  See
+	  accessible to both the server and the clients.  See
 	  draft-ietf-nfsv4-flex-files for more details.
 
 	  Warning, this server implements the bare minimum functionality
@@ -139,7 +137,7 @@ config NFSD_FLEXFILELAYOUT
 
 config NFSD_V4_2_INTER_SSC
 	bool "NFSv4.2 inter server to server COPY"
-	depends on NFSD_V4 && NFS_V4_2
+	depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
 	help
 	  This option enables support for NFSv4.2 inter server to
 	  server copy where the destination server calls the NFSv4.2
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 6fffc8f03f74..3f0983e93a99 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,11 +10,11 @@ obj-$(CONFIG_NFSD)	+= nfsd.o
 # this one should be compiled first, as the tracing macros can easily blow up
 nfsd-y			+= trace.o
 
-nfsd-y 			+= nfssvc.o nfsctl.o nfsfh.o vfs.o \
-			   export.o auth.o lockd.o nfscache.o \
-			   stats.o filecache.o nfs3proc.o nfs3xdr.o
-nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
+nfsd-y 			+= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+			   export.o auth.o lockd.o nfscache.o nfsxdr.o \
+			   stats.o filecache.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
+nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 			   nfs4acl.o nfs4callback.o nfs4recover.o
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 4b7324458a94..ba14d2f4b64f 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -38,8 +38,6 @@
 struct nfs4_acl;
 struct svc_fh;
 struct svc_rqst;
-struct nfsd_attrs;
-enum nfs_ftype4;
 
 int nfs4_acl_bytes(int entries);
 int nfs4_acl_get_whotype(char *, u32);
@@ -47,7 +45,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
 
 int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
 		struct nfs4_acl **acl);
-__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
-			 struct nfsd_attrs *attr);
+__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct nfs4_acl *acl);
 
 #endif /* LINUX_NFS4_ACL_H */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index d91a686d2f31..a07c39c94bbd 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -16,7 +16,6 @@
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
 #include "filecache.h"
-#include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
 
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 1ed2f691ebb9..2455dc8be18a 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -9,7 +9,6 @@
 
 #include "nfsd.h"
 #include "blocklayoutxdr.h"
-#include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
 
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index f21259ead64b..65c331f75e9c 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -84,6 +84,6 @@ int	nfsd_reply_cache_init(struct nfsd_net *);
 void	nfsd_reply_cache_shutdown(struct nfsd_net *);
 int	nfsd_cache_lookup(struct svc_rqst *);
 void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
-int	nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
+int	nfsd_reply_cache_stats_open(struct inode *, struct file *);
 
 #endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7c863f2c21e0..21e404e7cb68 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -331,29 +331,12 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
 	fsloc->locations = NULL;
 }
 
-static int export_stats_init(struct export_stats *stats)
-{
-	stats->start_time = ktime_get_seconds();
-	return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM);
-}
-
-static void export_stats_reset(struct export_stats *stats)
-{
-	nfsd_percpu_counters_reset(stats->counter, EXP_STATS_COUNTERS_NUM);
-}
-
-static void export_stats_destroy(struct export_stats *stats)
-{
-	nfsd_percpu_counters_destroy(stats->counter, EXP_STATS_COUNTERS_NUM);
-}
-
 static void svc_export_put(struct kref *ref)
 {
 	struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
 	path_put(&exp->ex_path);
 	auth_domain_put(exp->ex_client);
 	nfsd4_fslocs_free(&exp->ex_fslocs);
-	export_stats_destroy(&exp->ex_stats);
 	kfree(exp->ex_uuid);
 	kfree_rcu(exp, ex_rcu);
 }
@@ -425,12 +408,6 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
 		return -EINVAL;
 	}
 
-	if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK &&
-	    !(*flags & NFSEXP_NOSUBTREECHECK)) {
-		dprintk("%s: %s does not support subtree checking!\n",
-			__func__, inode->i_sb->s_type->name);
-		return -EINVAL;
-	}
 	return 0;
 
 }
@@ -709,47 +686,22 @@ static void exp_flags(struct seq_file *m, int flag, int fsid,
 		kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs);
 static void show_secinfo(struct seq_file *m, struct svc_export *exp);
 
-static int is_export_stats_file(struct seq_file *m)
-{
-	/*
-	 * The export_stats file uses the same ops as the exports file.
-	 * We use the file's name to determine the reported info per export.
-	 * There is no rename in nsfdfs, so d_name.name is stable.
-	 */
-	return !strcmp(m->file->f_path.dentry->d_name.name, "export_stats");
-}
-
 static int svc_export_show(struct seq_file *m,
 			   struct cache_detail *cd,
 			   struct cache_head *h)
 {
-	struct svc_export *exp;
-	bool export_stats = is_export_stats_file(m);
+	struct svc_export *exp ;
 
-	if (h == NULL) {
-		if (export_stats)
-			seq_puts(m, "#path domain start-time\n#\tstats\n");
-		else
-			seq_puts(m, "#path domain(flags)\n");
+	if (h ==NULL) {
+		seq_puts(m, "#path domain(flags)\n");
 		return 0;
 	}
 	exp = container_of(h, struct svc_export, h);
 	seq_path(m, &exp->ex_path, " \t\n\\");
 	seq_putc(m, '\t');
 	seq_escape(m, exp->ex_client->name, " \t\n\\");
-	if (export_stats) {
-		seq_printf(m, "\t%lld\n", exp->ex_stats.start_time);
-		seq_printf(m, "\tfh_stale: %lld\n",
-			   percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_FH_STALE]));
-		seq_printf(m, "\tio_read: %lld\n",
-			   percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_READ]));
-		seq_printf(m, "\tio_write: %lld\n",
-			   percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_WRITE]));
-		seq_putc(m, '\n');
-		return 0;
-	}
 	seq_putc(m, '(');
-	if (test_bit(CACHE_VALID, &h->flags) &&
+	if (test_bit(CACHE_VALID, &h->flags) && 
 	    !test_bit(CACHE_NEGATIVE, &h->flags)) {
 		exp_flags(m, exp->ex_flags, exp->ex_fsid,
 			  exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
@@ -790,7 +742,6 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 	new->ex_layout_types = 0;
 	new->ex_uuid = NULL;
 	new->cd = item->cd;
-	export_stats_reset(&new->ex_stats);
 }
 
 static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -823,15 +774,10 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
 static struct cache_head *svc_export_alloc(void)
 {
 	struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL);
-	if (!i)
+	if (i)
+		return &i->h;
+	else
 		return NULL;
-
-	if (export_stats_init(&i->ex_stats)) {
-		kfree(i);
-		return NULL;
-	}
-
-	return &i->h;
 }
 
 static const struct cache_detail svc_export_cache_template = {
@@ -1293,14 +1239,10 @@ static int e_show(struct seq_file *m, void *p)
 	struct cache_head *cp = p;
 	struct svc_export *exp = container_of(cp, struct svc_export, h);
 	struct cache_detail *cd = m->private;
-	bool export_stats = is_export_stats_file(m);
 
 	if (p == SEQ_START_TOKEN) {
 		seq_puts(m, "# Version 1.1\n");
-		if (export_stats)
-			seq_puts(m, "# Path Client Start-time\n#\tStats\n");
-		else
-			seq_puts(m, "# Path Client(Flags) # IPs\n");
+		seq_puts(m, "# Path Client(Flags) # IPs\n");
 		return 0;
 	}
 
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index d03f7f6a8642..e7daa1f246f0 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -6,7 +6,6 @@
 #define NFSD_EXPORT_H
 
 #include <linux/sunrpc/cache.h>
-#include <linux/percpu_counter.h>
 #include <uapi/linux/nfsd/export.h>
 #include <linux/nfs4.h>
 
@@ -47,19 +46,6 @@ struct exp_flavor_info {
 	u32	flags;
 };
 
-/* Per-export stats */
-enum {
-	EXP_STATS_FH_STALE,
-	EXP_STATS_IO_READ,
-	EXP_STATS_IO_WRITE,
-	EXP_STATS_COUNTERS_NUM
-};
-
-struct export_stats {
-	time64_t		start_time;
-	struct percpu_counter	counter[EXP_STATS_COUNTERS_NUM];
-};
-
 struct svc_export {
 	struct cache_head	h;
 	struct auth_domain *	ex_client;
@@ -76,7 +62,6 @@ struct svc_export {
 	struct nfsd4_deviceid_map *ex_devid_map;
 	struct cache_detail	*cd;
 	struct rcu_head		ex_rcu;
-	struct export_stats	ex_stats;
 };
 
 /* an "export key" (expkey) maps a filehandlefragement to an
@@ -115,6 +100,7 @@ struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
 int			exp_rootfh(struct net *, struct auth_domain *,
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
+__be32			nfserrno(int errno);
 
 static inline void exp_put(struct svc_export *exp)
 {
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 615ea8324911..e30e1ddc1ace 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -1,32 +1,7 @@
-// SPDX-License-Identifier: GPL-2.0
 /*
- * The NFSD open file cache.
+ * Open file cache.
  *
  * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
- *
- * An nfsd_file object is a per-file collection of open state that binds
- * together:
- *   - a struct file *
- *   - a user credential
- *   - a network namespace
- *   - a read-ahead context
- *   - monitoring for writeback errors
- *
- * nfsd_file objects are reference-counted. Consumers acquire a new
- * object via the nfsd_file_acquire API. They manage their interest in
- * the acquired object, and hence the object's reference count, via
- * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
- * object:
- *
- *  * non-garbage-collected: When a consumer wants to precisely control
- *    the lifetime of a file's open state, it acquires a non-garbage-
- *    collected nfsd_file. The final nfsd_file_put releases the open
- *    state immediately.
- *
- *  * garbage-collected: When a consumer does not control the lifetime
- *    of open state, it acquires a garbage-collected nfsd_file. The
- *    final nfsd_file_put allows the open state to linger for a period
- *    during which it may be re-used.
  */
 
 #include <linux/hash.h>
@@ -37,7 +12,6 @@
 #include <linux/fsnotify_backend.h>
 #include <linux/fsnotify.h>
 #include <linux/seq_file.h>
-#include <linux/rhashtable.h>
 
 #include "vfs.h"
 #include "nfsd.h"
@@ -46,75 +20,63 @@
 #include "filecache.h"
 #include "trace.h"
 
+#define NFSDDBG_FACILITY	NFSDDBG_FH
+
+/* FIXME: dynamically size this for the machine somehow? */
+#define NFSD_FILE_HASH_BITS                   12
+#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
 #define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)
 
-#define NFSD_FILE_CACHE_UP		     (0)
+#define NFSD_FILE_SHUTDOWN		     (1)
+#define NFSD_FILE_LRU_THRESHOLD		     (4096UL)
+#define NFSD_FILE_LRU_LIMIT		     (NFSD_FILE_LRU_THRESHOLD << 2)
 
 /* We only care about NFSD_MAY_READ/WRITE for this cache */
 #define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
 
+struct nfsd_fcache_bucket {
+	struct hlist_head	nfb_head;
+	spinlock_t		nfb_lock;
+	unsigned int		nfb_count;
+	unsigned int		nfb_maxcount;
+};
+
 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
-static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
-static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
-static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
-static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
 
 struct nfsd_fcache_disposal {
+	struct list_head list;
 	struct work_struct work;
+	struct net *net;
 	spinlock_t lock;
 	struct list_head freeme;
+	struct rcu_head rcu;
 };
 
 static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
 
 static struct kmem_cache		*nfsd_file_slab;
 static struct kmem_cache		*nfsd_file_mark_slab;
+static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
 static struct list_lru			nfsd_file_lru;
-static unsigned long			nfsd_file_flags;
+static long				nfsd_file_lru_flags;
 static struct fsnotify_group		*nfsd_file_fsnotify_group;
+static atomic_long_t			nfsd_filecache_count;
 static struct delayed_work		nfsd_filecache_laundrette;
-static struct rhltable			nfsd_file_rhltable
-						____cacheline_aligned_in_smp;
+static DEFINE_SPINLOCK(laundrette_lock);
+static LIST_HEAD(laundrettes);
 
-static bool
-nfsd_match_cred(const struct cred *c1, const struct cred *c2)
-{
-	int i;
-
-	if (!uid_eq(c1->fsuid, c2->fsuid))
-		return false;
-	if (!gid_eq(c1->fsgid, c2->fsgid))
-		return false;
-	if (c1->group_info == NULL || c2->group_info == NULL)
-		return c1->group_info == c2->group_info;
-	if (c1->group_info->ngroups != c2->group_info->ngroups)
-		return false;
-	for (i = 0; i < c1->group_info->ngroups; i++) {
-		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
-			return false;
-	}
-	return true;
-}
-
-static const struct rhashtable_params nfsd_file_rhash_params = {
-	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
-	.key_offset		= offsetof(struct nfsd_file, nf_inode),
-	.head_offset		= offsetof(struct nfsd_file, nf_rlist),
-
-	/*
-	 * Start with a single page hash table to reduce resizing churn
-	 * on light workloads.
-	 */
-	.min_size		= 256,
-	.automatic_shrinking	= true,
-};
+static void nfsd_file_gc(void);
 
 static void
 nfsd_file_schedule_laundrette(void)
 {
-	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
-		queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
-				   NFSD_LAUNDRETTE_DELAY);
+	long count = atomic_long_read(&nfsd_filecache_count);
+
+	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
+		return;
+
+	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+			NFSD_LAUNDRETTE_DELAY);
 }
 
 static void
@@ -153,21 +115,22 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 }
 
 static struct nfsd_file_mark *
-nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+nfsd_file_mark_find_or_create(struct nfsd_file *nf)
 {
 	int			err;
 	struct fsnotify_mark	*mark;
 	struct nfsd_file_mark	*nfm = NULL, *new;
+	struct inode *inode = nf->nf_inode;
 
 	do {
-		fsnotify_group_lock(nfsd_file_fsnotify_group);
+		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
 		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
-					  nfsd_file_fsnotify_group);
+				nfsd_file_fsnotify_group);
 		if (mark) {
 			nfm = nfsd_file_mark_get(container_of(mark,
 						 struct nfsd_file_mark,
 						 nfm_mark));
-			fsnotify_group_unlock(nfsd_file_fsnotify_group);
+			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
 			if (nfm) {
 				fsnotify_put_mark(mark);
 				break;
@@ -175,9 +138,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
 			/* Avoid soft lockup race with nfsd_file_mark_put() */
 			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
 			fsnotify_put_mark(mark);
-		} else {
-			fsnotify_group_unlock(nfsd_file_fsnotify_group);
-		}
+		} else
+			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
 
 		/* allocate a new nfm */
 		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
@@ -208,91 +170,51 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
 }
 
 static struct nfsd_file *
-nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
-		bool want_gc)
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+		struct net *net)
 {
 	struct nfsd_file *nf;
 
 	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
-	if (unlikely(!nf))
-		return NULL;
-
-	INIT_LIST_HEAD(&nf->nf_lru);
-	nf->nf_birthtime = ktime_get();
-	nf->nf_file = NULL;
-	nf->nf_cred = get_current_cred();
-	nf->nf_net = net;
-	nf->nf_flags = want_gc ?
-		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
-		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
-	nf->nf_inode = inode;
-	refcount_set(&nf->nf_ref, 1);
-	nf->nf_may = need;
-	nf->nf_mark = NULL;
+	if (nf) {
+		INIT_HLIST_NODE(&nf->nf_node);
+		INIT_LIST_HEAD(&nf->nf_lru);
+		nf->nf_file = NULL;
+		nf->nf_cred = get_current_cred();
+		nf->nf_net = net;
+		nf->nf_flags = 0;
+		nf->nf_inode = inode;
+		nf->nf_hashval = hashval;
+		refcount_set(&nf->nf_ref, 1);
+		nf->nf_may = may & NFSD_FILE_MAY_MASK;
+		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
+			if (may & NFSD_MAY_WRITE)
+				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
+			if (may & NFSD_MAY_READ)
+				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+		}
+		nf->nf_mark = NULL;
+		trace_nfsd_file_alloc(nf);
+	}
 	return nf;
 }
 
-/**
- * nfsd_file_check_write_error - check for writeback errors on a file
- * @nf: nfsd_file to check for writeback errors
- *
- * Check whether a nfsd_file has an unseen error. Reset the write
- * verifier if so.
- */
-static void
-nfsd_file_check_write_error(struct nfsd_file *nf)
-{
-	struct file *file = nf->nf_file;
-
-	if ((file->f_mode & FMODE_WRITE) &&
-	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
-		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
-}
-
-static void
-nfsd_file_hash_remove(struct nfsd_file *nf)
-{
-	trace_nfsd_file_unhash(nf);
-	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
-			nfsd_file_rhash_params);
-}
-
 static bool
-nfsd_file_unhash(struct nfsd_file *nf)
-{
-	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-		nfsd_file_hash_remove(nf);
-		return true;
-	}
-	return false;
-}
-
-static void
 nfsd_file_free(struct nfsd_file *nf)
 {
-	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
+	bool flush = false;
 
-	trace_nfsd_file_free(nf);
-
-	this_cpu_inc(nfsd_file_releases);
-	this_cpu_add(nfsd_file_total_age, age);
-
-	nfsd_file_unhash(nf);
+	trace_nfsd_file_put_final(nf);
 	if (nf->nf_mark)
 		nfsd_file_mark_put(nf->nf_mark);
 	if (nf->nf_file) {
-		nfsd_file_check_write_error(nf);
+		get_file(nf->nf_file);
 		filp_close(nf->nf_file, NULL);
+		fput(nf->nf_file);
+		flush = true;
 	}
-
-	/*
-	 * If this item is still linked via nf_lru, that's a bug.
-	 * WARN and leak it to preserve system stability.
-	 */
-	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
-		return;
-
 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+	return flush;
 }
 
 static bool
@@ -301,140 +223,191 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
 	struct file *file = nf->nf_file;
 	struct address_space *mapping;
 
-	/* File not open for write? */
-	if (!(file->f_mode & FMODE_WRITE))
+	if (!file || !(file->f_mode & FMODE_WRITE))
 		return false;
-
-	/*
-	 * Some filesystems (e.g. NFS) flush all dirty data on close.
-	 * On others, there is no need to wait for writeback.
-	 */
-	if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
-		return false;
-
 	mapping = file->f_mapping;
 	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
 		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
 }
 
-
-static bool nfsd_file_lru_add(struct nfsd_file *nf)
+static int
+nfsd_file_check_write_error(struct nfsd_file *nf)
 {
-	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
-	if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) {
-		trace_nfsd_file_lru_add(nf);
+	struct file *file = nf->nf_file;
+
+	if (!file || !(file->f_mode & FMODE_WRITE))
+		return 0;
+	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+}
+
+static void
+nfsd_file_do_unhash(struct nfsd_file *nf)
+{
+	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+	trace_nfsd_file_unhash(nf);
+
+	if (nfsd_file_check_write_error(nf))
+		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
+	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+	hlist_del_rcu(&nf->nf_node);
+	atomic_long_dec(&nfsd_filecache_count);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+		nfsd_file_do_unhash(nf);
+		if (!list_empty(&nf->nf_lru))
+			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
 		return true;
 	}
 	return false;
 }
 
-static bool nfsd_file_lru_remove(struct nfsd_file *nf)
+/*
+ * Return true if the file was unhashed.
+ */
+static bool
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
 {
-	if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) {
-		trace_nfsd_file_lru_del(nf);
+	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+	trace_nfsd_file_unhash_and_release_locked(nf);
+	if (!nfsd_file_unhash(nf))
+		return false;
+	/* keep final reference for nfsd_file_lru_dispose */
+	if (refcount_dec_not_one(&nf->nf_ref))
 		return true;
+
+	list_add(&nf->nf_lru, dispose);
+	return true;
+}
+
+static void
+nfsd_file_put_noref(struct nfsd_file *nf)
+{
+	trace_nfsd_file_put(nf);
+
+	if (refcount_dec_and_test(&nf->nf_ref)) {
+		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+		nfsd_file_free(nf);
 	}
-	return false;
+}
+
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+	bool is_hashed;
+
+	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+		nfsd_file_put_noref(nf);
+		return;
+	}
+
+	filemap_flush(nf->nf_file->f_mapping);
+	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+	nfsd_file_put_noref(nf);
+	if (is_hashed)
+		nfsd_file_schedule_laundrette();
+	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+		nfsd_file_gc();
 }
 
 struct nfsd_file *
 nfsd_file_get(struct nfsd_file *nf)
 {
-	if (nf && refcount_inc_not_zero(&nf->nf_ref))
+	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
 		return nf;
 	return NULL;
 }
 
-/**
- * nfsd_file_put - put the reference to a nfsd_file
- * @nf: nfsd_file of which to put the reference
- *
- * Put a reference to a nfsd_file. In the non-GC case, we just put the
- * reference immediately. In the GC case, if the reference would be
- * the last one, the put it on the LRU instead to be cleaned up later.
- */
-void
-nfsd_file_put(struct nfsd_file *nf)
-{
-	might_sleep();
-	trace_nfsd_file_put(nf);
-
-	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
-	    test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-		/*
-		 * If this is the last reference (nf_ref == 1), then try to
-		 * transfer it to the LRU.
-		 */
-		if (refcount_dec_not_one(&nf->nf_ref))
-			return;
-
-		/* Try to add it to the LRU.  If that fails, decrement. */
-		if (nfsd_file_lru_add(nf)) {
-			/* If it's still hashed, we're done */
-			if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-				nfsd_file_schedule_laundrette();
-				return;
-			}
-
-			/*
-			 * We're racing with unhashing, so try to remove it from
-			 * the LRU. If removal fails, then someone else already
-			 * has our reference.
-			 */
-			if (!nfsd_file_lru_remove(nf))
-				return;
-		}
-	}
-	if (refcount_dec_and_test(&nf->nf_ref))
-		nfsd_file_free(nf);
-}
-
 static void
 nfsd_file_dispose_list(struct list_head *dispose)
 {
 	struct nfsd_file *nf;
 
-	while (!list_empty(dispose)) {
+	while(!list_empty(dispose)) {
 		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
-		list_del_init(&nf->nf_lru);
-		nfsd_file_free(nf);
+		list_del(&nf->nf_lru);
+		nfsd_file_put_noref(nf);
+	}
+}
+
+static void
+nfsd_file_dispose_list_sync(struct list_head *dispose)
+{
+	bool flush = false;
+	struct nfsd_file *nf;
+
+	while(!list_empty(dispose)) {
+		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+		list_del(&nf->nf_lru);
+		if (!refcount_dec_and_test(&nf->nf_ref))
+			continue;
+		if (nfsd_file_free(nf))
+			flush = true;
+	}
+	if (flush)
+		flush_delayed_fput();
+}
+
+static void
+nfsd_file_list_remove_disposal(struct list_head *dst,
+		struct nfsd_fcache_disposal *l)
+{
+	spin_lock(&l->lock);
+	list_splice_init(&l->freeme, dst);
+	spin_unlock(&l->lock);
+}
+
+static void
+nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+{
+	struct nfsd_fcache_disposal *l;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(l, &laundrettes, list) {
+		if (l->net == net) {
+			spin_lock(&l->lock);
+			list_splice_tail_init(files, &l->freeme);
+			spin_unlock(&l->lock);
+			queue_work(nfsd_filecache_wq, &l->work);
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void
+nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+		struct net *net)
+{
+	struct nfsd_file *nf, *tmp;
+
+	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+		if (nf->nf_net == net)
+			list_move_tail(&nf->nf_lru, dst);
 	}
 }
 
-/**
- * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
- * @dispose: list of nfsd_files to be disposed
- *
- * Transfers each file to the "freeme" list for its nfsd_net, to eventually
- * be disposed of by the per-net garbage collector.
- */
 static void
 nfsd_file_dispose_list_delayed(struct list_head *dispose)
 {
-	while(!list_empty(dispose)) {
-		struct nfsd_file *nf = list_first_entry(dispose,
-						struct nfsd_file, nf_lru);
-		struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
-		struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+	LIST_HEAD(list);
+	struct nfsd_file *nf;
 
-		spin_lock(&l->lock);
-		list_move_tail(&nf->nf_lru, &l->freeme);
-		spin_unlock(&l->lock);
-		queue_work(nfsd_filecache_wq, &l->work);
+	while(!list_empty(dispose)) {
+		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+		nfsd_file_list_add_disposal(&list, nf->nf_net);
 	}
 }
 
-/**
- * nfsd_file_lru_cb - Examine an entry on the LRU list
- * @item: LRU entry to examine
- * @lru: controlling LRU
- * @lock: LRU list lock (unused)
- * @arg: dispose list
- *
- * Return values:
- *   %LRU_REMOVED: @item was removed from the LRU
- *   %LRU_ROTATE: @item is to be moved to the LRU tail
- *   %LRU_SKIP: @item cannot be evicted
+/*
+ * Note this can deadlock with nfsd_file_cache_purge.
  */
 static enum lru_status
 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
@@ -445,60 +418,72 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 	struct list_head *head = arg;
 	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
 
-	/* We should only be dealing with GC entries here */
-	WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
+	/*
+	 * Do a lockless refcount check. The hashtable holds one reference, so
+	 * we look to see if anything else has a reference, or if any have
+	 * been put since the shrinker last ran. Those don't get unhashed and
+	 * released.
+	 *
+	 * Note that in the put path, we set the flag and then decrement the
+	 * counter. Here we check the counter and then test and clear the flag.
+	 * That order is deliberate to ensure that we can do this locklessly.
+	 */
+	if (refcount_read(&nf->nf_ref) > 1)
+		goto out_skip;
 
 	/*
 	 * Don't throw out files that are still undergoing I/O or
 	 * that have uncleared errors pending.
 	 */
-	if (nfsd_file_check_writeback(nf)) {
-		trace_nfsd_file_gc_writeback(nf);
-		return LRU_SKIP;
-	}
+	if (nfsd_file_check_writeback(nf))
+		goto out_skip;
 
-	/* If it was recently added to the list, skip it */
-	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
-		trace_nfsd_file_gc_referenced(nf);
-		return LRU_ROTATE;
-	}
+	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+		goto out_skip;
 
-	/*
-	 * Put the reference held on behalf of the LRU. If it wasn't the last
-	 * one, then just remove it from the LRU and ignore it.
-	 */
-	if (!refcount_dec_and_test(&nf->nf_ref)) {
-		trace_nfsd_file_gc_in_use(nf);
-		list_lru_isolate(lru, &nf->nf_lru);
-		return LRU_REMOVED;
-	}
+	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+		goto out_skip;
 
-	/* Refcount went to zero. Unhash it and queue it to the dispose list */
-	nfsd_file_unhash(nf);
 	list_lru_isolate_move(lru, &nf->nf_lru, head);
-	this_cpu_inc(nfsd_file_evictions);
-	trace_nfsd_file_gc_disposed(nf);
 	return LRU_REMOVED;
+out_skip:
+	return LRU_SKIP;
+}
+
+static unsigned long
+nfsd_file_lru_walk_list(struct shrink_control *sc)
+{
+	LIST_HEAD(head);
+	struct nfsd_file *nf;
+	unsigned long ret;
+
+	if (sc)
+		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+				nfsd_file_lru_cb, &head);
+	else
+		ret = list_lru_walk(&nfsd_file_lru,
+				nfsd_file_lru_cb,
+				&head, LONG_MAX);
+	list_for_each_entry(nf, &head, nf_lru) {
+		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+		nfsd_file_do_unhash(nf);
+		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+	}
+	nfsd_file_dispose_list_delayed(&head);
+	return ret;
 }
 
 static void
 nfsd_file_gc(void)
 {
-	LIST_HEAD(dispose);
-	unsigned long ret;
-
-	ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
-			    &dispose, list_lru_count(&nfsd_file_lru));
-	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
-	nfsd_file_dispose_list_delayed(&dispose);
+	nfsd_file_lru_walk_list(NULL);
 }
 
 static void
 nfsd_file_gc_worker(struct work_struct *work)
 {
 	nfsd_file_gc();
-	if (list_lru_count(&nfsd_file_lru))
-		nfsd_file_schedule_laundrette();
+	nfsd_file_schedule_laundrette();
 }
 
 static unsigned long
@@ -510,14 +495,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
 static unsigned long
 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
 {
-	LIST_HEAD(dispose);
-	unsigned long ret;
-
-	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
-				   nfsd_file_lru_cb, &dispose);
-	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
-	nfsd_file_dispose_list_delayed(&dispose);
-	return ret;
+	return nfsd_file_lru_walk_list(sc);
 }
 
 static struct shrinker	nfsd_file_shrinker = {
@@ -526,123 +504,70 @@ static struct shrinker	nfsd_file_shrinker = {
 	.seeks = 1,
 };
 
-/**
- * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
- * @nf: nfsd_file to attempt to queue
- * @dispose: private list to queue successfully-put objects
- *
- * Unhash an nfsd_file, try to get a reference to it, and then put that
- * reference. If it's the last reference, queue it to the dispose list.
- */
 static void
-nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
-	__must_hold(RCU)
+__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+			struct list_head *dispose)
 {
-	int decrement = 1;
+	struct nfsd_file	*nf;
+	struct hlist_node	*tmp;
 
-	/* If we raced with someone else unhashing, ignore it */
-	if (!nfsd_file_unhash(nf))
-		return;
-
-	/* If we can't get a reference, ignore it */
-	if (!nfsd_file_get(nf))
-		return;
-
-	/* Extra decrement if we remove from the LRU */
-	if (nfsd_file_lru_remove(nf))
-		++decrement;
-
-	/* If refcount goes to 0, then put on the dispose list */
-	if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
-		list_add(&nf->nf_lru, dispose);
-		trace_nfsd_file_closing(nf);
+	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+		if (inode == nf->nf_inode)
+			nfsd_file_unhash_and_release_locked(nf, dispose);
 	}
-}
-
-/**
- * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
- * @inode:   inode on which to close out nfsd_files
- * @dispose: list on which to gather nfsd_files to close out
- *
- * An nfsd_file represents a struct file being held open on behalf of nfsd.
- * An open file however can block other activity (such as leases), or cause
- * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
- *
- * This function is intended to find open nfsd_files when this sort of
- * conflicting access occurs and then attempt to close those files out.
- *
- * Populates the dispose list with entries that have already had their
- * refcounts go to zero. The actual free of an nfsd_file can be expensive,
- * so we leave it up to the caller whether it wants to wait or not.
- */
-static void
-nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
-{
-	struct rhlist_head *tmp, *list;
-	struct nfsd_file *nf;
-
-	rcu_read_lock();
-	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
-			       nfsd_file_rhash_params);
-	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
-		if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
-			continue;
-		nfsd_file_cond_queue(nf, dispose);
-	}
-	rcu_read_unlock();
-}
-
-/**
- * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
- * @inode: inode of the file to attempt to remove
- *
- * Close out any open nfsd_files that can be reaped for @inode. The
- * actual freeing is deferred to the dispose_list_delayed infrastructure.
- *
- * This is used by the fsnotify callbacks and setlease notifier.
- */
-static void
-nfsd_file_close_inode(struct inode *inode)
-{
-	LIST_HEAD(dispose);
-
-	nfsd_file_queue_for_close(inode, &dispose);
-	nfsd_file_dispose_list_delayed(&dispose);
+	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
 }
 
 /**
  * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
  * @inode: inode of the file to attempt to remove
  *
- * Close out any open nfsd_files that can be reaped for @inode. The
- * nfsd_files are closed out synchronously.
- *
- * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames
- * when reexporting NFS.
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put. Also ensure that any of the
+ * fputs also have their final __fput done as well.
  */
 void
 nfsd_file_close_inode_sync(struct inode *inode)
 {
-	struct nfsd_file *nf;
+	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
+						NFSD_FILE_HASH_BITS);
 	LIST_HEAD(dispose);
 
-	trace_nfsd_file_close(inode);
+	__nfsd_file_close_inode(inode, hashval, &dispose);
+	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
+	nfsd_file_dispose_list_sync(&dispose);
+}
 
-	nfsd_file_queue_for_close(inode, &dispose);
-	while (!list_empty(&dispose)) {
-		nf = list_first_entry(&dispose, struct nfsd_file, nf_lru);
-		list_del_init(&nf->nf_lru);
-		nfsd_file_free(nf);
-	}
-	flush_delayed_fput();
+/**
+ * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Walk the whole hash bucket, looking for any files that correspond to "inode".
+ * If any do, then unhash them and put the hashtable reference to them and
+ * destroy any that had their last reference put.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
+						NFSD_FILE_HASH_BITS);
+	LIST_HEAD(dispose);
+
+	__nfsd_file_close_inode(inode, hashval, &dispose);
+	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
+	nfsd_file_dispose_list_delayed(&dispose);
 }
 
 /**
  * nfsd_file_delayed_close - close unused nfsd_files
  * @work: dummy
  *
- * Scrape the freeme list for this nfsd_net, and then dispose of them
- * all.
+ * Walk the LRU list and close any entries that have not been used since
+ * the last scan.
+ *
+ * Note this can deadlock with nfsd_file_cache_purge.
  */
 static void
 nfsd_file_delayed_close(struct work_struct *work)
@@ -651,10 +576,7 @@ nfsd_file_delayed_close(struct work_struct *work)
 	struct nfsd_fcache_disposal *l = container_of(work,
 			struct nfsd_fcache_disposal, work);
 
-	spin_lock(&l->lock);
-	list_splice_init(&l->freeme, &head);
-	spin_unlock(&l->lock);
-
+	nfsd_file_list_remove_disposal(&head, l);
 	nfsd_file_dispose_list(&head);
 }
 
@@ -666,7 +588,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
 
 	/* Only close files for F_SETLEASE leases */
 	if (fl->fl_flags & FL_LEASE)
-		nfsd_file_close_inode(file_inode(fl->fl_file));
+		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
 	return 0;
 }
 
@@ -679,9 +601,6 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
 				struct inode *inode, struct inode *dir,
 				const struct qstr *name, u32 cookie)
 {
-	if (WARN_ON_ONCE(!inode))
-		return 0;
-
 	trace_nfsd_file_fsnotify_handle_event(inode, mask);
 
 	/* Should be no marks on non-regular files */
@@ -709,21 +628,25 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
 int
 nfsd_file_cache_init(void)
 {
-	int ret;
+	int		ret = -ENOMEM;
+	unsigned int	i;
 
-	lockdep_assert_held(&nfsd_mutex);
-	if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+	if (nfsd_file_hashtbl)
 		return 0;
 
-	ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
-	if (ret)
-		return ret;
-
-	ret = -ENOMEM;
 	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
 	if (!nfsd_filecache_wq)
 		goto out;
 
+	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
+				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+	if (!nfsd_file_hashtbl) {
+		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+		goto out_err;
+	}
+
 	nfsd_file_slab = kmem_cache_create("nfsd_file",
 				sizeof(struct nfsd_file), 0, 0, NULL);
 	if (!nfsd_file_slab) {
@@ -757,16 +680,19 @@ nfsd_file_cache_init(void)
 		goto out_shrinker;
 	}
 
-	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
-							FSNOTIFY_GROUP_NOFS);
+	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
 	if (IS_ERR(nfsd_file_fsnotify_group)) {
 		pr_err("nfsd: unable to create fsnotify group: %ld\n",
 			PTR_ERR(nfsd_file_fsnotify_group));
-		ret = PTR_ERR(nfsd_file_fsnotify_group);
 		nfsd_file_fsnotify_group = NULL;
 		goto out_notifier;
 	}
 
+	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+	}
+
 	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
 out:
 	return ret;
@@ -781,47 +707,50 @@ out_err:
 	nfsd_file_slab = NULL;
 	kmem_cache_destroy(nfsd_file_mark_slab);
 	nfsd_file_mark_slab = NULL;
+	kvfree(nfsd_file_hashtbl);
+	nfsd_file_hashtbl = NULL;
 	destroy_workqueue(nfsd_filecache_wq);
 	nfsd_filecache_wq = NULL;
-	rhltable_destroy(&nfsd_file_rhltable);
 	goto out;
 }
 
-/**
- * __nfsd_file_cache_purge: clean out the cache for shutdown
- * @net: net-namespace to shut down the cache (may be NULL)
- *
- * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
- * then close out everything. Called when an nfsd instance is being shut down,
- * and when the exports table is flushed.
+/*
+ * Note this can deadlock with nfsd_file_lru_cb.
  */
-static void
-__nfsd_file_cache_purge(struct net *net)
+void
+nfsd_file_cache_purge(struct net *net)
 {
-	struct rhashtable_iter iter;
-	struct nfsd_file *nf;
+	unsigned int		i;
+	struct nfsd_file	*nf;
+	struct hlist_node	*next;
 	LIST_HEAD(dispose);
+	bool del;
 
-	rhltable_walk_enter(&nfsd_file_rhltable, &iter);
-	do {
-		rhashtable_walk_start(&iter);
+	if (!nfsd_file_hashtbl)
+		return;
 
-		nf = rhashtable_walk_next(&iter);
-		while (!IS_ERR_OR_NULL(nf)) {
-			if (!net || nf->nf_net == net)
-				nfsd_file_cond_queue(nf, &dispose);
-			nf = rhashtable_walk_next(&iter);
+	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+		spin_lock(&nfb->nfb_lock);
+		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
+			if (net && nf->nf_net != net)
+				continue;
+			del = nfsd_file_unhash_and_release_locked(nf, &dispose);
+
+			/*
+			 * Deadlock detected! Something marked this entry as
+			 * unhashed, but hasn't removed it from the hash list.
+			 */
+			WARN_ON_ONCE(!del);
 		}
-
-		rhashtable_walk_stop(&iter);
-	} while (nf == ERR_PTR(-EAGAIN));
-	rhashtable_walk_exit(&iter);
-
-	nfsd_file_dispose_list(&dispose);
+		spin_unlock(&nfb->nfb_lock);
+		nfsd_file_dispose_list(&dispose);
+	}
 }
 
 static struct nfsd_fcache_disposal *
-nfsd_alloc_fcache_disposal(void)
+nfsd_alloc_fcache_disposal(struct net *net)
 {
 	struct nfsd_fcache_disposal *l;
 
@@ -829,6 +758,7 @@ nfsd_alloc_fcache_disposal(void)
 	if (!l)
 		return NULL;
 	INIT_WORK(&l->work, nfsd_file_delayed_close);
+	l->net = net;
 	spin_lock_init(&l->lock);
 	INIT_LIST_HEAD(&l->freeme);
 	return l;
@@ -837,40 +767,61 @@ nfsd_alloc_fcache_disposal(void)
 static void
 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
 {
+	rcu_assign_pointer(l->net, NULL);
 	cancel_work_sync(&l->work);
 	nfsd_file_dispose_list(&l->freeme);
-	kfree(l);
+	kfree_rcu(l, rcu);
+}
+
+static void
+nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+	spin_lock(&laundrette_lock);
+	list_add_tail_rcu(&l->list, &laundrettes);
+	spin_unlock(&laundrette_lock);
+}
+
+static void
+nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+	spin_lock(&laundrette_lock);
+	list_del_rcu(&l->list);
+	spin_unlock(&laundrette_lock);
+}
+
+static int
+nfsd_alloc_fcache_disposal_net(struct net *net)
+{
+	struct nfsd_fcache_disposal *l;
+
+	l = nfsd_alloc_fcache_disposal(net);
+	if (!l)
+		return -ENOMEM;
+	nfsd_add_fcache_disposal(l);
+	return 0;
 }
 
 static void
 nfsd_free_fcache_disposal_net(struct net *net)
 {
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+	struct nfsd_fcache_disposal *l;
 
-	nfsd_free_fcache_disposal(l);
+	rcu_read_lock();
+	list_for_each_entry_rcu(l, &laundrettes, list) {
+		if (l->net != net)
+			continue;
+		nfsd_del_fcache_disposal(l);
+		rcu_read_unlock();
+		nfsd_free_fcache_disposal(l);
+		return;
+	}
+	rcu_read_unlock();
 }
 
 int
 nfsd_file_cache_start_net(struct net *net)
 {
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
-	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
-	return nn->fcache_disposal ? 0 : -ENOMEM;
-}
-
-/**
- * nfsd_file_cache_purge - Remove all cache items associated with @net
- * @net: target net namespace
- *
- */
-void
-nfsd_file_cache_purge(struct net *net)
-{
-	lockdep_assert_held(&nfsd_mutex);
-	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
-		__nfsd_file_cache_purge(net);
+	return nfsd_alloc_fcache_disposal_net(net);
 }
 
 void
@@ -883,11 +834,7 @@ nfsd_file_cache_shutdown_net(struct net *net)
 void
 nfsd_file_cache_shutdown(void)
 {
-	int i;
-
-	lockdep_assert_held(&nfsd_mutex);
-	if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
-		return;
+	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
 
 	lease_unregister_notifier(&nfsd_file_lease_notifier);
 	unregister_shrinker(&nfsd_file_shrinker);
@@ -896,7 +843,7 @@ nfsd_file_cache_shutdown(void)
 	 * calling nfsd_file_cache_purge
 	 */
 	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
-	__nfsd_file_cache_purge(NULL);
+	nfsd_file_cache_purge(NULL);
 	list_lru_destroy(&nfsd_file_lru);
 	rcu_barrier();
 	fsnotify_put_group(nfsd_file_fsnotify_group);
@@ -906,332 +853,240 @@ nfsd_file_cache_shutdown(void)
 	fsnotify_wait_marks_destroyed();
 	kmem_cache_destroy(nfsd_file_mark_slab);
 	nfsd_file_mark_slab = NULL;
+	kvfree(nfsd_file_hashtbl);
+	nfsd_file_hashtbl = NULL;
 	destroy_workqueue(nfsd_filecache_wq);
 	nfsd_filecache_wq = NULL;
-	rhltable_destroy(&nfsd_file_rhltable);
+}
 
-	for_each_possible_cpu(i) {
-		per_cpu(nfsd_file_cache_hits, i) = 0;
-		per_cpu(nfsd_file_acquisitions, i) = 0;
-		per_cpu(nfsd_file_releases, i) = 0;
-		per_cpu(nfsd_file_total_age, i) = 0;
-		per_cpu(nfsd_file_evictions, i) = 0;
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+	int i;
+
+	if (!uid_eq(c1->fsuid, c2->fsuid))
+		return false;
+	if (!gid_eq(c1->fsgid, c2->fsgid))
+		return false;
+	if (c1->group_info == NULL || c2->group_info == NULL)
+		return c1->group_info == c2->group_info;
+	if (c1->group_info->ngroups != c2->group_info->ngroups)
+		return false;
+	for (i = 0; i < c1->group_info->ngroups; i++) {
+		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+			return false;
 	}
+	return true;
 }
 
 static struct nfsd_file *
-nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
-			struct inode *inode, unsigned char need,
-			bool want_gc)
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+			unsigned int hashval, struct net *net)
 {
-	struct rhlist_head *tmp, *list;
 	struct nfsd_file *nf;
+	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
 
-	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
-			       nfsd_file_rhash_params);
-	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
+	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
 		if (nf->nf_may != need)
 			continue;
+		if (nf->nf_inode != inode)
+			continue;
 		if (nf->nf_net != net)
 			continue;
-		if (!nfsd_match_cred(nf->nf_cred, cred))
+		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
 			continue;
-		if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
+		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
 			continue;
-		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
-			continue;
-
-		if (!nfsd_file_get(nf))
-			continue;
-		return nf;
+		if (nfsd_file_get(nf) != NULL)
+			return nf;
 	}
 	return NULL;
 }
 
 /**
- * nfsd_file_is_cached - are there any cached open files for this inode?
- * @inode: inode to check
+ * nfsd_file_is_cached - are there any cached open files for this fh?
+ * @inode: inode of the file to check
  *
- * The lookup matches inodes in all net namespaces and is atomic wrt
- * nfsd_file_acquire().
- *
- * Return values:
- *   %true: filecache contains at least one file matching this inode
- *   %false: filecache contains no files matching this inode
+ * Scan the hashtable for open files that match this fh. Returns true if there
+ * are any, and false if not.
  */
 bool
 nfsd_file_is_cached(struct inode *inode)
 {
-	struct rhlist_head *tmp, *list;
-	struct nfsd_file *nf;
-	bool ret = false;
+	bool			ret = false;
+	struct nfsd_file	*nf;
+	unsigned int		hashval;
+
+        hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
 
 	rcu_read_lock();
-	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
-			       nfsd_file_rhash_params);
-	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
-		if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
+	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+				 nf_node) {
+		if (inode == nf->nf_inode) {
 			ret = true;
 			break;
 		}
+	}
 	rcu_read_unlock();
-
-	trace_nfsd_file_is_cached(inode, (int)ret);
+	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
 	return ret;
 }
 
-static __be32
-nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		     unsigned int may_flags, struct file *file,
-		     struct nfsd_file **pnf, bool want_gc)
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		  unsigned int may_flags, struct nfsd_file **pnf)
 {
-	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+	__be32	status;
 	struct net *net = SVC_NET(rqstp);
-	struct nfsd_file *new, *nf;
-	const struct cred *cred;
-	bool open_retry = true;
+	struct nfsd_file *nf, *new;
 	struct inode *inode;
-	__be32 status;
-	int ret;
+	unsigned int hashval;
+	bool retry = true;
 
+	/* FIXME: skip this if fh_dentry is already set? */
 	status = fh_verify(rqstp, fhp, S_IFREG,
 				may_flags|NFSD_MAY_OWNER_OVERRIDE);
 	if (status != nfs_ok)
 		return status;
-	inode = d_inode(fhp->fh_dentry);
-	cred = get_current_cred();
 
+	inode = d_inode(fhp->fh_dentry);
+	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
 retry:
 	rcu_read_lock();
-	nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
+	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
 	rcu_read_unlock();
-
-	if (nf) {
-		/*
-		 * If the nf is on the LRU then it holds an extra reference
-		 * that must be put if it's removed. It had better not be
-		 * the last one however, since we should hold another.
-		 */
-		if (nfsd_file_lru_remove(nf))
-			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+	if (nf)
 		goto wait_for_construction;
-	}
 
-	new = nfsd_file_alloc(net, inode, need, want_gc);
+	new = nfsd_file_alloc(inode, may_flags, hashval, net);
 	if (!new) {
-		status = nfserr_jukebox;
-		goto out;
+		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+					NULL, nfserr_jukebox);
+		return nfserr_jukebox;
 	}
 
-	rcu_read_lock();
-	spin_lock(&inode->i_lock);
-	nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
-	if (unlikely(nf)) {
-		spin_unlock(&inode->i_lock);
-		rcu_read_unlock();
-		nfsd_file_slab_free(&new->nf_rcu);
-		goto wait_for_construction;
-	}
-	nf = new;
-	ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
-			      nfsd_file_rhash_params);
-	spin_unlock(&inode->i_lock);
-	rcu_read_unlock();
-	if (likely(ret == 0))
+	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+	if (nf == NULL)
 		goto open_file;
-
-	if (ret == -EEXIST)
-		goto retry;
-	trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
-	status = nfserr_jukebox;
-	goto construction_err;
+	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	nfsd_file_slab_free(&new->nf_rcu);
 
 wait_for_construction:
 	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
 
 	/* Did construction of this file fail? */
 	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-		trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
-		if (!open_retry) {
+		if (!retry) {
 			status = nfserr_jukebox;
-			goto construction_err;
+			goto out;
 		}
-		open_retry = false;
+		retry = false;
+		nfsd_file_put_noref(nf);
 		goto retry;
 	}
+
 	this_cpu_inc(nfsd_file_cache_hits);
 
-	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
-	if (status != nfs_ok) {
+	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
+		bool write = (may_flags & NFSD_MAY_WRITE);
+
+		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
+		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
+			status = nfserrno(nfsd_open_break_lease(
+					file_inode(nf->nf_file), may_flags));
+			if (status == nfs_ok) {
+				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
+				if (write)
+					clear_bit(NFSD_FILE_BREAK_WRITE,
+						  &nf->nf_flags);
+			}
+		}
+	}
+out:
+	if (status == nfs_ok) {
+		*pnf = nf;
+	} else {
 		nfsd_file_put(nf);
 		nf = NULL;
 	}
 
-out:
-	if (status == nfs_ok) {
-		this_cpu_inc(nfsd_file_acquisitions);
-		nfsd_file_check_write_error(nf);
-		*pnf = nf;
-	}
-	put_cred(cred);
-	trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
+	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
 	return status;
-
 open_file:
-	trace_nfsd_file_alloc(nf);
-	nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
-	if (nf->nf_mark) {
-		if (file) {
-			get_file(file);
-			nf->nf_file = file;
-			status = nfs_ok;
-			trace_nfsd_file_opened(nf, status);
-		} else {
-			status = nfsd_open_verified(rqstp, fhp, may_flags,
-						    &nf->nf_file);
-			trace_nfsd_file_open(nf, status);
-		}
-	} else
+	nf = new;
+	/* Take reference for the hashtable */
+	refcount_inc(&nf->nf_ref);
+	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
+	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+	++nfsd_file_hashtbl[hashval].nfb_count;
+	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+			nfsd_file_hashtbl[hashval].nfb_count);
+	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
+		nfsd_file_gc();
+
+	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+	if (nf->nf_mark)
+		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+				may_flags, &nf->nf_file);
+	else
 		status = nfserr_jukebox;
 	/*
 	 * If construction failed, or we raced with a call to unlink()
 	 * then unhash.
 	 */
-	if (status != nfs_ok || inode->i_nlink == 0)
-		nfsd_file_unhash(nf);
-	clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
-	if (status == nfs_ok)
-		goto out;
-
-construction_err:
-	if (refcount_dec_and_test(&nf->nf_ref))
-		nfsd_file_free(nf);
-	nf = NULL;
+	if (status != nfs_ok || inode->i_nlink == 0) {
+		bool do_free;
+		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+		do_free = nfsd_file_unhash(nf);
+		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+		if (do_free)
+			nfsd_file_put_noref(nf);
+	}
+	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
 	goto out;
 }
 
-/**
- * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
- * @rqstp: the RPC transaction being executed
- * @fhp: the NFS filehandle of the file to be opened
- * @may_flags: NFSD_MAY_ settings for the file
- * @pnf: OUT: new or found "struct nfsd_file" object
- *
- * The nfsd_file object returned by this API is reference-counted
- * and garbage-collected. The object is retained for a few
- * seconds after the final nfsd_file_put() in case the caller
- * wants to re-use it.
- *
- * Return values:
- *   %nfs_ok - @pnf points to an nfsd_file with its reference
- *   count boosted.
- *
- * On error, an nfsstat value in network byte order is returned.
- */
-__be32
-nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		     unsigned int may_flags, struct nfsd_file **pnf)
-{
-	return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
-}
-
-/**
- * nfsd_file_acquire - Get a struct nfsd_file with an open file
- * @rqstp: the RPC transaction being executed
- * @fhp: the NFS filehandle of the file to be opened
- * @may_flags: NFSD_MAY_ settings for the file
- * @pnf: OUT: new or found "struct nfsd_file" object
- *
- * The nfsd_file_object returned by this API is reference-counted
- * but not garbage-collected. The object is unhashed after the
- * final nfsd_file_put().
- *
- * Return values:
- *   %nfs_ok - @pnf points to an nfsd_file with its reference
- *   count boosted.
- *
- * On error, an nfsstat value in network byte order is returned.
- */
-__be32
-nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  unsigned int may_flags, struct nfsd_file **pnf)
-{
-	return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
-}
-
-/**
- * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
- * @rqstp: the RPC transaction being executed
- * @fhp: the NFS filehandle of the file just created
- * @may_flags: NFSD_MAY_ settings for the file
- * @file: cached, already-open file (may be NULL)
- * @pnf: OUT: new or found "struct nfsd_file" object
- *
- * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
- * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
- * opening a new one.
- *
- * Return values:
- *   %nfs_ok - @pnf points to an nfsd_file with its reference
- *   count boosted.
- *
- * On error, an nfsstat value in network byte order is returned.
- */
-__be32
-nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
-			 unsigned int may_flags, struct file *file,
-			 struct nfsd_file **pnf)
-{
-	return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
-}
-
 /*
  * Note that fields may be added, removed or reordered in the future. Programs
  * scraping this file for info should test the labels to ensure they're
  * getting the correct field.
  */
-int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
 {
-	unsigned long releases = 0, evictions = 0;
-	unsigned long hits = 0, acquisitions = 0;
-	unsigned int i, count = 0, buckets = 0;
-	unsigned long lru = 0, total_age = 0;
+	unsigned int i, count = 0, longest = 0;
+	unsigned long hits = 0;
 
-	/* Serialize with server shutdown */
+	/*
+	 * No need for spinlocks here since we're not terribly interested in
+	 * accuracy. We do take the nfsd_mutex simply to ensure that we
+	 * don't end up racing with server shutdown
+	 */
 	mutex_lock(&nfsd_mutex);
-	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
-		struct bucket_table *tbl;
-		struct rhashtable *ht;
-
-		lru = list_lru_count(&nfsd_file_lru);
-
-		rcu_read_lock();
-		ht = &nfsd_file_rhltable.ht;
-		count = atomic_read(&ht->nelems);
-		tbl = rht_dereference_rcu(ht->tbl, ht);
-		buckets = tbl->size;
-		rcu_read_unlock();
+	if (nfsd_file_hashtbl) {
+		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+			count += nfsd_file_hashtbl[i].nfb_count;
+			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+		}
 	}
 	mutex_unlock(&nfsd_mutex);
 
-	for_each_possible_cpu(i) {
+	for_each_possible_cpu(i)
 		hits += per_cpu(nfsd_file_cache_hits, i);
-		acquisitions += per_cpu(nfsd_file_acquisitions, i);
-		releases += per_cpu(nfsd_file_releases, i);
-		total_age += per_cpu(nfsd_file_total_age, i);
-		evictions += per_cpu(nfsd_file_evictions, i);
-	}
 
-	seq_printf(m, "total inodes:  %u\n", count);
-	seq_printf(m, "hash buckets:  %u\n", buckets);
-	seq_printf(m, "lru entries:   %lu\n", lru);
+	seq_printf(m, "total entries: %u\n", count);
+	seq_printf(m, "longest chain: %u\n", longest);
 	seq_printf(m, "cache hits:    %lu\n", hits);
-	seq_printf(m, "acquisitions:  %lu\n", acquisitions);
-	seq_printf(m, "releases:      %lu\n", releases);
-	seq_printf(m, "evictions:     %lu\n", evictions);
-	if (releases)
-		seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
-	else
-		seq_printf(m, "mean age (ms): -\n");
 	return 0;
 }
+
+int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nfsd_file_cache_stats_show, NULL);
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index e54165a3224f..435ceab27897 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -29,23 +29,23 @@ struct nfsd_file_mark {
  * never be dereferenced, only used for comparison.
  */
 struct nfsd_file {
-	struct rhlist_head	nf_rlist;
-	void			*nf_inode;
+	struct hlist_node	nf_node;
+	struct list_head	nf_lru;
+	struct rcu_head		nf_rcu;
 	struct file		*nf_file;
 	const struct cred	*nf_cred;
 	struct net		*nf_net;
 #define NFSD_FILE_HASHED	(0)
 #define NFSD_FILE_PENDING	(1)
-#define NFSD_FILE_REFERENCED	(2)
-#define NFSD_FILE_GC		(3)
+#define NFSD_FILE_BREAK_READ	(2)
+#define NFSD_FILE_BREAK_WRITE	(3)
+#define NFSD_FILE_REFERENCED	(4)
 	unsigned long		nf_flags;
+	struct inode		*nf_inode;
+	unsigned int		nf_hashval;
 	refcount_t		nf_ref;
 	unsigned char		nf_may;
-
 	struct nfsd_file_mark	*nf_mark;
-	struct list_head	nf_lru;
-	struct rcu_head		nf_rcu;
-	ktime_t			nf_birthtime;
 };
 
 int nfsd_file_cache_init(void);
@@ -57,12 +57,7 @@ void nfsd_file_put(struct nfsd_file *nf);
 struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
 void nfsd_file_close_inode_sync(struct inode *inode);
 bool nfsd_file_is_cached(struct inode *inode);
-__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  unsigned int may_flags, struct nfsd_file **nfp);
 __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		  unsigned int may_flags, struct nfsd_file **nfp);
-__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  unsigned int may_flags, struct file *file,
-		  struct nfsd_file **nfp);
-int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
+int	nfsd_file_cache_stats_open(struct inode *, struct file *);
 #endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index fabc21ed68ce..db7ef07ae50c 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -15,7 +15,6 @@
 
 #include "flexfilelayoutxdr.h"
 #include "pnfs.h"
-#include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
 
@@ -62,7 +61,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 		goto out_error;
 
 	fl->fh.size = fhp->fh_handle.fh_size;
-	memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size);
+	memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size);
 
 	/* Give whole file layout segments */
 	seg->offset = 0;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 46a7f9b813e5..3f5b3d7b62b7 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -25,22 +25,18 @@
  * Note: we hold the dentry use count while the file is open.
  */
 static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
-		int mode)
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 {
 	__be32		nfserr;
-	int		access;
 	struct svc_fh	fh;
 
 	/* must initialize before using! but maxsize doesn't matter */
 	fh_init(&fh,0);
 	fh.fh_handle.fh_size = f->size;
-	memcpy(&fh.fh_handle.fh_raw, f->data, f->size);
+	memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
 	fh.fh_export = NULL;
 
-	access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
-	access |= NFSD_MAY_LOCK;
-	nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
+	nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
 	fh_put(&fh);
  	/* We return nlm error codes as nlm doesn't know
 	 * about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 51a4b7885cae..02d3d2f0e616 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -10,8 +10,6 @@
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
-#include <linux/percpu_counter.h>
-#include <linux/siphash.h>
 
 /* Hash tables for nfs4_clientid state */
 #define CLIENT_HASH_BITS                 4
@@ -23,14 +21,6 @@
 struct cld_net;
 struct nfsd4_client_tracking_ops;
 
-enum {
-	/* cache misses due only to checksum comparison failures */
-	NFSD_NET_PAYLOAD_MISSES,
-	/* amount of memory (in bytes) currently consumed by the DRC */
-	NFSD_NET_DRC_MEM_USAGE,
-	NFSD_NET_COUNTERS_NUM
-};
-
 /*
  * Represents a nfsd "container". With respect to nfsv4 state tracking, the
  * fields of interest are the *_id_hashtbls and the *_name_tree. These track
@@ -109,8 +99,9 @@ struct nfsd_net {
 	bool nfsd_net_up;
 	bool lockd_up;
 
-	seqlock_t writeverf_lock;
-	unsigned char writeverf[8];
+	/* Time of server startup */
+	struct timespec64 nfssvc_boot;
+	seqlock_t boot_lock;
 
 	/*
 	 * Max number of connections this nfsd container will allow. Defaults
@@ -123,13 +114,12 @@ struct nfsd_net {
 	u32 clverifier_counter;
 
 	struct svc_serv *nfsd_serv;
-	/* When a listening socket is added to nfsd, keep_active is set
-	 * and this justifies a reference on nfsd_serv.  This stops
-	 * nfsd_serv from being freed.  When the number of threads is
-	 * set, keep_active is cleared and the reference is dropped.  So
-	 * when the last thread exits, the service will be destroyed.
-	 */
-	int keep_active;
+
+	wait_queue_head_t ntf_wq;
+	atomic_t ntf_refcnt;
+
+	/* Allow umount to wait for nfsd state cleanup */
+	struct completion nfsd_shutdown_complete;
 
 	/*
 	 * clientid and stateid data for construction of net unique COPY
@@ -159,16 +149,20 @@ struct nfsd_net {
 
 	/*
 	 * Stats and other tracking of on the duplicate reply cache.
-	 * The longest_chain* fields are modified with only the per-bucket
-	 * cache lock, which isn't really safe and should be fixed if we want
-	 * these statistics to be completely accurate.
+	 * These fields and the "rc" fields in nfsdstats are modified
+	 * with only the per-bucket cache lock, which isn't really safe
+	 * and should be fixed if we want the statistics to be
+	 * completely accurate.
 	 */
 
 	/* total number of entries */
 	atomic_t                 num_drc_entries;
 
-	/* Per-netns stats counters */
-	struct percpu_counter    counter[NFSD_NET_COUNTERS_NUM];
+	/* cache misses due only to checksum comparison failures */
+	unsigned int             payload_misses;
+
+	/* amount of memory (in bytes) currently consumed by the DRC */
+	unsigned int             drc_mem_usage;
 
 	/* longest hash chain seen */
 	unsigned int             longest_chain;
@@ -177,25 +171,8 @@ struct nfsd_net {
 	unsigned int             longest_chain_cachesize;
 
 	struct shrinker		nfsd_reply_cache_shrinker;
-
-	/* tracking server-to-server copy mounts */
-	spinlock_t              nfsd_ssc_lock;
-	struct list_head        nfsd_ssc_mount_list;
-	wait_queue_head_t       nfsd_ssc_waitq;
-
 	/* utsname taken from the process that starts the server */
 	char			nfsd_name[UNX_MAXNODENAME+1];
-
-	struct nfsd_fcache_disposal *fcache_disposal;
-
-	siphash_key_t		siphash_key;
-
-	atomic_t		nfs4_client_count;
-	int			nfs4_max_clients;
-
-	atomic_t		nfsd_courtesy_clients;
-	struct shrinker		nfsd_client_shrinker;
-	struct work_struct	nfsd_shrinker_work;
 };
 
 /* Simple check to find out if a given net was properly initialized */
@@ -205,6 +182,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn);
 
 extern unsigned int nfsd_net_id;
 
-void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
-void nfsd_reset_write_verifier(struct nfsd_net *nn);
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_boot_verifier(struct nfsd_net *nn);
 #endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 9adf672dedbd..6a900f770dd2 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -111,7 +111,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 	if (error)
 		goto out_errno;
 
-	inode_lock(inode);
+	fh_lock(fh);
 
 	error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
 	if (error)
@@ -120,7 +120,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 	if (error)
 		goto out_drop_lock;
 
-	inode_unlock(inode);
+	fh_unlock(fh);
 
 	fh_drop_write(fh);
 
@@ -134,7 +134,7 @@ out:
 	return rpc_success;
 
 out_drop_lock:
-	inode_unlock(inode);
+	fh_unlock(fh);
 	fh_drop_write(fh);
 out_errno:
 	resp->status = nfserrno(error);
@@ -185,106 +185,161 @@ out:
 /*
  * XDR decode functions
  */
+static int nfsaclsvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+{
+	return 1;
+}
 
-static bool
-nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
 
-	if (!svcxdr_decode_fhandle(xdr, &argp->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
-		return false;
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
+		return 0;
+	argp->mask = ntohl(*p); p++;
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-static bool
-nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
 
-	if (!svcxdr_decode_fhandle(xdr, &argp->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
-		return false;
-	if (argp->mask & ~NFS_ACL_MASK)
-		return false;
-	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
-				   &argp->acl_access : NULL))
-		return false;
-	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
-				   &argp->acl_default : NULL))
-		return false;
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
+		return 0;
+	argp->mask = ntohl(*p++);
+	if (argp->mask & ~NFS_ACL_MASK ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
 
-	return true;
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (argp->mask & NFS_ACL) ?
+			  &argp->acl_access : NULL);
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (argp->mask & NFS_DFACL) ?
+				  &argp->acl_default : NULL);
+	return (n > 0);
 }
 
-static bool
-nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nfsd3_accessargs *args = rqstp->rq_argp;
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
 
-	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->access) < 0)
-		return false;
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
+		return 0;
+	return xdr_argsize_check(rqstp, p);
+}
 
-	return true;
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+
+	p = nfs2svc_decode_fh(p, &argp->fh);
+	if (!p)
+		return 0;
+	argp->access = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
 }
 
 /*
  * XDR encode functions
  */
 
+/*
+ * There must be an encoding function for void results so svc_process
+ * will work properly.
+ */
+static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
 /* GETACL */
-static bool
-nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
+	struct kvec *head = rqstp->rq_res.head;
+	unsigned int base;
+	int n;
+	int w;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		return xdr_ressize_check(rqstp, p);
 
+	/*
+	 * Since this is version 2, the check for nfserr in
+	 * nfsd_dispatch actually ensures the following cannot happen.
+	 * However, it seems fragile to depend on that.
+	 */
 	if (dentry == NULL || d_really_is_negative(dentry))
-		return true;
+		return 0;
 	inode = d_inode(dentry);
 
-	if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-		return false;
-	if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
-		return false;
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
+	*p++ = htonl(resp->mask);
+	if (!xdr_ressize_check(rqstp, p))
+		return 0;
+	base = (char *)p - (char *)head->iov_base;
 
-	if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
-				   resp->mask & NFS_ACL, 0))
-		return false;
-	if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
-				   resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
-		return false;
+	rqstp->rq_res.page_len = w = nfsacl_size(
+		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+	while (w > 0) {
+		if (!*(rqstp->rq_next_page++))
+			return 0;
+		w -= PAGE_SIZE;
+	}
 
-	return true;
+	n = nfsacl_encode(&rqstp->rq_res, base, inode,
+			  resp->acl_access,
+			  resp->mask & NFS_ACL, 0);
+	if (n > 0)
+		n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+				  resp->acl_default,
+				  resp->mask & NFS_DFACL,
+				  NFS_ACL_DEFAULT);
+	return (n > 0);
+}
+
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		goto out;
+
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
+out:
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-static bool
-nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->access) < 0)
-			return false;
-		break;
-	}
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		goto out;
 
-	return true;
+	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
+	*p++ = htonl(resp->access);
+out:
+	return xdr_ressize_check(rqstp, p);
 }
 
 /*
@@ -299,6 +354,13 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
 	posix_acl_release(resp->acl_default);
 }
 
+static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
+{
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
+	fh_put(&resp->fh);
+}
+
 static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 {
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
@@ -316,14 +378,12 @@ struct nfsd3_voidargs { int dummy; };
 static const struct svc_procedure nfsd_acl_procedures2[5] = {
 	[ACLPROC2_NULL] = {
 		.pc_func = nfsacld_proc_null,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfsaclsvc_decode_voidarg,
+		.pc_encode = nfsaclsvc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd3_voidargs),
+		.pc_ressize = sizeof(struct nfsd3_voidargs),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
-		.pc_name = "NULL",
 	},
 	[ACLPROC2_GETACL] = {
 		.pc_func = nfsacld_proc_getacl,
@@ -331,35 +391,29 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
 		.pc_encode = nfsaclsvc_encode_getaclres,
 		.pc_release = nfsaclsvc_release_getacl,
 		.pc_argsize = sizeof(struct nfsd3_getaclargs),
-		.pc_argzero = sizeof(struct nfsd3_getaclargs),
 		.pc_ressize = sizeof(struct nfsd3_getaclres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+1+2*(1+ACL),
-		.pc_name = "GETACL",
 	},
 	[ACLPROC2_SETACL] = {
 		.pc_func = nfsacld_proc_setacl,
 		.pc_decode = nfsaclsvc_decode_setaclargs,
-		.pc_encode = nfssvc_encode_attrstatres,
-		.pc_release = nfssvc_release_attrstat,
+		.pc_encode = nfsaclsvc_encode_attrstatres,
+		.pc_release = nfsaclsvc_release_attrstat,
 		.pc_argsize = sizeof(struct nfsd3_setaclargs),
-		.pc_argzero = sizeof(struct nfsd3_setaclargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT,
-		.pc_name = "SETACL",
 	},
 	[ACLPROC2_GETATTR] = {
 		.pc_func = nfsacld_proc_getattr,
-		.pc_decode = nfssvc_decode_fhandleargs,
-		.pc_encode = nfssvc_encode_attrstatres,
-		.pc_release = nfssvc_release_attrstat,
+		.pc_decode = nfsaclsvc_decode_fhandleargs,
+		.pc_encode = nfsaclsvc_encode_attrstatres,
+		.pc_release = nfsaclsvc_release_attrstat,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
-		.pc_argzero = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT,
-		.pc_name = "GETATTR",
 	},
 	[ACLPROC2_ACCESS] = {
 		.pc_func = nfsacld_proc_access,
@@ -367,11 +421,9 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
 		.pc_encode = nfsaclsvc_encode_accessres,
 		.pc_release = nfsaclsvc_release_access,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
-		.pc_argzero = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT+1,
-		.pc_name = "SETATTR",
 	},
 };
 
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 161f831b3a1b..34a394e50e1d 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -101,7 +101,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 	if (error)
 		goto out_errno;
 
-	inode_lock(inode);
+	fh_lock(fh);
 
 	error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
 	if (error)
@@ -109,7 +109,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 	error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
 
 out_drop_lock:
-	inode_unlock(inode);
+	fh_unlock(fh);
 	fh_drop_write(fh);
 out_errno:
 	resp->status = nfserrno(error);
@@ -124,39 +124,43 @@ out:
 /*
  * XDR decode functions
  */
-
-static bool
-nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_getaclargs *args = rqstp->rq_argp;
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->mask) < 0)
-		return false;
+	p = nfs3svc_decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	args->mask = ntohl(*p); p++;
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-static bool
-nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
-	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_setaclargs *args = rqstp->rq_argp;
+	struct kvec *head = rqstp->rq_arg.head;
+	unsigned int base;
+	int n;
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
-		return false;
-	if (argp->mask & ~NFS_ACL_MASK)
-		return false;
-	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
-				   &argp->acl_access : NULL))
-		return false;
-	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
-				   &argp->acl_default : NULL))
-		return false;
+	p = nfs3svc_decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	args->mask = ntohl(*p++);
+	if (args->mask & ~NFS_ACL_MASK ||
+	    !xdr_argsize_check(rqstp, p))
+		return 0;
 
-	return true;
+	base = (char *)p - (char *)head->iov_base;
+	n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
+			  (args->mask & NFS_ACL) ?
+			  &args->acl_access : NULL);
+	if (n > 0)
+		n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
+				  (args->mask & NFS_DFACL) ?
+				  &args->acl_default : NULL);
+	return (n > 0);
 }
 
 /*
@@ -164,47 +168,59 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 /* GETACL */
-static bool
-nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
-	struct inode *inode;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		inode = d_inode(dentry);
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
-			return false;
+	*p++ = resp->status;
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0 && dentry && d_really_is_positive(dentry)) {
+		struct inode *inode = d_inode(dentry);
+		struct kvec *head = rqstp->rq_res.head;
+		unsigned int base;
+		int n;
+		int w;
 
-		if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
-					   resp->mask & NFS_ACL, 0))
-			return false;
-		if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
-					   resp->mask & NFS_DFACL,
-					   NFS_ACL_DEFAULT))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-	}
+		*p++ = htonl(resp->mask);
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+		base = (char *)p - (char *)head->iov_base;
 
-	return true;
+		rqstp->rq_res.page_len = w = nfsacl_size(
+			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
+			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+		while (w > 0) {
+			if (!*(rqstp->rq_next_page++))
+				return 0;
+			w -= PAGE_SIZE;
+		}
+
+		n = nfsacl_encode(&rqstp->rq_res, base, inode,
+				  resp->acl_access,
+				  resp->mask & NFS_ACL, 0);
+		if (n > 0)
+			n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+					  resp->acl_default,
+					  resp->mask & NFS_DFACL,
+					  NFS_ACL_DEFAULT);
+		if (n <= 0)
+			return 0;
+	} else
+		if (!xdr_ressize_check(rqstp, p))
+			return 0;
+
+	return 1;
 }
 
 /* SETACL */
-static bool
-nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
-	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
-		svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh);
+	*p++ = resp->status;
+	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /*
@@ -229,14 +245,12 @@ struct nfsd3_voidargs { int dummy; };
 static const struct svc_procedure nfsd_acl_procedures3[3] = {
 	[ACLPROC3_NULL] = {
 		.pc_func = nfsd3_proc_null,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfs3svc_decode_voidarg,
+		.pc_encode = nfs3svc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd3_voidargs),
+		.pc_ressize = sizeof(struct nfsd3_voidargs),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
-		.pc_name = "NULL",
 	},
 	[ACLPROC3_GETACL] = {
 		.pc_func = nfsd3_proc_getacl,
@@ -244,11 +258,9 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
 		.pc_encode = nfs3svc_encode_getaclres,
 		.pc_release = nfs3svc_release_getacl,
 		.pc_argsize = sizeof(struct nfsd3_getaclargs),
-		.pc_argzero = sizeof(struct nfsd3_getaclargs),
 		.pc_ressize = sizeof(struct nfsd3_getaclres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+1+2*(1+ACL),
-		.pc_name = "GETACL",
 	},
 	[ACLPROC3_SETACL] = {
 		.pc_func = nfsd3_proc_setacl,
@@ -256,11 +268,9 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
 		.pc_encode = nfs3svc_encode_setaclres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_setaclargs),
-		.pc_argzero = sizeof(struct nfsd3_setaclargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstat),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT,
-		.pc_name = "SETACL",
 	},
 };
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 19cf583096d9..981a4e4c9a3c 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -8,12 +8,10 @@
 #include <linux/fs.h>
 #include <linux/ext2_fs.h>
 #include <linux/magic.h>
-#include <linux/namei.h>
 
 #include "cache.h"
 #include "xdr3.h"
 #include "vfs.h"
-#include "filecache.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
@@ -68,15 +66,12 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
 {
 	struct nfsd3_sattrargs *argp = rqstp->rq_argp;
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &argp->attrs,
-	};
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
 				SVCFH_fmt(&argp->fh));
 
 	fh_copy(&resp->fh, &argp->fh);
-	resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
+	resp->status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
 				    argp->check_guard, argp->guardtime);
 	return rpc_success;
 }
@@ -129,7 +124,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp)
 static __be32
 nfsd3_proc_readlink(struct svc_rqst *rqstp)
 {
-	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
 	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
@@ -137,9 +132,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp)
 	/* Read the symlink. */
 	fh_copy(&resp->fh, &argp->fh);
 	resp->len = NFS3_MAXPATHLEN;
-	resp->pages = rqstp->rq_next_page++;
-	resp->status = nfsd_readlink(rqstp, &resp->fh,
-				     page_address(*resp->pages), &resp->len);
+	resp->status = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len);
 	return rpc_success;
 }
 
@@ -151,43 +144,25 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
 {
 	struct nfsd3_readargs *argp = rqstp->rq_argp;
 	struct nfsd3_readres *resp = rqstp->rq_resp;
-	unsigned int len;
-	int v;
+	u32	max_blocksize = svc_max_payload(rqstp);
+	unsigned long cnt = min(argp->count, max_blocksize);
 
 	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
 				SVCFH_fmt(&argp->fh),
 				(unsigned long) argp->count,
 				(unsigned long long) argp->offset);
 
-	argp->count = min_t(u32, argp->count, svc_max_payload(rqstp));
-	argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
-	if (argp->offset > (u64)OFFSET_MAX)
-		argp->offset = (u64)OFFSET_MAX;
-	if (argp->offset + argp->count > (u64)OFFSET_MAX)
-		argp->count = (u64)OFFSET_MAX - argp->offset;
-
-	v = 0;
-	len = argp->count;
-	resp->pages = rqstp->rq_next_page;
-	while (len > 0) {
-		struct page *page = *(rqstp->rq_next_page++);
-
-		rqstp->rq_vec[v].iov_base = page_address(page);
-		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
-		len -= rqstp->rq_vec[v].iov_len;
-		v++;
-	}
-
 	/* Obtain buffer pointer for payload.
 	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
 	 * + 1 (xdr opaque byte count) = 26
 	 */
-	resp->count = argp->count;
+	resp->count = cnt;
 	svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
 
 	fh_copy(&resp->fh, &argp->fh);
 	resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
-				 rqstp->rq_vec, v, &resp->count, &resp->eof);
+				 rqstp->rq_vec, argp->vlen, &resp->count,
+				 &resp->eof);
 	return rpc_success;
 }
 
@@ -215,147 +190,32 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
 
 	fh_copy(&resp->fh, &argp->fh);
 	resp->committed = argp->stable;
-	nvecs = svc_fill_write_vector(rqstp, &argp->payload);
-
+	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+				      &argp->first, cnt);
+	if (!nvecs) {
+		resp->status = nfserr_io;
+		goto out;
+	}
 	resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
 				  rqstp->rq_vec, nvecs, &cnt,
 				  resp->committed, resp->verf);
 	resp->count = cnt;
+out:
 	return rpc_success;
 }
 
 /*
- * Implement NFSv3's unchecked, guarded, and exclusive CREATE
- * semantics for regular files. Except for the created file,
- * this operation is stateless on the server.
- *
- * Upon return, caller must release @fhp and @resfhp.
+ * With NFSv3, CREATE processing is a lot easier than with NFSv2.
+ * At least in theory; we'll see how it fares in practice when the
+ * first reports about SunOS compatibility problems start to pour in...
  */
-static __be32
-nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  struct svc_fh *resfhp, struct nfsd3_createargs *argp)
-{
-	struct iattr *iap = &argp->attrs;
-	struct dentry *parent, *child;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= iap,
-	};
-	__u32 v_mtime, v_atime;
-	struct inode *inode;
-	__be32 status;
-	int host_err;
-
-	if (isdotent(argp->name, argp->len))
-		return nfserr_exist;
-	if (!(iap->ia_valid & ATTR_MODE))
-		iap->ia_mode = 0;
-
-	status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
-	if (status != nfs_ok)
-		return status;
-
-	parent = fhp->fh_dentry;
-	inode = d_inode(parent);
-
-	host_err = fh_want_write(fhp);
-	if (host_err)
-		return nfserrno(host_err);
-
-	inode_lock_nested(inode, I_MUTEX_PARENT);
-
-	child = lookup_one_len(argp->name, parent, argp->len);
-	if (IS_ERR(child)) {
-		status = nfserrno(PTR_ERR(child));
-		goto out;
-	}
-
-	if (d_really_is_negative(child)) {
-		status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
-		if (status != nfs_ok)
-			goto out;
-	}
-
-	status = fh_compose(resfhp, fhp->fh_export, child, fhp);
-	if (status != nfs_ok)
-		goto out;
-
-	v_mtime = 0;
-	v_atime = 0;
-	if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
-		u32 *verifier = (u32 *)argp->verf;
-
-		/*
-		 * Solaris 7 gets confused (bugid 4218508) if these have
-		 * the high bit set, as do xfs filesystems without the
-		 * "bigtime" feature. So just clear the high bits.
-		 */
-		v_mtime = verifier[0] & 0x7fffffff;
-		v_atime = verifier[1] & 0x7fffffff;
-	}
-
-	if (d_really_is_positive(child)) {
-		status = nfs_ok;
-
-		switch (argp->createmode) {
-		case NFS3_CREATE_UNCHECKED:
-			if (!d_is_reg(child))
-				break;
-			iap->ia_valid &= ATTR_SIZE;
-			goto set_attr;
-		case NFS3_CREATE_GUARDED:
-			status = nfserr_exist;
-			break;
-		case NFS3_CREATE_EXCLUSIVE:
-			if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
-			    d_inode(child)->i_atime.tv_sec == v_atime &&
-			    d_inode(child)->i_size == 0) {
-				break;
-			}
-			status = nfserr_exist;
-		}
-		goto out;
-	}
-
-	if (!IS_POSIXACL(inode))
-		iap->ia_mode &= ~current_umask();
-
-	fh_fill_pre_attrs(fhp);
-	host_err = vfs_create(inode, child, iap->ia_mode, true);
-	if (host_err < 0) {
-		status = nfserrno(host_err);
-		goto out;
-	}
-	fh_fill_post_attrs(fhp);
-
-	/* A newly created file already has a file size of zero. */
-	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
-		iap->ia_valid &= ~ATTR_SIZE;
-	if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
-		iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
-				ATTR_MTIME_SET | ATTR_ATIME_SET;
-		iap->ia_mtime.tv_sec = v_mtime;
-		iap->ia_atime.tv_sec = v_atime;
-		iap->ia_mtime.tv_nsec = 0;
-		iap->ia_atime.tv_nsec = 0;
-	}
-
-set_attr:
-	status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
-
-out:
-	inode_unlock(inode);
-	if (child && !IS_ERR(child))
-		dput(child);
-	fh_drop_write(fhp);
-	return status;
-}
-
 static __be32
 nfsd3_proc_create(struct svc_rqst *rqstp)
 {
 	struct nfsd3_createargs *argp = rqstp->rq_argp;
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
-	svc_fh *dirfhp, *newfhp;
+	svc_fh		*dirfhp, *newfhp = NULL;
+	struct iattr	*attr;
 
 	dprintk("nfsd: CREATE(3)   %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -364,8 +224,21 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
 
 	dirfhp = fh_copy(&resp->dirfh, &argp->fh);
 	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+	attr   = &argp->attrs;
 
-	resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
+	/* Unfudge the mode bits */
+	attr->ia_mode &= ~S_IFMT;
+	if (!(attr->ia_valid & ATTR_MODE)) { 
+		attr->ia_valid |= ATTR_MODE;
+		attr->ia_mode = S_IFREG;
+	} else {
+		attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
+	}
+
+	/* Now create the file and set attributes */
+	resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+				      attr, newfhp, argp->createmode,
+				      (u32 *)argp->verf, NULL, NULL);
 	return rpc_success;
 }
 
@@ -377,9 +250,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 {
 	struct nfsd3_createargs *argp = rqstp->rq_argp;
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &argp->attrs,
-	};
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
 				SVCFH_fmt(&argp->fh),
@@ -390,7 +260,8 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 	fh_copy(&resp->dirfh, &argp->fh);
 	fh_init(&resp->fh, NFS3_FHSIZE);
 	resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
-				   &attrs, S_IFDIR, 0, &resp->fh);
+				   &argp->attrs, S_IFDIR, 0, &resp->fh);
+	fh_unlock(&resp->dirfh);
 	return rpc_success;
 }
 
@@ -399,9 +270,6 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
 {
 	struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &argp->attrs,
-	};
 
 	if (argp->tlen == 0) {
 		resp->status = nfserr_inval;
@@ -428,7 +296,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
 	fh_copy(&resp->dirfh, &argp->ffh);
 	fh_init(&resp->fh, NFS3_FHSIZE);
 	resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname,
-				    argp->flen, argp->tname, &attrs, &resp->fh);
+				    argp->flen, argp->tname, &resp->fh);
 	kfree(argp->tname);
 out:
 	return rpc_success;
@@ -442,9 +310,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
 {
 	struct nfsd3_mknodargs *argp = rqstp->rq_argp;
 	struct nfsd3_diropres  *resp = rqstp->rq_resp;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &argp->attrs,
-	};
 	int type;
 	dev_t	rdev = 0;
 
@@ -470,7 +335,8 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
 
 	type = nfs3_ftypes[argp->ftype];
 	resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
-				   &attrs, type, rdev, &resp->fh);
+				   &argp->attrs, type, rdev, &resp->fh);
+	fh_unlock(&resp->dirfh);
 out:
 	return rpc_success;
 }
@@ -493,6 +359,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp)
 	fh_copy(&resp->fh, &argp->fh);
 	resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR,
 				   argp->name, argp->len);
+	fh_unlock(&resp->fh);
 	return rpc_success;
 }
 
@@ -513,6 +380,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp)
 	fh_copy(&resp->fh, &argp->fh);
 	resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR,
 				   argp->name, argp->len);
+	fh_unlock(&resp->fh);
 	return rpc_success;
 }
 
@@ -558,26 +426,6 @@ nfsd3_proc_link(struct svc_rqst *rqstp)
 	return rpc_success;
 }
 
-static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
-				     struct nfsd3_readdirres *resp,
-				     u32 count)
-{
-	struct xdr_buf *buf = &resp->dirlist;
-	struct xdr_stream *xdr = &resp->xdr;
-	unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen,
-				     svc_max_payload(rqstp));
-
-	memset(buf, 0, sizeof(*buf));
-
-	/* Reserve room for the NULL ptr & eof flag (-2 words) */
-	buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf);
-	buf->buflen -= XDR_UNIT * 2;
-	buf->pages = rqstp->rq_next_page;
-	rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-	xdr_init_encode_pages(xdr, buf, buf->pages,  NULL);
-}
-
 /*
  * Read a portion of a directory.
  */
@@ -586,26 +434,53 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
 {
 	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
 	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
-	loff_t		offset;
+	int		count = 0;
+	struct page	**p;
+	caddr_t		page_addr = NULL;
 
 	dprintk("nfsd: READDIR(3)  %s %d bytes at %d\n",
 				SVCFH_fmt(&argp->fh),
 				argp->count, (u32) argp->cookie);
 
-	nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
+	/* Make sure we've room for the NULL ptr & eof flag, and shrink to
+	 * client read size */
+	count = (argp->count >> 2) - 2;
 
+	/* Read directory and encode entries on the fly */
 	fh_copy(&resp->fh, &argp->fh);
-	resp->common.err = nfs_ok;
-	resp->cookie_offset = 0;
-	resp->rqstp = rqstp;
-	offset = argp->cookie;
-	resp->status = nfsd_readdir(rqstp, &resp->fh, &offset,
-				    &resp->common, nfs3svc_encode_entry3);
-	memcpy(resp->verf, argp->verf, 8);
-	nfs3svc_encode_cookie3(resp, offset);
 
-	/* Recycle only pages that were part of the reply */
-	rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+	resp->buflen = count;
+	resp->common.err = nfs_ok;
+	resp->buffer = argp->buffer;
+	resp->rqstp = rqstp;
+	resp->status = nfsd_readdir(rqstp, &resp->fh, (loff_t *)&argp->cookie,
+				    &resp->common, nfs3svc_encode_entry);
+	memcpy(resp->verf, argp->verf, 8);
+	count = 0;
+	for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+		page_addr = page_address(*p);
+
+		if (((caddr_t)resp->buffer >= page_addr) &&
+		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+			count += (caddr_t)resp->buffer - page_addr;
+			break;
+		}
+		count += PAGE_SIZE;
+	}
+	resp->count = count >> 2;
+	if (resp->offset) {
+		loff_t offset = argp->cookie;
+
+		if (unlikely(resp->offset1)) {
+			/* we ended up with offset on a page boundary */
+			*resp->offset = htonl(offset >> 32);
+			*resp->offset1 = htonl(offset & 0xffffffff);
+			resp->offset1 = NULL;
+		} else {
+			xdr_encode_hyper(resp->offset, offset);
+		}
+		resp->offset = NULL;
+	}
 
 	return rpc_success;
 }
@@ -619,17 +494,25 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 {
 	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
 	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
+	int	count = 0;
 	loff_t	offset;
+	struct page **p;
+	caddr_t	page_addr = NULL;
 
 	dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
 				SVCFH_fmt(&argp->fh),
 				argp->count, (u32) argp->cookie);
 
-	nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
+	/* Convert byte count to number of words (i.e. >> 2),
+	 * and reserve room for the NULL ptr & eof flag (-2 words) */
+	resp->count = (argp->count >> 2) - 2;
 
+	/* Read directory and encode entries on the fly */
 	fh_copy(&resp->fh, &argp->fh);
+
 	resp->common.err = nfs_ok;
-	resp->cookie_offset = 0;
+	resp->buffer = argp->buffer;
+	resp->buflen = resp->count;
 	resp->rqstp = rqstp;
 	offset = argp->cookie;
 
@@ -643,12 +526,30 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 	}
 
 	resp->status = nfsd_readdir(rqstp, &resp->fh, &offset,
-				    &resp->common, nfs3svc_encode_entryplus3);
+				    &resp->common, nfs3svc_encode_entry_plus);
 	memcpy(resp->verf, argp->verf, 8);
-	nfs3svc_encode_cookie3(resp, offset);
+	for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+		page_addr = page_address(*p);
 
-	/* Recycle only pages that were part of the reply */
-	rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+		if (((caddr_t)resp->buffer >= page_addr) &&
+		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+			count += (caddr_t)resp->buffer - page_addr;
+			break;
+		}
+		count += PAGE_SIZE;
+	}
+	resp->count = count >> 2;
+	if (resp->offset) {
+		if (unlikely(resp->offset1)) {
+			/* we ended up with offset on a page boundary */
+			*resp->offset = htonl(offset >> 32);
+			*resp->offset1 = htonl(offset & 0xffffffff);
+			resp->offset1 = NULL;
+		} else {
+			xdr_encode_hyper(resp->offset, offset);
+		}
+		resp->offset = NULL;
+	}
 
 out:
 	return rpc_success;
@@ -764,21 +665,20 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
 {
 	struct nfsd3_commitargs *argp = rqstp->rq_argp;
 	struct nfsd3_commitres *resp = rqstp->rq_resp;
-	struct nfsd_file *nf;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
 				SVCFH_fmt(&argp->fh),
 				argp->count,
 				(unsigned long long) argp->offset);
 
-	fh_copy(&resp->fh, &argp->fh);
-	resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE |
-					    NFSD_MAY_NOT_BREAK_LEASE, &nf);
-	if (resp->status)
+	if (argp->offset > NFS_OFFSET_MAX) {
+		resp->status = nfserr_inval;
 		goto out;
-	resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset,
+	}
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
 				   argp->count, resp->verf);
-	nfsd_file_put(nf);
 out:
 	return rpc_success;
 }
@@ -788,14 +688,18 @@ out:
  * NFSv3 Server procedures.
  * Only the results of non-idempotent operations are cached.
  */
+#define nfs3svc_decode_fhandleargs	nfs3svc_decode_fhandle
 #define nfs3svc_encode_attrstatres	nfs3svc_encode_attrstat
 #define nfs3svc_encode_wccstatres	nfs3svc_encode_wccstat
 #define nfsd3_mkdirargs			nfsd3_createargs
 #define nfsd3_readdirplusargs		nfsd3_readdirargs
 #define nfsd3_fhandleargs		nfsd_fhandle
+#define nfsd3_fhandleres		nfsd3_attrstat
 #define nfsd3_attrstatres		nfsd3_attrstat
 #define nfsd3_wccstatres		nfsd3_attrstat
 #define nfsd3_createres			nfsd3_diropres
+#define nfsd3_voidres			nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
 
 #define ST 1		/* status*/
 #define FH 17		/* filehandle with length */
@@ -806,26 +710,22 @@ out:
 static const struct svc_procedure nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
 		.pc_func = nfsd3_proc_null,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfs3svc_decode_voidarg,
+		.pc_encode = nfs3svc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd3_voidargs),
+		.pc_ressize = sizeof(struct nfsd3_voidres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
-		.pc_name = "NULL",
 	},
 	[NFS3PROC_GETATTR] = {
 		.pc_func = nfsd3_proc_getattr,
 		.pc_decode = nfs3svc_decode_fhandleargs,
-		.pc_encode = nfs3svc_encode_getattrres,
+		.pc_encode = nfs3svc_encode_attrstatres,
 		.pc_release = nfs3svc_release_fhandle,
-		.pc_argsize = sizeof(struct nfsd_fhandle),
-		.pc_argzero = sizeof(struct nfsd_fhandle),
+		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT,
-		.pc_name = "GETATTR",
 	},
 	[NFS3PROC_SETATTR] = {
 		.pc_func = nfsd3_proc_setattr,
@@ -833,23 +733,19 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
-		.pc_argzero = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC,
-		.pc_name = "SETATTR",
 	},
 	[NFS3PROC_LOOKUP] = {
 		.pc_func = nfsd3_proc_lookup,
 		.pc_decode = nfs3svc_decode_diropargs,
-		.pc_encode = nfs3svc_encode_lookupres,
+		.pc_encode = nfs3svc_encode_diropres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
-		.pc_argzero = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+FH+pAT+pAT,
-		.pc_name = "LOOKUP",
 	},
 	[NFS3PROC_ACCESS] = {
 		.pc_func = nfsd3_proc_access,
@@ -857,23 +753,19 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_accessres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
-		.pc_argzero = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+1,
-		.pc_name = "ACCESS",
 	},
 	[NFS3PROC_READLINK] = {
 		.pc_func = nfsd3_proc_readlink,
-		.pc_decode = nfs3svc_decode_fhandleargs,
+		.pc_decode = nfs3svc_decode_readlinkargs,
 		.pc_encode = nfs3svc_encode_readlinkres,
 		.pc_release = nfs3svc_release_fhandle,
-		.pc_argsize = sizeof(struct nfsd_fhandle),
-		.pc_argzero = sizeof(struct nfsd_fhandle),
+		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
-		.pc_name = "READLINK",
 	},
 	[NFS3PROC_READ] = {
 		.pc_func = nfsd3_proc_read,
@@ -881,11 +773,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_readres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
-		.pc_argzero = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
-		.pc_name = "READ",
 	},
 	[NFS3PROC_WRITE] = {
 		.pc_func = nfsd3_proc_write,
@@ -893,11 +783,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_writeres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
-		.pc_argzero = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC+4,
-		.pc_name = "WRITE",
 	},
 	[NFS3PROC_CREATE] = {
 		.pc_func = nfsd3_proc_create,
@@ -905,11 +793,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
-		.pc_argzero = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
-		.pc_name = "CREATE",
 	},
 	[NFS3PROC_MKDIR] = {
 		.pc_func = nfsd3_proc_mkdir,
@@ -917,11 +803,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
-		.pc_argzero = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
-		.pc_name = "MKDIR",
 	},
 	[NFS3PROC_SYMLINK] = {
 		.pc_func = nfsd3_proc_symlink,
@@ -929,11 +813,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
-		.pc_argzero = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
-		.pc_name = "SYMLINK",
 	},
 	[NFS3PROC_MKNOD] = {
 		.pc_func = nfsd3_proc_mknod,
@@ -941,11 +823,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_createres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
-		.pc_argzero = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
-		.pc_name = "MKNOD",
 	},
 	[NFS3PROC_REMOVE] = {
 		.pc_func = nfsd3_proc_remove,
@@ -953,11 +833,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
-		.pc_argzero = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC,
-		.pc_name = "REMOVE",
 	},
 	[NFS3PROC_RMDIR] = {
 		.pc_func = nfsd3_proc_rmdir,
@@ -965,11 +843,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_wccstatres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
-		.pc_argzero = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC,
-		.pc_name = "RMDIR",
 	},
 	[NFS3PROC_RENAME] = {
 		.pc_func = nfsd3_proc_rename,
@@ -977,11 +853,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_renameres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
-		.pc_argzero = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC+WC,
-		.pc_name = "RENAME",
 	},
 	[NFS3PROC_LINK] = {
 		.pc_func = nfsd3_proc_link,
@@ -989,11 +863,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_linkres,
 		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
-		.pc_argzero = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+pAT+WC,
-		.pc_name = "LINK",
 	},
 	[NFS3PROC_READDIR] = {
 		.pc_func = nfsd3_proc_readdir,
@@ -1001,10 +873,8 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
-		.pc_argzero = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
-		.pc_name = "READDIR",
 	},
 	[NFS3PROC_READDIRPLUS] = {
 		.pc_func = nfsd3_proc_readdirplus,
@@ -1012,43 +882,35 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_readdirres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
-		.pc_argzero = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
-		.pc_name = "READDIRPLUS",
 	},
 	[NFS3PROC_FSSTAT] = {
 		.pc_func = nfsd3_proc_fsstat,
 		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
-		.pc_argzero = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+2*6+1,
-		.pc_name = "FSSTAT",
 	},
 	[NFS3PROC_FSINFO] = {
 		.pc_func = nfsd3_proc_fsinfo,
 		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
-		.pc_argzero = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+12,
-		.pc_name = "FSINFO",
 	},
 	[NFS3PROC_PATHCONF] = {
 		.pc_func = nfsd3_proc_pathconf,
 		.pc_decode = nfs3svc_decode_fhandleargs,
 		.pc_encode = nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
-		.pc_argzero = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+6,
-		.pc_name = "PATHCONF",
 	},
 	[NFS3PROC_COMMIT] = {
 		.pc_func = nfsd3_proc_commit,
@@ -1056,11 +918,9 @@ static const struct svc_procedure nfsd_procedures3[22] = {
 		.pc_encode = nfs3svc_encode_commitres,
 		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
-		.pc_argzero = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+WC+2,
-		.pc_name = "COMMIT",
 	},
 };
 
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 3308dd671ef0..716566da400e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -14,26 +14,13 @@
 #include "netns.h"
 #include "vfs.h"
 
-/*
- * Force construction of an empty post-op attr
- */
-static const struct svc_fh nfs3svc_null_fh = {
-	.fh_no_wcc	= true,
-};
+#define NFSDDBG_FACILITY		NFSDDBG_XDR
 
-/*
- * time_delta. {1, 0} means the server is accurate only
- * to the nearest second.
- */
-static const struct timespec64 nfs3svc_time_delta = {
-	.tv_sec		= 1,
-	.tv_nsec	= 0,
-};
 
 /*
  * Mapping of S_IF* types to NFS file types
  */
-static const u32 nfs3_ftypes[] = {
+static u32	nfs3_ftypes[] = {
 	NF3NON,  NF3FIFO, NF3CHR, NF3BAD,
 	NF3DIR,  NF3BAD,  NF3BLK, NF3BAD,
 	NF3REG,  NF3BAD,  NF3LNK, NF3BAD,
@@ -42,938 +29,824 @@ static const u32 nfs3_ftypes[] = {
 
 
 /*
- * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6)
+ * XDR functions for basic NFS types
  */
+static __be32 *
+encode_time3(__be32 *p, struct timespec64 *time)
+{
+	*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
+	return p;
+}
 
 static __be32 *
-encode_nfstime3(__be32 *p, const struct timespec64 *time)
+decode_time3(__be32 *p, struct timespec64 *time)
 {
-	*p++ = cpu_to_be32((u32)time->tv_sec);
-	*p++ = cpu_to_be32(time->tv_nsec);
+	time->tv_sec = ntohl(*p++);
+	time->tv_nsec = ntohl(*p++);
+	return p;
+}
+
+static __be32 *
+decode_fh(__be32 *p, struct svc_fh *fhp)
+{
+	unsigned int size;
+	fh_init(fhp, NFS3_FHSIZE);
+	size = ntohl(*p++);
+	if (size > NFS3_FHSIZE)
+		return NULL;
+
+	memcpy(&fhp->fh_handle.fh_base, p, size);
+	fhp->fh_handle.fh_size = size;
+	return p + XDR_QUADLEN(size);
+}
+
+/* Helper function for NFSv3 ACL code */
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp)
+{
+	return decode_fh(p, fhp);
+}
+
+static __be32 *
+encode_fh(__be32 *p, struct svc_fh *fhp)
+{
+	unsigned int size = fhp->fh_handle.fh_size;
+	*p++ = htonl(size);
+	if (size) p[XDR_QUADLEN(size)-1]=0;
+	memcpy(p, &fhp->fh_handle.fh_base, size);
+	return p + XDR_QUADLEN(size);
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ */
+static __be32 *
+decode_filename(__be32 *p, char **namp, unsigned int *lenp)
+{
+	char		*name;
+	unsigned int	i;
+
+	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
+		for (i = 0, name = *namp; i < *lenp; i++, name++) {
+			if (*name == '\0' || *name == '/')
+				return NULL;
+		}
+	}
 
 	return p;
 }
 
-static bool
-svcxdr_decode_nfstime3(struct xdr_stream *xdr, struct timespec64 *timep)
+static __be32 *
+decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns)
 {
-	__be32 *p;
-
-	p = xdr_inline_decode(xdr, XDR_UNIT * 2);
-	if (!p)
-		return false;
-	timep->tv_sec = be32_to_cpup(p++);
-	timep->tv_nsec = be32_to_cpup(p);
-
-	return true;
-}
-
-/**
- * svcxdr_decode_nfs_fh3 - Decode an NFSv3 file handle
- * @xdr: XDR stream positioned at an undecoded NFSv3 FH
- * @fhp: OUT: filled-in server file handle
- *
- * Return values:
- *  %false: The encoded file handle was not valid
- *  %true: @fhp has been initialized
- */
-bool
-svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp)
-{
-	__be32 *p;
-	u32 size;
-
-	if (xdr_stream_decode_u32(xdr, &size) < 0)
-		return false;
-	if (size == 0 || size > NFS3_FHSIZE)
-		return false;
-	p = xdr_inline_decode(xdr, size);
-	if (!p)
-		return false;
-	fh_init(fhp, NFS3_FHSIZE);
-	fhp->fh_handle.fh_size = size;
-	memcpy(&fhp->fh_handle.fh_raw, p, size);
-
-	return true;
-}
-
-/**
- * svcxdr_encode_nfsstat3 - Encode an NFSv3 status code
- * @xdr: XDR stream
- * @status: status value to encode
- *
- * Return values:
- *   %false: Send buffer space was exhausted
- *   %true: Success
- */
-bool
-svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, sizeof(status));
-	if (!p)
-		return false;
-	*p = status;
-
-	return true;
-}
-
-static bool
-svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp)
-{
-	u32 size = fhp->fh_handle.fh_size;
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT + size);
-	if (!p)
-		return false;
-	*p++ = cpu_to_be32(size);
-	if (size)
-		p[XDR_QUADLEN(size) - 1] = 0;
-	memcpy(p, &fhp->fh_handle.fh_raw, size);
-
-	return true;
-}
-
-static bool
-svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp)
-{
-	if (xdr_stream_encode_item_present(xdr) < 0)
-		return false;
-	if (!svcxdr_encode_nfs_fh3(xdr, fhp))
-		return false;
-
-	return true;
-}
-
-static bool
-svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, NFS3_COOKIEVERFSIZE);
-	if (!p)
-		return false;
-	memcpy(p, verf, NFS3_COOKIEVERFSIZE);
-
-	return true;
-}
-
-static bool
-svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, NFS3_WRITEVERFSIZE);
-	if (!p)
-		return false;
-	memcpy(p, verf, NFS3_WRITEVERFSIZE);
-
-	return true;
-}
-
-static bool
-svcxdr_decode_filename3(struct xdr_stream *xdr, char **name, unsigned int *len)
-{
-	u32 size, i;
-	__be32 *p;
-	char *c;
-
-	if (xdr_stream_decode_u32(xdr, &size) < 0)
-		return false;
-	if (size == 0 || size > NFS3_MAXNAMLEN)
-		return false;
-	p = xdr_inline_decode(xdr, size);
-	if (!p)
-		return false;
-
-	*len = size;
-	*name = (char *)p;
-	for (i = 0, c = *name; i < size; i++, c++) {
-		if (*c == '\0' || *c == '/')
-			return false;
-	}
-
-	return true;
-}
-
-static bool
-svcxdr_decode_diropargs3(struct xdr_stream *xdr, struct svc_fh *fhp,
-			 char **name, unsigned int *len)
-{
-	return svcxdr_decode_nfs_fh3(xdr, fhp) &&
-		svcxdr_decode_filename3(xdr, name, len);
-}
-
-static bool
-svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-		     struct iattr *iap)
-{
-	u32 set_it;
+	u32	tmp;
 
 	iap->ia_valid = 0;
 
-	if (xdr_stream_decode_bool(xdr, &set_it) < 0)
-		return false;
-	if (set_it) {
-		u32 mode;
-
-		if (xdr_stream_decode_u32(xdr, &mode) < 0)
-			return false;
+	if (*p++) {
 		iap->ia_valid |= ATTR_MODE;
-		iap->ia_mode = mode;
+		iap->ia_mode = ntohl(*p++);
 	}
-	if (xdr_stream_decode_bool(xdr, &set_it) < 0)
-		return false;
-	if (set_it) {
-		u32 uid;
-
-		if (xdr_stream_decode_u32(xdr, &uid) < 0)
-			return false;
-		iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), uid);
+	if (*p++) {
+		iap->ia_uid = make_kuid(userns, ntohl(*p++));
 		if (uid_valid(iap->ia_uid))
 			iap->ia_valid |= ATTR_UID;
 	}
-	if (xdr_stream_decode_bool(xdr, &set_it) < 0)
-		return false;
-	if (set_it) {
-		u32 gid;
-
-		if (xdr_stream_decode_u32(xdr, &gid) < 0)
-			return false;
-		iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), gid);
+	if (*p++) {
+		iap->ia_gid = make_kgid(userns, ntohl(*p++));
 		if (gid_valid(iap->ia_gid))
 			iap->ia_valid |= ATTR_GID;
 	}
-	if (xdr_stream_decode_bool(xdr, &set_it) < 0)
-		return false;
-	if (set_it) {
-		u64 newsize;
+	if (*p++) {
+		u64	newsize;
 
-		if (xdr_stream_decode_u64(xdr, &newsize) < 0)
-			return false;
 		iap->ia_valid |= ATTR_SIZE;
-		iap->ia_size = newsize;
+		p = xdr_decode_hyper(p, &newsize);
+		iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
 	}
-	if (xdr_stream_decode_u32(xdr, &set_it) < 0)
-		return false;
-	switch (set_it) {
-	case DONT_CHANGE:
-		break;
-	case SET_TO_SERVER_TIME:
+	if ((tmp = ntohl(*p++)) == 1) {	/* set to server time */
 		iap->ia_valid |= ATTR_ATIME;
-		break;
-	case SET_TO_CLIENT_TIME:
-		if (!svcxdr_decode_nfstime3(xdr, &iap->ia_atime))
-			return false;
+	} else if (tmp == 2) {		/* set to client time */
 		iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
-		break;
-	default:
-		return false;
+		iap->ia_atime.tv_sec = ntohl(*p++);
+		iap->ia_atime.tv_nsec = ntohl(*p++);
 	}
-	if (xdr_stream_decode_u32(xdr, &set_it) < 0)
-		return false;
-	switch (set_it) {
-	case DONT_CHANGE:
-		break;
-	case SET_TO_SERVER_TIME:
+	if ((tmp = ntohl(*p++)) == 1) {	/* set to server time */
 		iap->ia_valid |= ATTR_MTIME;
-		break;
-	case SET_TO_CLIENT_TIME:
-		if (!svcxdr_decode_nfstime3(xdr, &iap->ia_mtime))
-			return false;
+	} else if (tmp == 2) {		/* set to client time */
 		iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
-		break;
-	default:
-		return false;
+		iap->ia_mtime.tv_sec = ntohl(*p++);
+		iap->ia_mtime.tv_nsec = ntohl(*p++);
 	}
-
-	return true;
+	return p;
 }
 
-static bool
-svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
+static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp)
 {
-	__be32 *p;
-	u32 check;
-
-	if (xdr_stream_decode_bool(xdr, &check) < 0)
-		return false;
-	if (check) {
-		p = xdr_inline_decode(xdr, XDR_UNIT * 2);
-		if (!p)
-			return false;
-		args->check_guard = 1;
-		args->guardtime = be32_to_cpup(p);
-	} else
-		args->check_guard = 0;
-
-	return true;
-}
-
-static bool
-svcxdr_decode_specdata3(struct xdr_stream *xdr, struct nfsd3_mknodargs *args)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(xdr, XDR_UNIT * 2);
-	if (!p)
-		return false;
-	args->major = be32_to_cpup(p++);
-	args->minor = be32_to_cpup(p);
-
-	return true;
-}
-
-static bool
-svcxdr_decode_devicedata3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-			  struct nfsd3_mknodargs *args)
-{
-	return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) &&
-		svcxdr_decode_specdata3(xdr, args);
-}
-
-static bool
-svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-		     const struct svc_fh *fhp, const struct kstat *stat)
-{
-	struct user_namespace *userns = nfsd_user_namespace(rqstp);
-	__be32 *p;
-	u64 fsid;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT * 21);
-	if (!p)
-		return false;
-
-	*p++ = cpu_to_be32(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
-	*p++ = cpu_to_be32((u32)(stat->mode & S_IALLUGO));
-	*p++ = cpu_to_be32((u32)stat->nlink);
-	*p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid));
-	*p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid));
-	if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN)
-		p = xdr_encode_hyper(p, (u64)NFS3_MAXPATHLEN);
-	else
-		p = xdr_encode_hyper(p, (u64)stat->size);
-
-	/* used */
-	p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
-
-	/* rdev */
-	*p++ = cpu_to_be32((u32)MAJOR(stat->rdev));
-	*p++ = cpu_to_be32((u32)MINOR(stat->rdev));
-
+	u64 f;
 	switch(fsid_source(fhp)) {
+	default:
+	case FSIDSOURCE_DEV:
+		p = xdr_encode_hyper(p, (u64)huge_encode_dev
+				     (fhp->fh_dentry->d_sb->s_dev));
+		break;
 	case FSIDSOURCE_FSID:
-		fsid = (u64)fhp->fh_export->ex_fsid;
+		p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
 		break;
 	case FSIDSOURCE_UUID:
-		fsid = ((u64 *)fhp->fh_export->ex_uuid)[0];
-		fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1];
+		f = ((u64*)fhp->fh_export->ex_uuid)[0];
+		f ^= ((u64*)fhp->fh_export->ex_uuid)[1];
+		p = xdr_encode_hyper(p, f);
 		break;
-	default:
-		fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev);
 	}
-	p = xdr_encode_hyper(p, fsid);
+	return p;
+}
 
-	/* fileid */
+static __be32 *
+encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
+	      struct kstat *stat)
+{
+	struct user_namespace *userns = nfsd_user_namespace(rqstp);
+	*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
+	*p++ = htonl((u32) (stat->mode & S_IALLUGO));
+	*p++ = htonl((u32) stat->nlink);
+	*p++ = htonl((u32) from_kuid_munged(userns, stat->uid));
+	*p++ = htonl((u32) from_kgid_munged(userns, stat->gid));
+	if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
+		p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
+	} else {
+		p = xdr_encode_hyper(p, (u64) stat->size);
+	}
+	p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
+	*p++ = htonl((u32) MAJOR(stat->rdev));
+	*p++ = htonl((u32) MINOR(stat->rdev));
+	p = encode_fsid(p, fhp);
 	p = xdr_encode_hyper(p, stat->ino);
+	p = encode_time3(p, &stat->atime);
+	p = encode_time3(p, &stat->mtime);
+	p = encode_time3(p, &stat->ctime);
 
-	p = encode_nfstime3(p, &stat->atime);
-	p = encode_nfstime3(p, &stat->mtime);
-	encode_nfstime3(p, &stat->ctime);
-
-	return true;
+	return p;
 }
 
-static bool
-svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp)
+static __be32 *
+encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT * 6);
-	if (!p)
-		return false;
-	p = xdr_encode_hyper(p, (u64)fhp->fh_pre_size);
-	p = encode_nfstime3(p, &fhp->fh_pre_mtime);
-	encode_nfstime3(p, &fhp->fh_pre_ctime);
-
-	return true;
-}
-
-static bool
-svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp)
-{
-	if (!fhp->fh_pre_saved) {
-		if (xdr_stream_encode_item_absent(xdr) < 0)
-			return false;
-		return true;
-	}
-
-	if (xdr_stream_encode_item_present(xdr) < 0)
-		return false;
-	return svcxdr_encode_wcc_attr(xdr, fhp);
-}
-
-/**
- * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes
- * @rqstp: Context of a completed RPC transaction
- * @xdr: XDR stream
- * @fhp: File handle to encode
- *
- * Return values:
- *   %false: Send buffer space was exhausted
- *   %true: Success
- */
-bool
-svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-			   const struct svc_fh *fhp)
-{
-	struct dentry *dentry = fhp->fh_dentry;
-	struct kstat stat;
-
-	/*
-	 * The inode may be NULL if the call failed because of a
-	 * stale file handle. In this case, no attributes are
-	 * returned.
-	 */
-	if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry))
-		goto no_post_op_attrs;
-	if (fh_getattr(fhp, &stat) != nfs_ok)
-		goto no_post_op_attrs;
-
-	if (xdr_stream_encode_item_present(xdr) < 0)
-		return false;
-	lease_get_mtime(d_inode(dentry), &stat.mtime);
-	if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &stat))
-		return false;
-
-	return true;
-
-no_post_op_attrs:
-	return xdr_stream_encode_item_absent(xdr) > 0;
+	/* Attributes to follow */
+	*p++ = xdr_one;
+	return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr);
 }
 
 /*
- * Encode weak cache consistency data
+ * Encode post-operation attributes.
+ * The inode may be NULL if the call failed because of a stale file
+ * handle. In this case, no attributes are returned.
  */
-static bool
-svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-		       const struct svc_fh *fhp)
+static __be32 *
+encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct dentry *dentry = fhp->fh_dentry;
+	if (dentry && d_really_is_positive(dentry)) {
+	        __be32 err;
+		struct kstat stat;
 
-	if (!dentry || !d_really_is_positive(dentry) || !fhp->fh_post_saved)
-		goto neither;
+		err = fh_getattr(fhp, &stat);
+		if (!err) {
+			*p++ = xdr_one;		/* attributes follow */
+			lease_get_mtime(d_inode(dentry), &stat.mtime);
+			return encode_fattr3(rqstp, p, fhp, &stat);
+		}
+	}
+	*p++ = xdr_zero;
+	return p;
+}
 
-	/* before */
-	if (!svcxdr_encode_pre_op_attr(xdr, fhp))
-		return false;
+/* Helper for NFSv3 ACLs */
+__be32 *
+nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+{
+	return encode_post_op_attr(rqstp, p, fhp);
+}
 
-	/* after */
-	if (xdr_stream_encode_item_present(xdr) < 0)
-		return false;
-	if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &fhp->fh_post_attr))
-		return false;
+/*
+ * Encode weak cache consistency data
+ */
+static __be32 *
+encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
 
-	return true;
+	if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) {
+		if (fhp->fh_pre_saved) {
+			*p++ = xdr_one;
+			p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
+			p = encode_time3(p, &fhp->fh_pre_mtime);
+			p = encode_time3(p, &fhp->fh_pre_ctime);
+		} else {
+			*p++ = xdr_zero;
+		}
+		return encode_saved_post_attr(rqstp, p, fhp);
+	}
+	/* no pre- or post-attrs */
+	*p++ = xdr_zero;
+	return encode_post_op_attr(rqstp, p, fhp);
+}
 
-neither:
-	if (xdr_stream_encode_item_absent(xdr) < 0)
-		return false;
-	if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp))
-		return false;
+/*
+ * Fill in the pre_op attr for the wcc data
+ */
+void fill_pre_wcc(struct svc_fh *fhp)
+{
+	struct inode    *inode;
+	struct kstat	stat;
+	__be32 err;
 
-	return true;
+	if (fhp->fh_pre_saved)
+		return;
+
+	inode = d_inode(fhp->fh_dentry);
+	err = fh_getattr(fhp, &stat);
+	if (err) {
+		/* Grab the times from inode anyway */
+		stat.mtime = inode->i_mtime;
+		stat.ctime = inode->i_ctime;
+		stat.size  = inode->i_size;
+	}
+
+	fhp->fh_pre_mtime = stat.mtime;
+	fhp->fh_pre_ctime = stat.ctime;
+	fhp->fh_pre_size  = stat.size;
+	fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+	fhp->fh_pre_saved = true;
+}
+
+/*
+ * Fill in the post_op attr for the wcc data
+ */
+void fill_post_wcc(struct svc_fh *fhp)
+{
+	__be32 err;
+
+	if (fhp->fh_post_saved)
+		printk("nfsd: inode locked twice during operation.\n");
+
+	err = fh_getattr(fhp, &fhp->fh_post_attr);
+	fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr,
+						     d_inode(fhp->fh_dentry));
+	if (err) {
+		fhp->fh_post_saved = false;
+		/* Grab the ctime anyway - set_change_info might use it */
+		fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
+	} else
+		fhp->fh_post_saved = true;
 }
 
 /*
  * XDR decode functions
  */
+int
+nfs3svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+{
+	return 1;
+}
 
-bool
-nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_fhandle *args = rqstp->rq_argp;
 
-	return svcxdr_decode_nfs_fh3(xdr, &args->fh);
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_sattrargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_nfs_fh3(xdr, &args->fh) &&
-		svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) &&
-		svcxdr_decode_sattrguard3(xdr, args);
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
+
+	if ((args->check_guard = ntohl(*p++)) != 0) { 
+		struct timespec64 time;
+		p = decode_time3(p, &time);
+		args->guardtime = time.tv_sec;
+	}
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_diropargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len);
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_accessargs *args = rqstp->rq_argp;
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->access) < 0)
-		return false;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	args->access = ntohl(*p++);
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_readargs *args = rqstp->rq_argp;
-
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
-
-	return true;
-}
-
-bool
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	struct nfsd3_writeargs *args = rqstp->rq_argp;
+	unsigned int len;
+	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->stable) < 0)
-		return false;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = xdr_decode_hyper(p, &args->offset);
 
-	/* opaque data */
-	if (xdr_stream_decode_u32(xdr, &args->len) < 0)
-		return false;
+	args->count = ntohl(*p++);
+	len = min(args->count, max_blocksize);
 
-	/* request sanity */
+	/* set up the kvec */
+	v=0;
+	while (len > 0) {
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
+		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
+		len -= rqstp->rq_vec[v].iov_len;
+		v++;
+	}
+	args->vlen = v;
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nfsd3_writeargs *args = rqstp->rq_argp;
+	unsigned int len, hdr, dlen;
+	u32 max_blocksize = svc_max_payload(rqstp);
+	struct kvec *head = rqstp->rq_arg.head;
+	struct kvec *tail = rqstp->rq_arg.tail;
+
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = xdr_decode_hyper(p, &args->offset);
+
+	args->count = ntohl(*p++);
+	args->stable = ntohl(*p++);
+	len = args->len = ntohl(*p++);
+	if ((void *)p > head->iov_base + head->iov_len)
+		return 0;
+	/*
+	 * The count must equal the amount of data passed.
+	 */
 	if (args->count != args->len)
-		return false;
+		return 0;
+
+	/*
+	 * Check to make sure that we got the right number of
+	 * bytes.
+	 */
+	hdr = (void*)p - head->iov_base;
+	dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr;
+	/*
+	 * Round the length of the data which was specified up to
+	 * the next multiple of XDR units and then compare that
+	 * against the length which was actually received.
+	 * Note that when RPCSEC/GSS (for example) is used, the
+	 * data buffer can be padded so dlen might be larger
+	 * than required.  It must never be smaller.
+	 */
+	if (dlen < XDR_QUADLEN(len)*4)
+		return 0;
+
 	if (args->count > max_blocksize) {
 		args->count = max_blocksize;
-		args->len = max_blocksize;
+		len = args->len = max_blocksize;
 	}
 
-	return xdr_stream_subsegment(xdr, &args->payload, args->count);
+	args->first.iov_base = (void *)p;
+	args->first.iov_len = head->iov_len - hdr;
+	return 1;
 }
 
-bool
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_createargs *args = rqstp->rq_argp;
 
-	if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->createmode) < 0)
-		return false;
-	switch (args->createmode) {
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	switch (args->createmode = ntohl(*p++)) {
 	case NFS3_CREATE_UNCHECKED:
 	case NFS3_CREATE_GUARDED:
-		return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
+		p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
+		break;
 	case NFS3_CREATE_EXCLUSIVE:
-		args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE);
-		if (!args->verf)
-			return false;
+		args->verf = p;
+		p += 2;
 		break;
 	default:
-		return false;
+		return 0;
 	}
-	return true;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_createargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_diropargs3(xdr, &args->fh,
-					&args->name, &args->len) &&
-		svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
+	if (!(p = decode_fh(p, &args->fh)) ||
+	    !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+	p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
-	struct kvec *head = rqstp->rq_arg.head;
+	char *base = (char *)p;
+	size_t dlen;
 
-	if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen))
-		return false;
-	if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
-		return false;
+	if (!(p = decode_fh(p, &args->ffh)) ||
+	    !(p = decode_filename(p, &args->fname, &args->flen)))
+		return 0;
+	p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
 
-	/* symlink_data */
-	args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
-	args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
-	return args->first.iov_base != NULL;
+	args->tlen = ntohl(*p++);
+
+	args->first.iov_base = p;
+	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+	args->first.iov_len -= (char *)p - base;
+
+	dlen = args->first.iov_len + rqstp->rq_arg.page_len +
+	       rqstp->rq_arg.tail[0].iov_len;
+	if (dlen < XDR_QUADLEN(args->tlen) << 2)
+		return 0;
+	return 1;
 }
 
-bool
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_mknodargs *args = rqstp->rq_argp;
 
-	if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->ftype) < 0)
-		return false;
-	switch (args->ftype) {
-	case NF3CHR:
-	case NF3BLK:
-		return svcxdr_decode_devicedata3(rqstp, xdr, args);
-	case NF3SOCK:
-	case NF3FIFO:
-		return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
-	case NF3REG:
-	case NF3DIR:
-	case NF3LNK:
-		/* Valid XDR but illegal file types */
-		break;
-	default:
-		return false;
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	args->ftype = ntohl(*p++);
+
+	if (args->ftype == NF3BLK  || args->ftype == NF3CHR
+	 || args->ftype == NF3SOCK || args->ftype == NF3FIFO)
+		p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
+
+	if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
+		args->major = ntohl(*p++);
+		args->minor = ntohl(*p++);
 	}
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_renameargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_diropargs3(xdr, &args->ffh,
-					&args->fname, &args->flen) &&
-		svcxdr_decode_diropargs3(xdr, &args->tfh,
-					 &args->tname, &args->tlen);
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_filename(p, &args->fname, &args->flen))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nfsd3_readlinkargs *args = rqstp->rq_argp;
+
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	args->buffer = page_address(*(rqstp->rq_next_page++));
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_linkargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_nfs_fh3(xdr, &args->ffh) &&
-		svcxdr_decode_diropargs3(xdr, &args->tfh,
-					 &args->tname, &args->tlen);
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_readdirargs *args = rqstp->rq_argp;
+	int len;
+	u32 max_blocksize = svc_max_payload(rqstp);
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
-		return false;
-	args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
-	if (!args->verf)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = xdr_decode_hyper(p, &args->cookie);
+	args->verf   = p; p += 2;
+	args->dircount = ~0;
+	args->count  = ntohl(*p++);
+	len = args->count  = min_t(u32, args->count, max_blocksize);
 
-	return true;
+	while (len > 0) {
+		struct page *p = *(rqstp->rq_next_page++);
+		if (!args->buffer)
+			args->buffer = page_address(p);
+		len -= PAGE_SIZE;
+	}
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_readdirargs *args = rqstp->rq_argp;
-	u32 dircount;
+	int len;
+	u32 max_blocksize = svc_max_payload(rqstp);
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
-		return false;
-	args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
-	if (!args->verf)
-		return false;
-	/* dircount is ignored */
-	if (xdr_stream_decode_u32(xdr, &dircount) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = xdr_decode_hyper(p, &args->cookie);
+	args->verf     = p; p += 2;
+	args->dircount = ntohl(*p++);
+	args->count    = ntohl(*p++);
 
-	return true;
+	len = args->count = min(args->count, max_blocksize);
+	while (len > 0) {
+		struct page *p = *(rqstp->rq_next_page++);
+		if (!args->buffer)
+			args->buffer = page_address(p);
+		len -= PAGE_SIZE;
+	}
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_commitargs *args = rqstp->rq_argp;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = xdr_decode_hyper(p, &args->offset);
+	args->count = ntohl(*p++);
 
-	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
-
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
 /*
  * XDR encode functions
  */
 
+int
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+{
+	return xdr_ressize_check(rqstp, p);
+}
+
 /* GETATTR */
-bool
-nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime);
-		if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat))
-			return false;
-		break;
+	*p++ = resp->status;
+	if (resp->status == 0) {
+		lease_get_mtime(d_inode(resp->fh.fh_dentry),
+				&resp->stat.mtime);
+		p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
 	}
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* SETATTR, REMOVE, RMDIR */
-bool
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
-	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
-		svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh);
+	*p++ = resp->status;
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* LOOKUP */
-bool
-nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
-			return false;
+	*p++ = resp->status;
+	if (resp->status == 0) {
+		p = encode_fh(p, &resp->fh);
+		p = encode_post_op_attr(rqstp, p, &resp->fh);
 	}
-
-	return true;
+	p = encode_post_op_attr(rqstp, p, &resp->dirfh);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-bool
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->access) < 0)
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-	}
-
-	return true;
+	*p++ = resp->status;
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0)
+		*p++ = htonl(resp->access);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* READLINK */
-bool
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
-	struct kvec *head = rqstp->rq_res.head;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->len) < 0)
-			return false;
-		xdr_write_pages(xdr, resp->pages, 0, resp->len);
-		if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-	}
-
-	return true;
+	*p++ = resp->status;
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0) {
+		*p++ = htonl(resp->len);
+		xdr_ressize_check(rqstp, p);
+		rqstp->rq_res.page_len = resp->len;
+		if (resp->len & 3) {
+			/* need to pad the tail */
+			rqstp->rq_res.tail[0].iov_base = p;
+			*p = 0;
+			rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
+		}
+		return 1;
+	} else
+		return xdr_ressize_check(rqstp, p);
 }
 
 /* READ */
-bool
-nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_readres *resp = rqstp->rq_resp;
-	struct kvec *head = rqstp->rq_res.head;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return false;
-		if (xdr_stream_encode_bool(xdr, resp->eof) < 0)
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return false;
-		xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
-				resp->count);
-		if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-	}
-
-	return true;
+	*p++ = resp->status;
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	if (resp->status == 0) {
+		*p++ = htonl(resp->count);
+		*p++ = htonl(resp->eof);
+		*p++ = htonl(resp->count);	/* xdr opaque count */
+		xdr_ressize_check(rqstp, p);
+		/* now update rqstp->rq_res to reflect data as well */
+		rqstp->rq_res.page_len = resp->count;
+		if (resp->count & 3) {
+			/* need to pad the tail */
+			rqstp->rq_res.tail[0].iov_base = p;
+			*p = 0;
+			rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
+		}
+		return 1;
+	} else
+		return xdr_ressize_check(rqstp, p);
 }
 
 /* WRITE */
-bool
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_writeres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->committed) < 0)
-			return false;
-		if (!svcxdr_encode_writeverf3(xdr, resp->verf))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return false;
+	*p++ = resp->status;
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+	if (resp->status == 0) {
+		*p++ = htonl(resp->count);
+		*p++ = htonl(resp->committed);
+		*p++ = resp->verf[0];
+		*p++ = resp->verf[1];
 	}
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
-bool
-nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
-			return false;
+	*p++ = resp->status;
+	if (resp->status == 0) {
+		*p++ = xdr_one;
+		p = encode_fh(p, &resp->fh);
+		p = encode_post_op_attr(rqstp, p, &resp->fh);
 	}
-
-	return true;
+	p = encode_wcc_data(rqstp, p, &resp->dirfh);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* RENAME */
-bool
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_renameres *resp = rqstp->rq_resp;
 
-	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
-		svcxdr_encode_wcc_data(rqstp, xdr, &resp->ffh) &&
-		svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh);
+	*p++ = resp->status;
+	p = encode_wcc_data(rqstp, p, &resp->ffh);
+	p = encode_wcc_data(rqstp, p, &resp->tfh);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* LINK */
-bool
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_linkres *resp = rqstp->rq_resp;
 
-	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
-		svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh) &&
-		svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh);
+	*p++ = resp->status;
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+	p = encode_wcc_data(rqstp, p, &resp->tfh);
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* READDIR */
-bool
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_readdirres *resp = rqstp->rq_resp;
-	struct xdr_buf *dirlist = &resp->dirlist;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
-			return false;
-		xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
-		/* no more entries */
-		if (xdr_stream_encode_item_absent(xdr) < 0)
-			return false;
-		if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return false;
-	}
+	*p++ = resp->status;
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
 
-	return true;
+	if (resp->status == 0) {
+		/* stupid readdir cookie */
+		memcpy(p, resp->verf, 8); p += 2;
+		xdr_ressize_check(rqstp, p);
+		if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE)
+			return 1; /*No room for trailer */
+		rqstp->rq_res.page_len = (resp->count) << 2;
+
+		/* add the 'tail' to the end of the 'head' page - page 0. */
+		rqstp->rq_res.tail[0].iov_base = p;
+		*p++ = 0;		/* no more entries */
+		*p++ = htonl(resp->common.err == nfserr_eof);
+		rqstp->rq_res.tail[0].iov_len = 2<<2;
+		return 1;
+	} else
+		return xdr_ressize_check(rqstp, p);
+}
+
+static __be32 *
+encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
+	     int namlen, u64 ino)
+{
+	*p++ = xdr_one;				 /* mark entry present */
+	p    = xdr_encode_hyper(p, ino);	 /* file id */
+	p    = xdr_encode_array(p, name, namlen);/* name length & name */
+
+	cd->offset = p;				/* remember pointer */
+	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */
+
+	return p;
 }
 
 static __be32
@@ -1014,323 +887,267 @@ out:
 	return rv;
 }
 
-/**
- * nfs3svc_encode_cookie3 - Encode a directory offset cookie
- * @resp: readdir result context
- * @offset: offset cookie to encode
- *
- * The buffer space for the offset cookie has already been reserved
- * by svcxdr_encode_entry3_common().
- */
-void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset)
+static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
 {
-	__be64 cookie = cpu_to_be64(offset);
+	struct svc_fh	*fh = &cd->scratch;
+	__be32 err;
 
-	if (!resp->cookie_offset)
-		return;
-	write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie,
-			       sizeof(cookie));
-	resp->cookie_offset = 0;
-}
-
-static bool
-svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name,
-			    int namlen, loff_t offset, u64 ino)
-{
-	struct xdr_buf *dirlist = &resp->dirlist;
-	struct xdr_stream *xdr = &resp->xdr;
-
-	if (xdr_stream_encode_item_present(xdr) < 0)
-		return false;
-	/* fileid */
-	if (xdr_stream_encode_u64(xdr, ino) < 0)
-		return false;
-	/* name */
-	if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS3_MAXNAMLEN)) < 0)
-		return false;
-	/* cookie */
-	resp->cookie_offset = dirlist->len;
-	if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0)
-		return false;
-
-	return true;
-}
-
-/**
- * nfs3svc_encode_entry3 - encode one NFSv3 READDIR entry
- * @data: directory context
- * @name: name of the object to be encoded
- * @namlen: length of that name, in bytes
- * @offset: the offset of the previous entry
- * @ino: the fileid of this entry
- * @d_type: unused
- *
- * Return values:
- *   %0: Entry was successfully encoded.
- *   %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
- *
- * On exit, the following fields are updated:
- *   - resp->xdr
- *   - resp->common.err
- *   - resp->cookie_offset
- */
-int nfs3svc_encode_entry3(void *data, const char *name, int namlen,
-			  loff_t offset, u64 ino, unsigned int d_type)
-{
-	struct readdir_cd *ccd = data;
-	struct nfsd3_readdirres *resp = container_of(ccd,
-						     struct nfsd3_readdirres,
-						     common);
-	unsigned int starting_length = resp->dirlist.len;
-
-	/* The offset cookie for the previous entry */
-	nfs3svc_encode_cookie3(resp, offset);
-
-	if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino))
-		goto out_toosmall;
-
-	xdr_commit_encode(&resp->xdr);
-	resp->common.err = nfs_ok;
-	return 0;
-
-out_toosmall:
-	resp->cookie_offset = 0;
-	resp->common.err = nfserr_toosmall;
-	resp->dirlist.len = starting_length;
-	return -EINVAL;
-}
-
-static bool
-svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name,
-			  int namlen, u64 ino)
-{
-	struct xdr_stream *xdr = &resp->xdr;
-	struct svc_fh *fhp = &resp->scratch;
-	bool result;
-
-	result = false;
-	fh_init(fhp, NFS3_FHSIZE);
-	if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok)
-		goto out_noattrs;
-
-	if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp))
+	fh_init(fh, NFS3_FHSIZE);
+	err = compose_entry_fh(cd, fh, name, namlen, ino);
+	if (err) {
+		*p++ = 0;
+		*p++ = 0;
 		goto out;
-	if (!svcxdr_encode_post_op_fh3(xdr, fhp))
-		goto out;
-	result = true;
-
+	}
+	p = encode_post_op_attr(cd->rqstp, p, fh);
+	*p++ = xdr_one;			/* yes, a file handle follows */
+	p = encode_fh(p, fh);
 out:
-	fh_put(fhp);
-	return result;
-
-out_noattrs:
-	if (xdr_stream_encode_item_absent(xdr) < 0)
-		return false;
-	if (xdr_stream_encode_item_absent(xdr) < 0)
-		return false;
-	return true;
+	fh_put(fh);
+	return p;
 }
 
-/**
- * nfs3svc_encode_entryplus3 - encode one NFSv3 READDIRPLUS entry
- * @data: directory context
- * @name: name of the object to be encoded
- * @namlen: length of that name, in bytes
- * @offset: the offset of the previous entry
- * @ino: the fileid of this entry
- * @d_type: unused
- *
- * Return values:
- *   %0: Entry was successfully encoded.
- *   %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
- *
- * On exit, the following fields are updated:
- *   - resp->xdr
- *   - resp->common.err
- *   - resp->cookie_offset
+/*
+ * Encode a directory entry. This one works for both normal readdir
+ * and readdirplus.
+ * The normal readdir reply requires 2 (fileid) + 1 (stringlen)
+ * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen.
+ * 
+ * The readdirplus baggage is 1+21 words for post_op_attr, plus the
+ * file handle.
  */
-int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen,
-			      loff_t offset, u64 ino, unsigned int d_type)
+
+#define NFS3_ENTRY_BAGGAGE	(2 + 1 + 2 + 1)
+#define NFS3_ENTRYPLUS_BAGGAGE	(1 + 21 + 1 + (NFS3_FHSIZE >> 2))
+static int
+encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
+	     loff_t offset, u64 ino, unsigned int d_type, int plus)
 {
-	struct readdir_cd *ccd = data;
-	struct nfsd3_readdirres *resp = container_of(ccd,
-						     struct nfsd3_readdirres,
-						     common);
-	unsigned int starting_length = resp->dirlist.len;
+	struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
+		       					common);
+	__be32		*p = cd->buffer;
+	caddr_t		curr_page_addr = NULL;
+	struct page **	page;
+	int		slen;		/* string (name) length */
+	int		elen;		/* estimated entry length in words */
+	int		num_entry_words = 0;	/* actual number of words */
 
-	/* The offset cookie for the previous entry */
-	nfs3svc_encode_cookie3(resp, offset);
+	if (cd->offset) {
+		u64 offset64 = offset;
 
-	if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino))
-		goto out_toosmall;
-	if (!svcxdr_encode_entry3_plus(resp, name, namlen, ino))
-		goto out_toosmall;
+		if (unlikely(cd->offset1)) {
+			/* we ended up with offset on a page boundary */
+			*cd->offset = htonl(offset64 >> 32);
+			*cd->offset1 = htonl(offset64 & 0xffffffff);
+			cd->offset1 = NULL;
+		} else {
+			xdr_encode_hyper(cd->offset, offset64);
+		}
+		cd->offset = NULL;
+	}
 
-	xdr_commit_encode(&resp->xdr);
-	resp->common.err = nfs_ok;
+	/*
+	dprintk("encode_entry(%.*s @%ld%s)\n",
+		namlen, name, (long) offset, plus? " plus" : "");
+	 */
+
+	/* truncate filename if too long */
+	namlen = min(namlen, NFS3_MAXNAMLEN);
+
+	slen = XDR_QUADLEN(namlen);
+	elen = slen + NFS3_ENTRY_BAGGAGE
+		+ (plus? NFS3_ENTRYPLUS_BAGGAGE : 0);
+
+	if (cd->buflen < elen) {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+
+	/* determine which page in rq_respages[] we are currently filling */
+	for (page = cd->rqstp->rq_respages + 1;
+				page < cd->rqstp->rq_next_page; page++) {
+		curr_page_addr = page_address(*page);
+
+		if (((caddr_t)cd->buffer >= curr_page_addr) &&
+		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
+			break;
+	}
+
+	if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) {
+		/* encode entry in current page */
+
+		p = encode_entry_baggage(cd, p, name, namlen, ino);
+
+		if (plus)
+			p = encode_entryplus_baggage(cd, p, name, namlen, ino);
+		num_entry_words = p - cd->buffer;
+	} else if (*(page+1) != NULL) {
+		/* temporarily encode entry into next page, then move back to
+		 * current and next page in rq_respages[] */
+		__be32 *p1, *tmp;
+		int len1, len2;
+
+		/* grab next page for temporary storage of entry */
+		p1 = tmp = page_address(*(page+1));
+
+		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
+
+		if (plus)
+			p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
+
+		/* determine entry word length and lengths to go in pages */
+		num_entry_words = p1 - tmp;
+		len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer;
+		if ((num_entry_words << 2) < len1) {
+			/* the actual number of words in the entry is less
+			 * than elen and can still fit in the current page
+			 */
+			memmove(p, tmp, num_entry_words << 2);
+			p += num_entry_words;
+
+			/* update offset */
+			cd->offset = cd->buffer + (cd->offset - tmp);
+		} else {
+			unsigned int offset_r = (cd->offset - tmp) << 2;
+
+			/* update pointer to offset location.
+			 * This is a 64bit quantity, so we need to
+			 * deal with 3 cases:
+			 *  -	entirely in first page
+			 *  -	entirely in second page
+			 *  -	4 bytes in each page
+			 */
+			if (offset_r + 8 <= len1) {
+				cd->offset = p + (cd->offset - tmp);
+			} else if (offset_r >= len1) {
+				cd->offset -= len1 >> 2;
+			} else {
+				/* sitting on the fence */
+				BUG_ON(offset_r != len1 - 4);
+				cd->offset = p + (cd->offset - tmp);
+				cd->offset1 = tmp;
+			}
+
+			len2 = (num_entry_words << 2) - len1;
+
+			/* move from temp page to current and next pages */
+			memmove(p, tmp, len1);
+			memmove(tmp, (caddr_t)tmp+len1, len2);
+
+			p = tmp + (len2 >> 2);
+		}
+	}
+	else {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+
+	cd->buflen -= num_entry_words;
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
 	return 0;
 
-out_toosmall:
-	resp->cookie_offset = 0;
-	resp->common.err = nfserr_toosmall;
-	resp->dirlist.len = starting_length;
-	return -EINVAL;
 }
 
-static bool
-svcxdr_encode_fsstat3resok(struct xdr_stream *xdr,
-			   const struct nfsd3_fsstatres *resp)
+int
+nfs3svc_encode_entry(void *cd, const char *name,
+		     int namlen, loff_t offset, u64 ino, unsigned int d_type)
 {
-	const struct kstatfs *s = &resp->stats;
-	u64 bs = s->f_bsize;
-	__be32 *p;
+	return encode_entry(cd, name, namlen, offset, ino, d_type, 0);
+}
 
-	p = xdr_reserve_space(xdr, XDR_UNIT * 13);
-	if (!p)
-		return false;
-	p = xdr_encode_hyper(p, bs * s->f_blocks);	/* total bytes */
-	p = xdr_encode_hyper(p, bs * s->f_bfree);	/* free bytes */
-	p = xdr_encode_hyper(p, bs * s->f_bavail);	/* user available bytes */
-	p = xdr_encode_hyper(p, s->f_files);		/* total inodes */
-	p = xdr_encode_hyper(p, s->f_ffree);		/* free inodes */
-	p = xdr_encode_hyper(p, s->f_ffree);		/* user available inodes */
-	*p = cpu_to_be32(resp->invarsec);		/* mean unchanged time */
-
-	return true;
+int
+nfs3svc_encode_entry_plus(void *cd, const char *name,
+			  int namlen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	return encode_entry(cd, name, namlen, offset, ino, d_type, 1);
 }
 
 /* FSSTAT */
-bool
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
+	struct kstatfs	*s = &resp->stats;
+	u64		bs = s->f_bsize;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return false;
-		if (!svcxdr_encode_fsstat3resok(xdr, resp))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return false;
+	*p++ = resp->status;
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status == 0) {
+		p = xdr_encode_hyper(p, bs * s->f_blocks);	/* total bytes */
+		p = xdr_encode_hyper(p, bs * s->f_bfree);	/* free bytes */
+		p = xdr_encode_hyper(p, bs * s->f_bavail);	/* user available bytes */
+		p = xdr_encode_hyper(p, s->f_files);	/* total inodes */
+		p = xdr_encode_hyper(p, s->f_ffree);	/* free inodes */
+		p = xdr_encode_hyper(p, s->f_ffree);	/* user available inodes */
+		*p++ = htonl(resp->invarsec);	/* mean unchanged time */
 	}
-
-	return true;
-}
-
-static bool
-svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr,
-			   const struct nfsd3_fsinfores *resp)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT * 12);
-	if (!p)
-		return false;
-	*p++ = cpu_to_be32(resp->f_rtmax);
-	*p++ = cpu_to_be32(resp->f_rtpref);
-	*p++ = cpu_to_be32(resp->f_rtmult);
-	*p++ = cpu_to_be32(resp->f_wtmax);
-	*p++ = cpu_to_be32(resp->f_wtpref);
-	*p++ = cpu_to_be32(resp->f_wtmult);
-	*p++ = cpu_to_be32(resp->f_dtpref);
-	p = xdr_encode_hyper(p, resp->f_maxfilesize);
-	p = encode_nfstime3(p, &nfs3svc_time_delta);
-	*p = cpu_to_be32(resp->f_properties);
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* FSINFO */
-bool
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return false;
-		if (!svcxdr_encode_fsinfo3resok(xdr, resp))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return false;
+	*p++ = resp->status;
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status == 0) {
+		*p++ = htonl(resp->f_rtmax);
+		*p++ = htonl(resp->f_rtpref);
+		*p++ = htonl(resp->f_rtmult);
+		*p++ = htonl(resp->f_wtmax);
+		*p++ = htonl(resp->f_wtpref);
+		*p++ = htonl(resp->f_wtmult);
+		*p++ = htonl(resp->f_dtpref);
+		p = xdr_encode_hyper(p, resp->f_maxfilesize);
+		*p++ = xdr_one;
+		*p++ = xdr_zero;
+		*p++ = htonl(resp->f_properties);
 	}
 
-	return true;
-}
-
-static bool
-svcxdr_encode_pathconf3resok(struct xdr_stream *xdr,
-			     const struct nfsd3_pathconfres *resp)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT * 6);
-	if (!p)
-		return false;
-	*p++ = cpu_to_be32(resp->p_link_max);
-	*p++ = cpu_to_be32(resp->p_name_max);
-	p = xdr_encode_bool(p, resp->p_no_trunc);
-	p = xdr_encode_bool(p, resp->p_chown_restricted);
-	p = xdr_encode_bool(p, resp->p_case_insensitive);
-	xdr_encode_bool(p, resp->p_case_preserving);
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* PATHCONF */
-bool
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return false;
-		if (!svcxdr_encode_pathconf3resok(xdr, resp))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return false;
+	*p++ = resp->status;
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status == 0) {
+		*p++ = htonl(resp->p_link_max);
+		*p++ = htonl(resp->p_name_max);
+		*p++ = htonl(resp->p_no_trunc);
+		*p++ = htonl(resp->p_chown_restricted);
+		*p++ = htonl(resp->p_case_insensitive);
+		*p++ = htonl(resp->p_case_preserving);
 	}
 
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
 
 /* COMMIT */
-bool
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd3_commitres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_writeverf3(xdr, resp->verf))
-			return false;
-		break;
-	default:
-		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return false;
+	*p++ = resp->status;
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+	/* Write verifier */
+	if (resp->status == 0) {
+		*p++ = resp->verf[0];
+		*p++ = resp->verf[1];
 	}
-
-	return true;
+	return xdr_ressize_check(rqstp, p);
 }
 
 /*
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index bb8e2f6d7d03..71292a0d6f09 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -751,26 +751,57 @@ out_estate:
 	return ret;
 }
 
-__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
-			 struct nfsd_attrs *attr)
+__be32
+nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct nfs4_acl *acl)
 {
+	__be32 error;
 	int host_error;
+	struct dentry *dentry;
+	struct inode *inode;
+	struct posix_acl *pacl = NULL, *dpacl = NULL;
 	unsigned int flags = 0;
 
-	if (!acl)
-		return nfs_ok;
+	/* Get inode */
+	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
+	if (error)
+		return error;
 
-	if (type == NF4DIR)
+	dentry = fhp->fh_dentry;
+	inode = d_inode(dentry);
+
+	if (S_ISDIR(inode->i_mode))
 		flags = NFS4_ACL_DIR;
 
-	host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl,
-					     &attr->na_dpacl, flags);
+	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
 	if (host_error == -EINVAL)
 		return nfserr_attrnotsupp;
+	if (host_error < 0)
+		goto out_nfserr;
+
+	fh_lock(fhp);
+
+	host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
+	if (host_error < 0)
+		goto out_drop_lock;
+
+	if (S_ISDIR(inode->i_mode)) {
+		host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
+	}
+
+out_drop_lock:
+	fh_unlock(fhp);
+
+	posix_acl_release(pacl);
+	posix_acl_release(dpacl);
+out_nfserr:
+	if (host_error == -EOPNOTSUPP)
+		return nfserr_attrnotsupp;
 	else
 		return nfserrno(host_error);
 }
 
+
 static short
 ace2type(struct nfs4_ace *ace)
 {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4eae2c5af2ed..f5b7ad0847f2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -76,17 +76,6 @@ static __be32 *xdr_encode_empty_array(__be32 *p)
  * 1 Protocol"
  */
 
-static void encode_uint32(struct xdr_stream *xdr, u32 n)
-{
-	WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
-}
-
-static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
-			   size_t len)
-{
-	WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
-}
-
 /*
  *	nfs_cb_opnum4
  *
@@ -132,7 +121,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
 
 	BUG_ON(length > NFS4_FHSIZE);
 	p = xdr_reserve_space(xdr, 4 + length);
-	xdr_encode_opaque(p, &fh->fh_raw, length);
+	xdr_encode_opaque(p, &fh->fh_base, length);
 }
 
 /*
@@ -339,24 +328,6 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
 	hdr->nops++;
 }
 
-/*
- * CB_RECALLANY4args
- *
- *	struct CB_RECALLANY4args {
- *		uint32_t	craa_objects_to_keep;
- *		bitmap4		craa_type_mask;
- *	};
- */
-static void
-encode_cb_recallany4args(struct xdr_stream *xdr,
-	struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
-{
-	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
-	encode_uint32(xdr, ra->ra_keep);
-	encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval));
-	hdr->nops++;
-}
-
 /*
  * CB_SEQUENCE4args
  *
@@ -511,26 +482,6 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_cb_nops(&hdr);
 }
 
-/*
- * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
- */
-static void
-nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
-		struct xdr_stream *xdr, const void *data)
-{
-	const struct nfsd4_callback *cb = data;
-	struct nfsd4_cb_recall_any *ra;
-	struct nfs4_cb_compound_hdr hdr = {
-		.ident = cb->cb_clp->cl_cb_ident,
-		.minorversion = cb->cb_clp->cl_minorversion,
-	};
-
-	ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
-	encode_cb_compound4args(xdr, &hdr);
-	encode_cb_sequence4args(xdr, cb, &hdr);
-	encode_cb_recallany4args(xdr, &hdr, ra);
-	encode_cb_nops(&hdr);
-}
 
 /*
  * NFSv4.0 and NFSv4.1 XDR decode functions
@@ -569,28 +520,6 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
 }
 
-/*
- * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
- */
-static int
-nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
-				  struct xdr_stream *xdr,
-				  void *data)
-{
-	struct nfsd4_callback *cb = data;
-	struct nfs4_cb_compound_hdr hdr;
-	int status;
-
-	status = decode_cb_compound4res(xdr, &hdr);
-	if (unlikely(status))
-		return status;
-	status = decode_cb_sequence4res(xdr, cb);
-	if (unlikely(status || cb->cb_seq_status))
-		return status;
-	status =  decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
-	return status;
-}
-
 #ifdef CONFIG_NFSD_PNFS
 /*
  * CB_LAYOUTRECALL4args
@@ -750,7 +679,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
  *	case NFS4_OK:
  *		write_response4	coa_resok4;
  *	default:
- *		length4		coa_bytes_copied;
+ *	length4		coa_bytes_copied;
  * };
  * struct CB_OFFLOAD4args {
  *	nfs_fh4		coa_fh;
@@ -759,22 +688,21 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
  * };
  */
 static void encode_offload_info4(struct xdr_stream *xdr,
-				 const struct nfsd4_cb_offload *cbo)
+				 __be32 nfserr,
+				 const struct nfsd4_copy *cp)
 {
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 4);
-	*p = cbo->co_nfserr;
-	switch (cbo->co_nfserr) {
-	case nfs_ok:
+	*p++ = nfserr;
+	if (!nfserr) {
 		p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
 		p = xdr_encode_empty_array(p);
-		p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written);
-		*p++ = cpu_to_be32(cbo->co_res.wr_stable_how);
-		p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data,
+		p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
+		*p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
+		p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
 					    NFS4_VERIFIER_SIZE);
-		break;
-	default:
+	} else {
 		p = xdr_reserve_space(xdr, 8);
 		/* We always return success if bytes were written */
 		p = xdr_encode_hyper(p, 0);
@@ -782,16 +710,18 @@ static void encode_offload_info4(struct xdr_stream *xdr,
 }
 
 static void encode_cb_offload4args(struct xdr_stream *xdr,
-				   const struct nfsd4_cb_offload *cbo,
+				   __be32 nfserr,
+				   const struct knfsd_fh *fh,
+				   const struct nfsd4_copy *cp,
 				   struct nfs4_cb_compound_hdr *hdr)
 {
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 4);
-	*p = cpu_to_be32(OP_CB_OFFLOAD);
-	encode_nfs_fh4(xdr, &cbo->co_fh);
-	encode_stateid4(xdr, &cbo->co_res.cb_stateid);
-	encode_offload_info4(xdr, cbo);
+	*p++ = cpu_to_be32(OP_CB_OFFLOAD);
+	encode_nfs_fh4(xdr, fh);
+	encode_stateid4(xdr, &cp->cp_res.cb_stateid);
+	encode_offload_info4(xdr, nfserr, cp);
 
 	hdr->nops++;
 }
@@ -801,8 +731,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
 				    const void *data)
 {
 	const struct nfsd4_callback *cb = data;
-	const struct nfsd4_cb_offload *cbo =
-		container_of(cb, struct nfsd4_cb_offload, co_cb);
+	const struct nfsd4_copy *cp =
+		container_of(cb, struct nfsd4_copy, cp_cb);
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = 0,
 		.minorversion = cb->cb_clp->cl_minorversion,
@@ -810,7 +740,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
 
 	encode_cb_compound4args(xdr, &hdr);
 	encode_cb_sequence4args(xdr, cb, &hdr);
-	encode_cb_offload4args(xdr, cbo, &hdr);
+	encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
 	encode_cb_nops(&hdr);
 }
 
@@ -854,7 +784,6 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
 #endif
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
-	PROC(CB_RECALL_ANY,	COMPOUND,	cb_recall_any,	cb_recall_any),
 };
 
 static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@@ -1012,43 +941,37 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
 	clp->cl_cb_client = client;
 	clp->cl_cb_cred = cred;
-	rcu_read_lock();
-	trace_nfsd_cb_setup(clp, rpc_peeraddr2str(client, RPC_DISPLAY_NETID),
-			    args.authflavor);
-	rcu_read_unlock();
+	trace_nfsd_cb_setup(clp);
 	return 0;
 }
 
-static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
-{
-	if (clp->cl_cb_state != newstate) {
-		clp->cl_cb_state = newstate;
-		trace_nfsd_cb_state(clp);
-	}
-}
-
 static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
 {
 	if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
 		return;
-	nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
+	clp->cl_cb_state = NFSD4_CB_DOWN;
+	trace_nfsd_cb_state(clp);
 }
 
 static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
 {
 	if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
 		return;
-	nfsd4_mark_cb_state(clp, NFSD4_CB_FAULT);
+	clp->cl_cb_state = NFSD4_CB_FAULT;
+	trace_nfsd_cb_state(clp);
 }
 
 static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
 
+	trace_nfsd_cb_done(clp, task->tk_status);
 	if (task->tk_status)
 		nfsd4_mark_cb_down(clp, task->tk_status);
-	else
-		nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
+	else {
+		clp->cl_cb_state = NFSD4_CB_UP;
+		trace_nfsd_cb_state(clp);
+	}
 }
 
 static void nfsd4_cb_probe_release(void *calldata)
@@ -1072,8 +995,8 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
  */
 void nfsd4_probe_callback(struct nfs4_client *clp)
 {
-	trace_nfsd_cb_probe(clp);
-	nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
+	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+	trace_nfsd_cb_state(clp);
 	set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
 	nfsd4_run_cb(&clp->cl_cb_null);
 }
@@ -1086,10 +1009,11 @@ void nfsd4_probe_callback_sync(struct nfs4_client *clp)
 
 void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
 {
-	nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
+	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
 	spin_lock(&clp->cl_lock);
 	memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
 	spin_unlock(&clp->cl_lock);
+	trace_nfsd_cb_state(clp);
 }
 
 /*
@@ -1246,6 +1170,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 	struct nfsd4_callback *cb = calldata;
 	struct nfs4_client *clp = cb->cb_clp;
 
+	trace_nfsd_cb_done(clp, task->tk_status);
+
 	if (!nfsd4_cb_sequence_done(task, cb))
 		return;
 
@@ -1305,9 +1231,6 @@ void nfsd4_destroy_callback_queue(void)
 /* must be called under the state lock */
 void nfsd4_shutdown_callback(struct nfs4_client *clp)
 {
-	if (clp->cl_cb_state != NFSD4_CB_UNKNOWN)
-		trace_nfsd_cb_shutdown(clp);
-
 	set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
 	/*
 	 * Note this won't actually result in a null callback;
@@ -1353,6 +1276,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
 	 * kill the old client:
 	 */
 	if (clp->cl_cb_client) {
+		trace_nfsd_cb_shutdown(clp);
 		rpc_shutdown_client(clp->cl_cb_client);
 		clp->cl_cb_client = NULL;
 		put_cred(clp->cl_cb_cred);
@@ -1398,6 +1322,8 @@ nfsd4_run_cb_work(struct work_struct *work)
 	struct rpc_clnt *clnt;
 	int flags;
 
+	trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name);
+
 	if (cb->cb_need_restart) {
 		cb->cb_need_restart = false;
 	} else {
@@ -1419,7 +1345,7 @@ nfsd4_run_cb_work(struct work_struct *work)
 	 * Don't send probe messages for 4.1 or later.
 	 */
 	if (!cb->cb_ops && clp->cl_minorversion) {
-		nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
+		clp->cl_cb_state = NFSD4_CB_UP;
 		nfsd41_destroy_cb(cb);
 		return;
 	}
@@ -1445,21 +1371,11 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 	cb->cb_holds_slot = false;
 }
 
-/**
- * nfsd4_run_cb - queue up a callback job to run
- * @cb: callback to queue
- *
- * Kick off a callback to do its thing. Returns false if it was already
- * on a queue, true otherwise.
- */
-bool nfsd4_run_cb(struct nfsd4_callback *cb)
+void nfsd4_run_cb(struct nfsd4_callback *cb)
 {
 	struct nfs4_client *clp = cb->cb_clp;
-	bool queued;
 
 	nfsd41_cb_inflight_begin(clp);
-	queued = nfsd4_queue_cb(cb);
-	if (!queued)
+	if (!nfsd4_queue_cb(cb))
 		nfsd41_cb_inflight_end(clp);
-	return queued;
 }
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5e9809aff37e..f92161ce1f97 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -41,7 +41,6 @@
 #include "idmap.h"
 #include "nfsd.h"
 #include "netns.h"
-#include "vfs.h"
 
 /*
  * Turn off idmapping when using AUTH_SYS.
@@ -83,8 +82,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
 	new->id = itm->id;
 	new->type = itm->type;
 
-	strscpy(new->name, itm->name, sizeof(new->name));
-	strscpy(new->authname, itm->authname, sizeof(new->authname));
+	strlcpy(new->name, itm->name, sizeof(new->name));
+	strlcpy(new->authname, itm->authname, sizeof(new->authname));
 }
 
 static void
@@ -549,7 +548,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
 		return nfserr_badowner;
 	memcpy(key.name, name, namelen);
 	key.name[namelen] = '\0';
-	strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+	strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
 	ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item);
 	if (ret == -ENOENT)
 		return nfserr_badowner;
@@ -585,7 +584,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
 	int ret;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
-	strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+	strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
 	ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
 	if (ret == -ENOENT)
 		return encode_ascii_id(xdr, id);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index e4e23b2a3e65..2673019d30ec 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -421,7 +421,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
 	new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
 	if (!new)
 		return nfserr_jukebox;
-	memcpy(&new->lo_seg, seg, sizeof(new->lo_seg));
+	memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
 	new->lo_state = ls;
 
 	spin_lock(&fp->fi_lock);
@@ -657,7 +657,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
 	ktime_t now, cutoff;
 	const struct nfsd4_layout_ops *ops;
 
-	trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
+
 	switch (task->tk_status) {
 	case 0:
 	case -NFS4ERR_DELAY:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2c0de247083a..e84996c3867c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,9 +37,6 @@
 #include <linux/falloc.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
-#include <linux/namei.h>
-#include <linux/freezer.h>
-
 #include <linux/sunrpc/addr.h>
 #include <linux/nfs_ssc.h>
 
@@ -53,16 +50,34 @@
 #include "pnfs.h"
 #include "trace.h"
 
-static bool inter_copy_offload_enable;
-module_param(inter_copy_offload_enable, bool, 0644);
-MODULE_PARM_DESC(inter_copy_offload_enable,
-		 "Enable inter server to server copy offload. Default: false");
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+#include <linux/security.h>
 
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
-static int nfsd4_ssc_umount_timeout = 900000;		/* default to 15 mins */
-module_param(nfsd4_ssc_umount_timeout, int, 0644);
-MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
-		"idle msecs before unmount export from source server");
+static inline void
+nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
+{
+	struct inode *inode = d_inode(resfh->fh_dentry);
+	int status;
+
+	inode_lock(inode);
+	status = security_inode_setsecctx(resfh->fh_dentry,
+		label->data, label->len);
+	inode_unlock(inode);
+
+	if (status)
+		/*
+		 * XXX: We should really fail the whole open, but we may
+		 * already have created a new file, so it may be too
+		 * late.  For now this seems the least of evils:
+		 */
+		bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
+	return;
+}
+#else
+static inline void
+nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
+{ }
 #endif
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
@@ -129,6 +144,26 @@ is_create_with_attrs(struct nfsd4_open *open)
 		    || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
 }
 
+/*
+ * if error occurs when setting the acl, just clear the acl bit
+ * in the returned attr bitmap.
+ */
+static void
+do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct nfs4_acl *acl, u32 *bmval)
+{
+	__be32 status;
+
+	status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
+	if (status)
+		/*
+		 * We should probably fail the whole open at this point,
+		 * but we've already created the file, so it's too late;
+		 * So this seems the least of evils:
+		 */
+		bmval[0] &= ~FATTR4_WORD0_ACL;
+}
+
 static inline void
 fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 {
@@ -142,6 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
 static __be32
 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
 {
+	__be32 status;
 
 	if (open->op_truncate &&
 		!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
@@ -156,7 +192,9 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 	if (open->op_share_deny & NFS4_SHARE_DENY_READ)
 		accmode |= NFSD_MAY_WRITE;
 
-	return fh_verify(rqstp, current_fh, S_IFREG, accmode);
+	status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
+
+	return status;
 }
 
 static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
@@ -185,202 +223,6 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
 			&resfh->fh_handle);
 }
 
-static inline bool nfsd4_create_is_exclusive(int createmode)
-{
-	return createmode == NFS4_CREATE_EXCLUSIVE ||
-		createmode == NFS4_CREATE_EXCLUSIVE4_1;
-}
-
-static __be32
-nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
-		 struct nfsd4_open *open)
-{
-	struct file *filp;
-	struct path path;
-	int oflags;
-
-	oflags = O_CREAT | O_LARGEFILE;
-	switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) {
-	case NFS4_SHARE_ACCESS_WRITE:
-		oflags |= O_WRONLY;
-		break;
-	case NFS4_SHARE_ACCESS_BOTH:
-		oflags |= O_RDWR;
-		break;
-	default:
-		oflags |= O_RDONLY;
-	}
-
-	path.mnt = fhp->fh_export->ex_path.mnt;
-	path.dentry = child;
-	filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
-			     current_cred());
-	if (IS_ERR(filp))
-		return nfserrno(PTR_ERR(filp));
-
-	open->op_filp = filp;
-	return nfs_ok;
-}
-
-/*
- * Implement NFSv4's unchecked, guarded, and exclusive create
- * semantics for regular files. Open state for this new file is
- * subsequently fabricated in nfsd4_process_open2().
- *
- * Upon return, caller must release @fhp and @resfhp.
- */
-static __be32
-nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		  struct svc_fh *resfhp, struct nfsd4_open *open)
-{
-	struct iattr *iap = &open->op_iattr;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= iap,
-		.na_seclabel	= &open->op_label,
-	};
-	struct dentry *parent, *child;
-	__u32 v_mtime, v_atime;
-	struct inode *inode;
-	__be32 status;
-	int host_err;
-
-	if (isdotent(open->op_fname, open->op_fnamelen))
-		return nfserr_exist;
-	if (!(iap->ia_valid & ATTR_MODE))
-		iap->ia_mode = 0;
-
-	status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
-	if (status != nfs_ok)
-		return status;
-	parent = fhp->fh_dentry;
-	inode = d_inode(parent);
-
-	host_err = fh_want_write(fhp);
-	if (host_err)
-		return nfserrno(host_err);
-
-	if (is_create_with_attrs(open))
-		nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs);
-
-	inode_lock_nested(inode, I_MUTEX_PARENT);
-
-	child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
-	if (IS_ERR(child)) {
-		status = nfserrno(PTR_ERR(child));
-		goto out;
-	}
-
-	if (d_really_is_negative(child)) {
-		status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
-		if (status != nfs_ok)
-			goto out;
-	}
-
-	status = fh_compose(resfhp, fhp->fh_export, child, fhp);
-	if (status != nfs_ok)
-		goto out;
-
-	v_mtime = 0;
-	v_atime = 0;
-	if (nfsd4_create_is_exclusive(open->op_createmode)) {
-		u32 *verifier = (u32 *)open->op_verf.data;
-
-		/*
-		 * Solaris 7 gets confused (bugid 4218508) if these have
-		 * the high bit set, as do xfs filesystems without the
-		 * "bigtime" feature. So just clear the high bits. If this
-		 * is ever changed to use different attrs for storing the
-		 * verifier, then do_open_lookup() will also need to be
-		 * fixed accordingly.
-		 */
-		v_mtime = verifier[0] & 0x7fffffff;
-		v_atime = verifier[1] & 0x7fffffff;
-	}
-
-	if (d_really_is_positive(child)) {
-		status = nfs_ok;
-
-		/* NFSv4 protocol requires change attributes even though
-		 * no change happened.
-		 */
-		fh_fill_both_attrs(fhp);
-
-		switch (open->op_createmode) {
-		case NFS4_CREATE_UNCHECKED:
-			if (!d_is_reg(child))
-				break;
-
-			/*
-			 * In NFSv4, we don't want to truncate the file
-			 * now. This would be wrong if the OPEN fails for
-			 * some other reason. Furthermore, if the size is
-			 * nonzero, we should ignore it according to spec!
-			 */
-			open->op_truncate = (iap->ia_valid & ATTR_SIZE) &&
-						!iap->ia_size;
-			break;
-		case NFS4_CREATE_GUARDED:
-			status = nfserr_exist;
-			break;
-		case NFS4_CREATE_EXCLUSIVE:
-			if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
-			    d_inode(child)->i_atime.tv_sec == v_atime &&
-			    d_inode(child)->i_size == 0) {
-				open->op_created = true;
-				break;		/* subtle */
-			}
-			status = nfserr_exist;
-			break;
-		case NFS4_CREATE_EXCLUSIVE4_1:
-			if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
-			    d_inode(child)->i_atime.tv_sec == v_atime &&
-			    d_inode(child)->i_size == 0) {
-				open->op_created = true;
-				goto set_attr;	/* subtle */
-			}
-			status = nfserr_exist;
-		}
-		goto out;
-	}
-
-	if (!IS_POSIXACL(inode))
-		iap->ia_mode &= ~current_umask();
-
-	fh_fill_pre_attrs(fhp);
-	status = nfsd4_vfs_create(fhp, child, open);
-	if (status != nfs_ok)
-		goto out;
-	open->op_created = true;
-	fh_fill_post_attrs(fhp);
-
-	/* A newly created file already has a file size of zero. */
-	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
-		iap->ia_valid &= ~ATTR_SIZE;
-	if (nfsd4_create_is_exclusive(open->op_createmode)) {
-		iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
-				ATTR_MTIME_SET|ATTR_ATIME_SET;
-		iap->ia_mtime.tv_sec = v_mtime;
-		iap->ia_atime.tv_sec = v_atime;
-		iap->ia_mtime.tv_nsec = 0;
-		iap->ia_atime.tv_nsec = 0;
-	}
-
-set_attr:
-	status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
-
-	if (attrs.na_labelerr)
-		open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
-	if (attrs.na_aclerr)
-		open->op_bmval[0] &= ~FATTR4_WORD0_ACL;
-out:
-	inode_unlock(inode);
-	nfsd_attrs_free(&attrs);
-	if (child && !IS_ERR(child))
-		dput(child);
-	fh_drop_write(fhp);
-	return status;
-}
-
 static __be32
 do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
 {
@@ -410,33 +252,47 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
 		 * yes          | yes    | GUARDED4        | GUARDED4
 		 */
 
+		/*
+		 * Note: create modes (UNCHECKED,GUARDED...) are the same
+		 * in NFSv4 as in v3 except EXCLUSIVE4_1.
+		 */
 		current->fs->umask = open->op_umask;
-		status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
+		status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
+					open->op_fname.len, &open->op_iattr,
+					*resfh, open->op_createmode,
+					(u32 *)open->op_verf.data,
+					&open->op_truncate, &open->op_created);
 		current->fs->umask = 0;
 
+		if (!status && open->op_label.len)
+			nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
+
 		/*
 		 * Following rfc 3530 14.2.16, and rfc 5661 18.16.4
 		 * use the returned bitmask to indicate which attributes
 		 * we used to store the verifier:
 		 */
-		if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
+		if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
 			open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
 						FATTR4_WORD1_TIME_MODIFY);
-	} else {
+	} else
+		/*
+		 * Note this may exit with the parent still locked.
+		 * We will hold the lock until nfsd4_open's final
+		 * lookup, to prevent renames or unlinks until we've had
+		 * a chance to an acquire a delegation if appropriate.
+		 */
 		status = nfsd_lookup(rqstp, current_fh,
-				     open->op_fname, open->op_fnamelen, *resfh);
-		if (!status)
-			/* NFSv4 protocol requires change attributes even though
-			 * no change happened.
-			 */
-			fh_fill_both_attrs(current_fh);
-	}
+				     open->op_fname.data, open->op_fname.len, *resfh);
 	if (status)
 		goto out;
 	status = nfsd_check_obj_isreg(*resfh);
 	if (status)
 		goto out;
 
+	if (is_create_with_attrs(open) && open->op_acl != NULL)
+		do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval);
+
 	nfsd4_set_open_owner_reply_cache(cstate, open, *resfh);
 	accmode = NFSD_MAY_NOP;
 	if (open->op_created ||
@@ -452,6 +308,7 @@ static __be32
 do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
 {
 	struct svc_fh *current_fh = &cstate->current_fh;
+	__be32 status;
 	int accmode = 0;
 
 	/* We don't know the target directory, and therefore can not
@@ -476,7 +333,9 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH)
 		accmode = NFSD_MAY_OWNER_OVERRIDE;
 
-	return do_open_permission(rqstp, current_fh, open, accmode);
+	status = do_open_permission(rqstp, current_fh, open, accmode);
+
+	return status;
 }
 
 static void
@@ -501,12 +360,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	bool reclaim = false;
 
 	dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
-		(int)open->op_fnamelen, open->op_fname,
+		(int)open->op_fname.len, open->op_fname.data,
 		open->op_openowner);
 
-	open->op_filp = NULL;
-	open->op_rqstp = rqstp;
-
 	/* This check required by spec. */
 	if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
 		return nfserr_inval;
@@ -517,7 +373,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * Before RECLAIM_COMPLETE done, server should deny new lock
 	 */
 	if (nfsd4_has_session(cstate) &&
-	    !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags) &&
+	    !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
+		      &cstate->session->se_client->cl_flags) &&
 	    open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
 		return nfserr_grace;
 
@@ -559,46 +416,51 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 
 	switch (open->op_claim_type) {
-	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
-	case NFS4_OPEN_CLAIM_NULL:
-		status = do_open_lookup(rqstp, cstate, open, &resfh);
-		if (status)
+		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+		case NFS4_OPEN_CLAIM_NULL:
+			status = do_open_lookup(rqstp, cstate, open, &resfh);
+			if (status)
+				goto out;
+			break;
+		case NFS4_OPEN_CLAIM_PREVIOUS:
+			status = nfs4_check_open_reclaim(&open->op_clientid,
+							 cstate, nn);
+			if (status)
+				goto out;
+			open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+			reclaim = true;
+			fallthrough;
+		case NFS4_OPEN_CLAIM_FH:
+		case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+			status = do_open_fhandle(rqstp, cstate, open);
+			if (status)
+				goto out;
+			resfh = &cstate->current_fh;
+			break;
+		case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+             	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+			dprintk("NFSD: unsupported OPEN claim type %d\n",
+				open->op_claim_type);
+			status = nfserr_notsupp;
 			goto out;
-		break;
-	case NFS4_OPEN_CLAIM_PREVIOUS:
-		status = nfs4_check_open_reclaim(cstate->clp);
-		if (status)
+		default:
+			dprintk("NFSD: Invalid OPEN claim type %d\n",
+				open->op_claim_type);
+			status = nfserr_inval;
 			goto out;
-		open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-		reclaim = true;
-		fallthrough;
-	case NFS4_OPEN_CLAIM_FH:
-	case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
-		status = do_open_fhandle(rqstp, cstate, open);
-		if (status)
-			goto out;
-		resfh = &cstate->current_fh;
-		break;
-	case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
-	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
-		status = nfserr_notsupp;
-		goto out;
-	default:
-		status = nfserr_inval;
-		goto out;
 	}
-
+	/*
+	 * nfsd4_process_open2() does the actual opening of the file.  If
+	 * successful, it (1) truncates the file if open->op_truncate was
+	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+	 */
 	status = nfsd4_process_open2(rqstp, resfh, open);
-	if (status && open->op_created)
-		pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n",
-			be32_to_cpu(status));
+	WARN(status && open->op_created,
+	     "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
+	     be32_to_cpu(status));
 	if (reclaim && !status)
 		nn->somebody_reclaimed = true;
 out:
-	if (open->op_filp) {
-		fput(open->op_filp);
-		open->op_filp = NULL;
-	}
 	if (resfh && resfh != &cstate->current_fh) {
 		fh_dup2(&cstate->current_fh, resfh);
 		fh_put(resfh);
@@ -647,7 +509,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	fh_put(&cstate->current_fh);
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
-	memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval,
+	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
 	       putfh->pf_fhlen);
 	ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
 #ifdef CONFIG_NFSD_V4_2_INTER_SSC
@@ -663,9 +525,11 @@ static __be32
 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		union nfsd4_op_u *u)
 {
-	fh_put(&cstate->current_fh);
+	__be32 status;
 
-	return exp_pseudoroot(rqstp, &cstate->current_fh);
+	fh_put(&cstate->current_fh);
+	status = exp_pseudoroot(rqstp, &cstate->current_fh);
+	return status;
 }
 
 static __be32
@@ -724,7 +588,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
 	BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
 
-	nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
+	nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
 }
 
 static __be32
@@ -732,19 +596,10 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
 	struct nfsd4_commit *commit = &u->commit;
-	struct nfsd_file *nf;
-	__be32 status;
 
-	status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE |
-				   NFSD_MAY_NOT_BREAK_LEASE, &nf);
-	if (status != nfs_ok)
-		return status;
-
-	status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset,
+	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
 			     commit->co_count,
 			     (__be32 *)commit->co_verf.data);
-	nfsd_file_put(nf);
-	return status;
 }
 
 static __be32
@@ -752,10 +607,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
 	struct nfsd4_create *create = &u->create;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &create->cr_iattr,
-		.na_seclabel	= &create->cr_label,
-	};
 	struct svc_fh resfh;
 	__be32 status;
 	dev_t rdev;
@@ -771,13 +622,12 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		return status;
 
-	status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs);
 	current->fs->umask = create->cr_umask;
 	switch (create->cr_type) {
 	case NF4LNK:
 		status = nfsd_symlink(rqstp, &cstate->current_fh,
 				      create->cr_name, create->cr_namelen,
-				      create->cr_data, &attrs, &resfh);
+				      create->cr_data, &resfh);
 		break;
 
 	case NF4BLK:
@@ -788,7 +638,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out_umask;
 		status = nfsd_create(rqstp, &cstate->current_fh,
 				     create->cr_name, create->cr_namelen,
-				     &attrs, S_IFBLK, rdev, &resfh);
+				     &create->cr_iattr, S_IFBLK, rdev, &resfh);
 		break;
 
 	case NF4CHR:
@@ -799,26 +649,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out_umask;
 		status = nfsd_create(rqstp, &cstate->current_fh,
 				     create->cr_name, create->cr_namelen,
-				     &attrs, S_IFCHR, rdev, &resfh);
+				     &create->cr_iattr,S_IFCHR, rdev, &resfh);
 		break;
 
 	case NF4SOCK:
 		status = nfsd_create(rqstp, &cstate->current_fh,
 				     create->cr_name, create->cr_namelen,
-				     &attrs, S_IFSOCK, 0, &resfh);
+				     &create->cr_iattr, S_IFSOCK, 0, &resfh);
 		break;
 
 	case NF4FIFO:
 		status = nfsd_create(rqstp, &cstate->current_fh,
 				     create->cr_name, create->cr_namelen,
-				     &attrs, S_IFIFO, 0, &resfh);
+				     &create->cr_iattr, S_IFIFO, 0, &resfh);
 		break;
 
 	case NF4DIR:
 		create->cr_iattr.ia_valid &= ~ATTR_SIZE;
 		status = nfsd_create(rqstp, &cstate->current_fh,
 				     create->cr_name, create->cr_namelen,
-				     &attrs, S_IFDIR, 0, &resfh);
+				     &create->cr_iattr, S_IFDIR, 0, &resfh);
 		break;
 
 	default:
@@ -828,17 +678,20 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 
-	if (attrs.na_labelerr)
-		create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
-	if (attrs.na_aclerr)
-		create->cr_bmval[0] &= ~FATTR4_WORD0_ACL;
+	if (create->cr_label.len)
+		nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval);
+
+	if (create->cr_acl != NULL)
+		do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
+				create->cr_bmval);
+
+	fh_unlock(&cstate->current_fh);
 	set_change_info(&create->cr_cinfo, &cstate->current_fh);
 	fh_dup2(&cstate->current_fh, &resfh);
 out:
 	fh_put(&resfh);
 out_umask:
 	current->fs->umask = 0;
-	nfsd_attrs_free(&attrs);
 	return status;
 }
 
@@ -919,16 +772,12 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	__be32 status;
 
 	read->rd_nf = NULL;
+	if (read->rd_offset >= OFFSET_MAX)
+		return nfserr_inval;
 
 	trace_nfsd_read_start(rqstp, &cstate->current_fh,
 			      read->rd_offset, read->rd_length);
 
-	read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
-	if (read->rd_offset > (u64)OFFSET_MAX)
-		read->rd_offset = (u64)OFFSET_MAX;
-	if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
-		read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
-
 	/*
 	 * If we do a zero copy read, then a client will see read data
 	 * that reflects the state of the file *after* performing the
@@ -944,7 +793,12 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					&read->rd_stateid, RD_STATE,
 					&read->rd_nf, NULL);
-
+	if (status) {
+		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
+		goto out;
+	}
+	status = nfs_ok;
+out:
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = &cstate->current_fh;
 	return status;
@@ -1006,8 +860,10 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		return nfserr_grace;
 	status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
 			     remove->rm_name, remove->rm_namelen);
-	if (!status)
+	if (!status) {
+		fh_unlock(&cstate->current_fh);
 		set_change_info(&remove->rm_cinfo, &cstate->current_fh);
+	}
 	return status;
 }
 
@@ -1047,6 +903,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 				    &exp, &dentry);
 	if (err)
 		return err;
+	fh_unlock(&cstate->current_fh);
 	if (d_really_is_negative(dentry)) {
 		exp_put(exp);
 		err = nfserr_noent;
@@ -1101,21 +958,17 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	      union nfsd4_op_u *u)
 {
 	struct nfsd4_setattr *setattr = &u->setattr;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &setattr->sa_iattr,
-		.na_seclabel	= &setattr->sa_label,
-	};
-	struct inode *inode;
 	__be32 status = nfs_ok;
-	bool save_no_wcc;
 	int err;
 
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		status = nfs4_preprocess_stateid_op(rqstp, cstate,
 				&cstate->current_fh, &setattr->sa_stateid,
 				WR_STATE, NULL, NULL);
-		if (status)
+		if (status) {
+			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
 			return status;
+		}
 	}
 	err = fh_want_write(&cstate->current_fh);
 	if (err)
@@ -1127,23 +980,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 
-	inode = cstate->current_fh.fh_dentry->d_inode;
-	status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG,
-				   setattr->sa_acl, &attrs);
-
+	if (setattr->sa_acl != NULL)
+		status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
+					    setattr->sa_acl);
 	if (status)
 		goto out;
-	save_no_wcc = cstate->current_fh.fh_no_wcc;
-	cstate->current_fh.fh_no_wcc = true;
-	status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+	if (setattr->sa_label.len)
+		status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh,
+				&setattr->sa_label);
+	if (status)
+		goto out;
+	status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
 				0, (time64_t)0);
-	cstate->current_fh.fh_no_wcc = save_no_wcc;
-	if (!status)
-		status = nfserrno(attrs.na_labelerr);
-	if (!status)
-		status = nfserrno(attrs.na_aclerr);
 out:
-	nfsd_attrs_free(&attrs);
 	fh_drop_write(&cstate->current_fh);
 	return status;
 }
@@ -1168,12 +1017,15 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			       write->wr_offset, cnt);
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 						stateid, WR_STATE, &nf, NULL);
-	if (status)
+	if (status) {
+		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		return status;
+	}
 
 	write->wr_how_written = write->wr_stable_how;
 
-	nvecs = svc_fill_write_vector(rqstp, &write->wr_payload);
+	nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
+				      &write->wr_head, write->wr_buflen);
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
 	status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
@@ -1200,13 +1052,17 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
 					    src_stateid, RD_STATE, src, NULL);
-	if (status)
+	if (status) {
+		dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
 		goto out;
+	}
 
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    dst_stateid, WR_STATE, dst, NULL);
-	if (status)
+	if (status) {
+		dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
 		goto out_put_src;
+	}
 
 	/* fix up for NFS-specific error code */
 	if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
@@ -1239,7 +1095,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out;
 
-	status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
+	status = nfsd4_clone_file_range(src, clone->cl_src_pos,
 			dst, clone->cl_dst_pos, clone->cl_count,
 			EX_ISSYNC(cstate->current_fh.fh_export));
 
@@ -1249,17 +1105,30 @@ out:
 	return status;
 }
 
-static void nfs4_put_copy(struct nfsd4_copy *copy)
+void nfs4_put_copy(struct nfsd4_copy *copy)
 {
 	if (!refcount_dec_and_test(&copy->refcount))
 		return;
-	kfree(copy->cp_src);
 	kfree(copy);
 }
 
+static bool
+check_and_set_stop_copy(struct nfsd4_copy *copy)
+{
+	bool value;
+
+	spin_lock(&copy->cp_clp->async_lock);
+	value = copy->stopped;
+	if (!copy->stopped)
+		copy->stopped = true;
+	spin_unlock(&copy->cp_clp->async_lock);
+	return value;
+}
+
 static void nfsd4_stop_copy(struct nfsd4_copy *copy)
 {
-	if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags))
+	/* only 1 thread should stop the copy */
+	if (!check_and_set_stop_copy(copy))
 		kthread_stop(copy->copy_task);
 	nfs4_put_copy(copy);
 }
@@ -1296,88 +1165,12 @@ extern void nfs_sb_deactive(struct super_block *sb);
 
 #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys"
 
-/*
- * setup a work entry in the ssc delayed unmount list.
- */
-static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
-				  struct nfsd4_ssc_umount_item **nsui)
-{
-	struct nfsd4_ssc_umount_item *ni = NULL;
-	struct nfsd4_ssc_umount_item *work = NULL;
-	struct nfsd4_ssc_umount_item *tmp;
-	DEFINE_WAIT(wait);
-	__be32 status = 0;
-
-	*nsui = NULL;
-	work = kzalloc(sizeof(*work), GFP_KERNEL);
-try_again:
-	spin_lock(&nn->nfsd_ssc_lock);
-	list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
-		if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
-			continue;
-		/* found a match */
-		if (ni->nsui_busy) {
-			/*  wait - and try again */
-			prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
-			spin_unlock(&nn->nfsd_ssc_lock);
-
-			/* allow 20secs for mount/unmount for now - revisit */
-			if (kthread_should_stop() ||
-					(freezable_schedule_timeout(20*HZ) == 0)) {
-				finish_wait(&nn->nfsd_ssc_waitq, &wait);
-				kfree(work);
-				return nfserr_eagain;
-			}
-			finish_wait(&nn->nfsd_ssc_waitq, &wait);
-			goto try_again;
-		}
-		*nsui = ni;
-		refcount_inc(&ni->nsui_refcnt);
-		spin_unlock(&nn->nfsd_ssc_lock);
-		kfree(work);
-
-		/* return vfsmount in (*nsui)->nsui_vfsmount */
-		return 0;
-	}
-	if (work) {
-		strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
-		refcount_set(&work->nsui_refcnt, 2);
-		work->nsui_busy = true;
-		list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
-		*nsui = work;
-	} else
-		status = nfserr_resource;
-	spin_unlock(&nn->nfsd_ssc_lock);
-	return status;
-}
-
-static void nfsd4_ssc_update_dul(struct nfsd_net *nn,
-				 struct nfsd4_ssc_umount_item *nsui,
-				 struct vfsmount *ss_mnt)
-{
-	spin_lock(&nn->nfsd_ssc_lock);
-	nsui->nsui_vfsmount = ss_mnt;
-	nsui->nsui_busy = false;
-	wake_up_all(&nn->nfsd_ssc_waitq);
-	spin_unlock(&nn->nfsd_ssc_lock);
-}
-
-static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn,
-				 struct nfsd4_ssc_umount_item *nsui)
-{
-	spin_lock(&nn->nfsd_ssc_lock);
-	list_del(&nsui->nsui_list);
-	wake_up_all(&nn->nfsd_ssc_waitq);
-	spin_unlock(&nn->nfsd_ssc_lock);
-	kfree(nsui);
-}
-
 /*
  * Support one copy source server for now.
  */
 static __be32
 nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
-		       struct nfsd4_ssc_umount_item **nsui)
+		       struct vfsmount **mount)
 {
 	struct file_system_type *type;
 	struct vfsmount *ss_mnt;
@@ -1388,14 +1181,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 	char *ipaddr, *dev_name, *raw_data;
 	int len, raw_len;
 	__be32 status = nfserr_inval;
-	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	naddr = &nss->u.nl4_addr;
 	tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
 					 naddr->addr_len,
 					 (struct sockaddr *)&tmp_addr,
 					 sizeof(tmp_addr));
-	*nsui = NULL;
 	if (tmp_addrlen == 0)
 		goto out_err;
 
@@ -1438,23 +1229,14 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
 		goto out_free_rawdata;
 	snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
 
-	status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
-	if (status)
-		goto out_free_devname;
-	if ((*nsui)->nsui_vfsmount)
-		goto out_done;
-
 	/* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
 	ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
 	module_put(type->owner);
-	if (IS_ERR(ss_mnt)) {
-		status = nfserr_nodev;
-		nfsd4_ssc_cancel_dul(nn, *nsui);
+	if (IS_ERR(ss_mnt))
 		goto out_free_devname;
-	}
-	nfsd4_ssc_update_dul(nn, *nsui, ss_mnt);
-out_done:
+
 	status = 0;
+	*mount = ss_mnt;
 
 out_free_devname:
 	kfree(dev_name);
@@ -1478,7 +1260,7 @@ out_err:
 static __be32
 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
 		      struct nfsd4_compound_state *cstate,
-		      struct nfsd4_copy *copy)
+		      struct nfsd4_copy *copy, struct vfsmount **mount)
 {
 	struct svc_fh *s_fh = NULL;
 	stateid_t *s_stid = &copy->cp_src_stateid;
@@ -1491,14 +1273,14 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
 	if (status)
 		goto out;
 
-	status = nfsd4_interssc_connect(copy->cp_src, rqstp, &copy->ss_nsui);
+	status = nfsd4_interssc_connect(&copy->cp_src, rqstp, mount);
 	if (status)
 		goto out;
 
 	s_fh = &cstate->save_fh;
 
 	copy->c_fh.size = s_fh->fh_handle.fh_size;
-	memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size);
+	memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size);
 	copy->stateid.seqid = cpu_to_be32(s_stid->si_generation);
 	memcpy(copy->stateid.other, (void *)&s_stid->si_opaque,
 	       sizeof(stateid_opaque_t));
@@ -1509,26 +1291,13 @@ out:
 }
 
 static void
-nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
 			struct nfsd_file *dst)
 {
-	struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
-	long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
-
-	nfs42_ssc_close(filp);
-	fput(filp);
-
-	spin_lock(&nn->nfsd_ssc_lock);
-	list_del(&nsui->nsui_list);
-	/*
-	 * vfsmount can be shared by multiple exports,
-	 * decrement refcnt. If the count drops to 1 it
-	 * will be unmounted when nsui_expire expires.
-	 */
-	refcount_dec(&nsui->nsui_refcnt);
-	nsui->nsui_expire = jiffies + timeout;
-	list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list);
-	spin_unlock(&nn->nfsd_ssc_lock);
+	nfs42_ssc_close(src->nf_file);
+	fput(src->nf_file);
+	nfsd_file_put(dst);
+	mntput(ss_mnt);
 }
 
 #else /* CONFIG_NFSD_V4_2_INTER_SSC */
@@ -1536,13 +1305,15 @@ nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
 static __be32
 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
 		      struct nfsd4_compound_state *cstate,
-		      struct nfsd4_copy *copy)
+		      struct nfsd4_copy *copy,
+		      struct vfsmount **mount)
 {
+	*mount = NULL;
 	return nfserr_inval;
 }
 
 static void
-nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
 			struct nfsd_file *dst)
 {
 }
@@ -1565,21 +1336,23 @@ nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
 				 &copy->nf_dst);
 }
 
+static void
+nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
+{
+	nfsd_file_put(src);
+	nfsd_file_put(dst);
+}
+
 static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
 {
-	struct nfsd4_cb_offload *cbo =
-		container_of(cb, struct nfsd4_cb_offload, co_cb);
+	struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
 
-	kfree(cbo);
+	nfs4_put_copy(copy);
 }
 
 static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
 				 struct rpc_task *task)
 {
-	struct nfsd4_cb_offload *cbo =
-		container_of(cb, struct nfsd4_cb_offload, co_cb);
-
-	trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
 	return 1;
 }
 
@@ -1590,28 +1363,20 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = {
 
 static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
 {
-	copy->cp_res.wr_stable_how =
-		test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ?
-			NFS_FILE_SYNC : NFS_UNSTABLE;
-	nfsd4_copy_set_sync(copy, sync);
+	copy->cp_res.wr_stable_how = NFS_UNSTABLE;
+	copy->cp_synchronous = sync;
 	gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
 }
 
-static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
-				     struct file *dst,
-				     struct file *src)
+static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
 {
-	errseq_t since;
+	struct file *dst = copy->nf_dst->nf_file;
+	struct file *src = copy->nf_src->nf_file;
 	ssize_t bytes_copied = 0;
-	u64 bytes_total = copy->cp_count;
+	size_t bytes_total = copy->cp_count;
 	u64 src_pos = copy->cp_src_pos;
 	u64 dst_pos = copy->cp_dst_pos;
-	int status;
-	loff_t end;
 
-	/* See RFC 7862 p.67: */
-	if (bytes_total == 0)
-		bytes_total = ULLONG_MAX;
 	do {
 		if (kthread_should_stop())
 			break;
@@ -1623,29 +1388,16 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
 		copy->cp_res.wr_bytes_written += bytes_copied;
 		src_pos += bytes_copied;
 		dst_pos += bytes_copied;
-	} while (bytes_total > 0 && nfsd4_copy_is_async(copy));
-	/* for a non-zero asynchronous copy do a commit of data */
-	if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) {
-		since = READ_ONCE(dst->f_wb_err);
-		end = copy->cp_dst_pos + copy->cp_res.wr_bytes_written - 1;
-		status = vfs_fsync_range(dst, copy->cp_dst_pos, end, 0);
-		if (!status)
-			status = filemap_check_wb_err(dst->f_mapping, since);
-		if (!status)
-			set_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags);
-	}
+	} while (bytes_total > 0 && !copy->cp_synchronous);
 	return bytes_copied;
 }
 
-static __be32 nfsd4_do_copy(struct nfsd4_copy *copy,
-			    struct file *src, struct file *dst,
-			    bool sync)
+static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
 {
 	__be32 status;
 	ssize_t bytes;
 
-	bytes = _nfsd_copy_file_range(copy, dst, src);
-
+	bytes = _nfsd_copy_file_range(copy);
 	/* for async copy, we ignore the error, client can always retry
 	 * to get the error
 	 */
@@ -1655,6 +1407,13 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy,
 		nfsd4_init_copy_res(copy, sync);
 		status = nfs_ok;
 	}
+
+	if (!copy->cp_intra) /* Inter server SSC */
+		nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src,
+					copy->nf_dst);
+	else
+		nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+
 	return status;
 }
 
@@ -1663,100 +1422,71 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
 	dst->cp_src_pos = src->cp_src_pos;
 	dst->cp_dst_pos = src->cp_dst_pos;
 	dst->cp_count = src->cp_count;
-	dst->cp_flags = src->cp_flags;
+	dst->cp_synchronous = src->cp_synchronous;
 	memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
 	memcpy(&dst->fh, &src->fh, sizeof(src->fh));
 	dst->cp_clp = src->cp_clp;
 	dst->nf_dst = nfsd_file_get(src->nf_dst);
-	/* for inter, nf_src doesn't exist yet */
-	if (!nfsd4_ssc_is_inter(src))
+	dst->cp_intra = src->cp_intra;
+	if (src->cp_intra) /* for inter, file_src doesn't exist yet */
 		dst->nf_src = nfsd_file_get(src->nf_src);
 
 	memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
-	memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server));
+	memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server));
 	memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
 	memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
-	dst->ss_nsui = src->ss_nsui;
-}
-
-static void release_copy_files(struct nfsd4_copy *copy)
-{
-	if (copy->nf_src)
-		nfsd_file_put(copy->nf_src);
-	if (copy->nf_dst)
-		nfsd_file_put(copy->nf_dst);
+	dst->ss_mnt = src->ss_mnt;
 }
 
 static void cleanup_async_copy(struct nfsd4_copy *copy)
 {
 	nfs4_free_copy_state(copy);
-	release_copy_files(copy);
-	if (copy->cp_clp) {
-		spin_lock(&copy->cp_clp->async_lock);
-		if (!list_empty(&copy->copies))
-			list_del_init(&copy->copies);
-		spin_unlock(&copy->cp_clp->async_lock);
-	}
+	nfsd_file_put(copy->nf_dst);
+	if (copy->cp_intra)
+		nfsd_file_put(copy->nf_src);
+	spin_lock(&copy->cp_clp->async_lock);
+	list_del(&copy->copies);
+	spin_unlock(&copy->cp_clp->async_lock);
 	nfs4_put_copy(copy);
 }
 
-static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
-{
-	struct nfsd4_cb_offload *cbo;
-
-	cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
-	if (!cbo)
-		return;
-
-	memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
-	memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
-	cbo->co_nfserr = nfserr;
-
-	nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops,
-		      NFSPROC4_CLNT_CB_OFFLOAD);
-	trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
-			      &cbo->co_fh, copy->cp_count, nfserr);
-	nfsd4_run_cb(&cbo->co_cb);
-}
-
-/**
- * nfsd4_do_async_copy - kthread function for background server-side COPY
- * @data: arguments for COPY operation
- *
- * Return values:
- *   %0: Copy operation is done.
- */
 static int nfsd4_do_async_copy(void *data)
 {
 	struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
-	__be32 nfserr;
+	struct nfsd4_copy *cb_copy;
 
-	if (nfsd4_ssc_is_inter(copy)) {
-		struct file *filp;
-
-		filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount,
-				      &copy->c_fh, &copy->stateid);
-		if (IS_ERR(filp)) {
-			switch (PTR_ERR(filp)) {
-			case -EBADF:
-				nfserr = nfserr_wrong_type;
-				break;
-			default:
-				nfserr = nfserr_offload_denied;
-			}
+	if (!copy->cp_intra) { /* Inter server SSC */
+		copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+		if (!copy->nf_src) {
+			copy->nfserr = nfserr_serverfault;
+			/* ss_mnt will be unmounted by the laundromat */
+			goto do_callback;
+		}
+		copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+					      &copy->stateid);
+		if (IS_ERR(copy->nf_src->nf_file)) {
+			copy->nfserr = nfserr_offload_denied;
 			/* ss_mnt will be unmounted by the laundromat */
 			goto do_callback;
 		}
-		nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
-				       false);
-		nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst);
-	} else {
-		nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
-				       copy->nf_dst->nf_file, false);
 	}
 
+	copy->nfserr = nfsd4_do_copy(copy, 0);
 do_callback:
-	nfsd4_send_cb_offload(copy, nfserr);
+	cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+	if (!cb_copy)
+		goto out;
+	refcount_set(&cb_copy->refcount, 1);
+	memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
+	cb_copy->cp_clp = copy->cp_clp;
+	cb_copy->nfserr = copy->nfserr;
+	memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh));
+	nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
+			&nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
+	nfsd4_run_cb(&cb_copy->cp_cb);
+out:
+	if (!copy->cp_intra)
+		kfree(copy->nf_src);
 	cleanup_async_copy(copy);
 	return 0;
 }
@@ -1769,12 +1499,13 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	__be32 status;
 	struct nfsd4_copy *async_copy = NULL;
 
-	if (nfsd4_ssc_is_inter(copy)) {
-		if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) {
+	if (!copy->cp_intra) { /* Inter server SSC */
+		if (!inter_copy_offload_enable || copy->cp_synchronous) {
 			status = nfserr_notsupp;
 			goto out;
 		}
-		status = nfsd4_setup_inter_ssc(rqstp, cstate, copy);
+		status = nfsd4_setup_inter_ssc(rqstp, cstate, copy,
+				&copy->ss_mnt);
 		if (status)
 			return nfserr_offload_denied;
 	} else {
@@ -1786,21 +1517,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	copy->cp_clp = cstate->clp;
 	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
 		sizeof(struct knfsd_fh));
-	if (nfsd4_copy_is_async(copy)) {
+	if (!copy->cp_synchronous) {
 		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 		status = nfserrno(-ENOMEM);
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy)
 			goto out_err;
-		INIT_LIST_HEAD(&async_copy->copies);
-		refcount_set(&async_copy->refcount, 1);
-		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
-		if (!async_copy->cp_src)
-			goto out_err;
 		if (!nfs4_init_copy_state(nn, copy))
 			goto out_err;
-		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
+		refcount_set(&async_copy->refcount, 1);
+		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.stid,
 			sizeof(copy->cp_res.cb_stateid));
 		dup_copy_fields(copy, async_copy);
 		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
@@ -1814,24 +1541,18 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		wake_up_process(async_copy->copy_task);
 		status = nfs_ok;
 	} else {
-		status = nfsd4_do_copy(copy, copy->nf_src->nf_file,
-				       copy->nf_dst->nf_file, true);
+		status = nfsd4_do_copy(copy, 1);
 	}
 out:
-	release_copy_files(copy);
 	return status;
 out_err:
-	if (nfsd4_ssc_is_inter(copy)) {
-		/*
-		 * Source's vfsmount of inter-copy will be unmounted
-		 * by the laundromat. Use copy instead of async_copy
-		 * since async_copy->ss_nsui might not be set yet.
-		 */
-		refcount_dec(&copy->ss_nsui->nsui_refcnt);
-	}
 	if (async_copy)
 		cleanup_async_copy(async_copy);
 	status = nfserrno(-ENOMEM);
+	/*
+	 * source's vfsmount of inter-copy will be unmounted
+	 * by the laundromat
+	 */
 	goto out;
 }
 
@@ -1842,7 +1563,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
 
 	spin_lock(&clp->async_lock);
 	list_for_each_entry(copy, &clp->async_copies, copies) {
-		if (memcmp(&copy->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE))
+		if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE))
 			continue;
 		refcount_inc(&copy->refcount);
 		spin_unlock(&clp->async_lock);
@@ -1896,16 +1617,16 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	cps = nfs4_alloc_init_cpntf_state(nn, stid);
 	if (!cps)
 		goto out;
-	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t));
+	memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
 	memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
 	memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
 
 	/* For now, only return one server address in cpn_src, the
 	 * address used by the client to connect to this server.
 	 */
-	cn->cpn_src->nl4_type = NL4_NETADDR;
+	cn->cpn_src.nl4_type = NL4_NETADDR;
 	status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
-				 &cn->cpn_src->u.nl4_addr);
+				 &cn->cpn_src.u.nl4_addr);
 	WARN_ON_ONCE(status);
 	if (status) {
 		nfs4_put_cpntf_state(nn, cps);
@@ -1926,8 +1647,10 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    &fallocate->falloc_stateid,
 					    WR_STATE, &nf, NULL);
-	if (status != nfs_ok)
+	if (status != nfs_ok) {
+		dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
 		return status;
+	}
 
 	status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
 				     fallocate->falloc_offset,
@@ -1983,8 +1706,10 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
 					    &seek->seek_stateid,
 					    RD_STATE, &nf, NULL);
-	if (status)
+	if (status) {
+		dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
 		return status;
+	}
 
 	switch (seek->seek_whence) {
 	case NFS4_CONTENT_DATA:
@@ -2152,7 +1877,7 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
 	nfserr = nfs_ok;
 	if (gdp->gd_maxcount != 0) {
 		nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
-				rqstp, cstate->clp, gdp);
+				rqstp, cstate->session->se_client, gdp);
 	}
 
 	gdp->gd_notify_types &= ops->notify_types;
@@ -2438,7 +2163,7 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
 static inline void nfsd4_increment_op_stats(u32 opnum)
 {
 	if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
-		percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
+		nfsdstats.nfs4_opcount[opnum]++;
 }
 
 static const struct nfsd4_operation nfsd4_ops[];
@@ -2528,6 +2253,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
 	return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
 }
 
+static void svcxdr_init_encode(struct svc_rqst *rqstp,
+			       struct nfsd4_compoundres *resp)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	struct xdr_buf *buf = &rqstp->rq_res;
+	struct kvec *head = buf->head;
+
+	xdr->buf = buf;
+	xdr->iov = head;
+	xdr->p   = head->iov_base + head->iov_len;
+	xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
+	/* Tail and page_len should be zero at this point: */
+	buf->len = buf->head[0].iov_len;
+	xdr->scratch.iov_len = 0;
+	xdr->page_ptr = buf->pages - 1;
+	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+		- rqstp->rq_auth_slack;
+}
+
 #ifdef CONFIG_NFSD_V4_2_INTER_SSC
 static void
 check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
@@ -2555,7 +2299,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
 				return;
 			}
 			putfh = (struct nfsd4_putfh *)&saved_op->u;
-			if (nfsd4_ssc_is_inter(copy))
+			if (!copy->cp_intra)
 				putfh->no_verify = true;
 		}
 	}
@@ -2582,14 +2326,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	__be32		status;
 
-	resp->xdr = &rqstp->rq_res_stream;
-	resp->statusp = resp->xdr->p;
-
-	/* reserve space for: NFS status code */
-	xdr_reserve_space(resp->xdr, XDR_UNIT);
-
+	svcxdr_init_encode(rqstp, resp);
+	resp->tagp = resp->xdr.p;
 	/* reserve space for: taglen, tag, and opcnt */
-	xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen);
+	xdr_reserve_space(&resp->xdr, 8 + args->taglen);
 	resp->taglen = args->taglen;
 	resp->tag = args->tag;
 	resp->rqstp = rqstp;
@@ -2608,6 +2348,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 	status = nfserr_minor_vers_mismatch;
 	if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0)
 		goto out;
+	status = nfserr_resource;
+	if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+		goto out;
 
 	status = nfs41_check_op_ordering(args);
 	if (status) {
@@ -2620,20 +2363,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 
 	rqstp->rq_lease_breaker = (void **)&cstate->clp;
 
-	trace_nfsd_compound(rqstp, args->client_opcnt);
+	trace_nfsd_compound(rqstp, args->opcnt);
 	while (!status && resp->opcnt < args->opcnt) {
 		op = &args->ops[resp->opcnt++];
 
-		if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
-			/* If there are still more operations to process,
-			 * stop here and report NFS4ERR_RESOURCE. */
-			if (cstate->minorversion == 0 &&
-			    args->client_opcnt > resp->opcnt) {
-				op->status = nfserr_resource;
-				goto encode_op;
-			}
-		}
-
 		/*
 		 * The XDR decode routines may have pre-set op->status;
 		 * for example, if there is a miscellaneous XDR error
@@ -2657,13 +2390,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 			goto encode_op;
 		}
 
-		fh_clear_pre_post_attrs(current_fh);
+		fh_clear_wcc(current_fh);
 
 		/* If op is non-idempotent */
 		if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
 			/*
 			 * Don't execute this op if we couldn't encode a
-			 * successful reply:
+			 * successful reply:
 			 */
 			u32 plen = op->opdesc->op_rsize_bop(rqstp, op);
 			/*
@@ -2702,15 +2435,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 encode_op:
 		if (op->status == nfserr_replay_me) {
 			op->replay = &cstate->replay_owner->so_replay;
-			nfsd4_encode_replay(resp->xdr, op);
+			nfsd4_encode_replay(&resp->xdr, op);
 			status = op->status = op->replay->rp_status;
 		} else {
 			nfsd4_encode_operation(resp, op);
 			status = op->status;
 		}
 
-		trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
-					   status, nfsd4_op_name(op->opnum));
+		trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
+					   nfsd4_op_name(op->opnum));
 
 		nfsd4_cstate_clear_replay(cstate);
 		nfsd4_increment_op_stats(op->opnum);
@@ -2744,49 +2477,28 @@ out:
 
 #define op_encode_channel_attrs_maxsz	(6 + 1 + 1)
 
-/*
- * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which
- * is called before sunrpc sets rq_res.buflen. Thus we have to compute
- * the maximum payload size here, based on transport limits and the size
- * of the remaining space in the rq_pages array.
- */
-static u32 nfsd4_max_payload(const struct svc_rqst *rqstp)
-{
-	u32 buflen;
-
-	buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE;
-	buflen -= rqstp->rq_auth_slack;
-	buflen -= rqstp->rq_res.head[0].iov_len;
-	return min_t(u32, buflen, svc_max_payload(rqstp));
-}
-
-static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp,
-				   const struct nfsd4_op *op)
+static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size) * sizeof(__be32);
 }
 
-static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp,
-				      const struct nfsd4_op *op)
+static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
 }
 
-static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp,
-			      const struct nfsd4_op *op)
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	/* ac_supported, ac_resp_access */
 	return (op_encode_hdr_size + 2)* sizeof(__be32);
 }
 
-static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp,
-			      const struct nfsd4_op *op)
+static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
 }
 
-static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp,
-			      const struct nfsd4_op *op)
+static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz
 		+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
@@ -2797,17 +2509,17 @@ static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp,
  * the op prematurely if the estimate is too large.  We may turn off splice
  * reads unnecessarily.
  */
-static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp,
-			       const struct nfsd4_op *op)
+static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
+				      struct nfsd4_op *op)
 {
-	const u32 *bmap = op->u.getattr.ga_bmval;
+	u32 *bmap = op->u.getattr.ga_bmval;
 	u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
 	u32 ret = 0;
 
 	if (bmap0 & FATTR4_WORD0_ACL)
-		return nfsd4_max_payload(rqstp);
+		return svc_max_payload(rqstp);
 	if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
-		return nfsd4_max_payload(rqstp);
+		return svc_max_payload(rqstp);
 
 	if (bmap1 & FATTR4_WORD1_OWNER) {
 		ret += IDMAP_NAMESZ + 4;
@@ -2835,28 +2547,24 @@ static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp,
 	return ret;
 }
 
-static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp,
-			     const struct nfsd4_op *op)
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
 }
 
-static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp,
-			    const struct nfsd4_op *op)
+static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz)
 		* sizeof(__be32);
 }
 
-static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp,
-			    const struct nfsd4_op *op)
+static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
 		* sizeof(__be32);
 }
 
-static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp,
-			    const struct nfsd4_op *op)
+static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_stateid_maxsz
 		+ op_encode_change_info_maxsz + 1
@@ -2864,18 +2572,20 @@ static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp,
 		+ op_encode_delegation_maxsz) * sizeof(__be32);
 }
 
-static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp,
-			    const struct nfsd4_op *op)
+static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-	u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
+	u32 maxcount = 0, rlen = 0;
+
+	maxcount = svc_max_payload(rqstp);
+	rlen = min(op->u.read.rd_length, maxcount);
 
 	return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
-static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
-				 const struct nfsd4_op *op)
+static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-	u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
+	u32 maxcount = svc_max_payload(rqstp);
+	u32 rlen = min(op->u.read.rd_length, maxcount);
 	/*
 	 * If we detect that the file changed during hole encoding, then we
 	 * recover by encoding the remaining reply as data. This means we need
@@ -2886,77 +2596,70 @@ static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
 	return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
-static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp,
-			       const struct nfsd4_op *op)
+static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-	u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp));
+	u32 maxcount = 0, rlen = 0;
+
+	maxcount = svc_max_payload(rqstp);
+	rlen = min(op->u.readdir.rd_maxcount, maxcount);
 
 	return (op_encode_hdr_size + op_encode_verifier_maxsz +
 		XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
-static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp,
-				const struct nfsd4_op *op)
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
 }
 
-static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp,
-			      const struct nfsd4_op *op)
+static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz)
 		* sizeof(__be32);
 }
 
-static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp,
-			      const struct nfsd4_op *op)
+static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz
 		+ op_encode_change_info_maxsz) * sizeof(__be32);
 }
 
-static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp,
-				const struct nfsd4_op *op)
+static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+				       struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size
 		+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
 }
 
-static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp,
-				    const struct nfsd4_op *op)
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
 		* sizeof(__be32);
 }
 
-static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp,
-			       const struct nfsd4_op *op)
+static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
 }
 
-static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp,
-			       const struct nfsd4_op *op)
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
 		(4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
 }
 
-static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp,
-				   const struct nfsd4_op *op)
+static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
 								sizeof(__be32);
 }
 
-static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp,
-			     const struct nfsd4_op *op)
+static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
 }
 
-static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp,
-				   const struct nfsd4_op *op)
+static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
 		1 + 1 + /* eir_flags, spr_how */\
@@ -2970,16 +2673,14 @@ static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp,
 		0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
 }
 
-static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp,
-					    const struct nfsd4_op *op)
+static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + \
 		XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
 		2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
 }
 
-static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp,
-				      const struct nfsd4_op *op)
+static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + \
 		XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
@@ -2988,8 +2689,7 @@ static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp,
 		op_encode_channel_attrs_maxsz) * sizeof(__be32);
 }
 
-static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp,
-			    const struct nfsd4_op *op)
+static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size +
 		1 /* wr_callback */ +
@@ -3001,16 +2701,16 @@ static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp,
 		1 /* cr_synchronous */) * sizeof(__be32);
 }
 
-static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp,
-				      const struct nfsd4_op *op)
+static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
+					     struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size +
 		2 /* osr_count */ +
 		1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
 }
 
-static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
-				   const struct nfsd4_op *op)
+static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+					struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size +
 		3 /* cnr_lease_time */ +
@@ -3025,10 +2725,12 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
 }
 
 #ifdef CONFIG_NFSD_PNFS
-static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
-				     const struct nfsd4_op *op)
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-	u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp));
+	u32 maxcount = 0, rlen = 0;
+
+	maxcount = svc_max_payload(rqstp);
+	rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
 
 	return (op_encode_hdr_size +
 		1 /* gd_layout_type*/ +
@@ -3041,8 +2743,7 @@ static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
  * so we need to define an arbitrary upper bound here.
  */
 #define MAX_LAYOUT_SIZE		128
-static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp,
-				 const struct nfsd4_op *op)
+static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size +
 		1 /* logr_return_on_close */ +
@@ -3051,16 +2752,14 @@ static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp,
 		MAX_LAYOUT_SIZE) * sizeof(__be32);
 }
 
-static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp,
-				    const struct nfsd4_op *op)
+static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size +
 		1 /* locr_newsize */ +
 		2 /* ns_size */) * sizeof(__be32);
 }
 
-static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
-				    const struct nfsd4_op *op)
+static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size +
 		1 /* lrs_stateid */ +
@@ -3069,36 +2768,41 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
 #endif /* CONFIG_NFSD_PNFS */
 
 
-static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
-			    const struct nfsd4_op *op)
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 3) * sizeof(__be32);
 }
 
-static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp,
-				const struct nfsd4_op *op)
+static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp,
+				       struct nfsd4_op *op)
 {
-	u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp));
+	u32 maxcount, rlen;
+
+	maxcount = svc_max_payload(rqstp);
+	rlen = min_t(u32, XATTR_SIZE_MAX, maxcount);
 
 	return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
-static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp,
-				const struct nfsd4_op *op)
+static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp,
+				       struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz)
 		* sizeof(__be32);
 }
-static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp,
-				  const struct nfsd4_op *op)
+static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp,
+					 struct nfsd4_op *op)
 {
-	u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp));
+	u32 maxcount, rlen;
+
+	maxcount = svc_max_payload(rqstp);
+	rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount);
 
 	return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
-static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp,
-				   const struct nfsd4_op *op)
+static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp,
+					  struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + op_encode_change_info_maxsz)
 		* sizeof(__be32);
@@ -3531,7 +3235,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 {
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
-	struct nfsd4_op *this;
+	struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
 	struct nfsd4_compound_state *cstate = &resp->cstate;
 	struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow;
 	u32 opiter;
@@ -3568,7 +3272,7 @@ int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
 void warn_on_nonidempotent_op(struct nfsd4_op *op)
 {
 	if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) {
-		pr_err("unable to encode reply to nonidempotent op %u (%s)\n",
+		pr_err("unable to encode reply to nonidempotent op %d (%s)\n",
 			op->opnum, nfsd4_op_name(op->opnum));
 		WARN_ON_ONCE(1);
 	}
@@ -3581,29 +3285,28 @@ static const char *nfsd4_op_name(unsigned opnum)
 	return "unknown_operation";
 }
 
+#define nfsd4_voidres			nfsd4_voidargs
+struct nfsd4_voidargs { int dummy; };
+
 static const struct svc_procedure nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
 		.pc_func = nfsd4_proc_null,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfs4svc_decode_voidarg,
+		.pc_encode = nfs4svc_encode_voidres,
+		.pc_argsize = sizeof(struct nfsd4_voidargs),
+		.pc_ressize = sizeof(struct nfsd4_voidres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = 1,
-		.pc_name = "NULL",
 	},
 	[NFSPROC4_COMPOUND] = {
 		.pc_func = nfsd4_proc_compound,
 		.pc_decode = nfs4svc_decode_compoundargs,
 		.pc_encode = nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
-		.pc_argzero = offsetof(struct nfsd4_compoundargs, iops),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
 		.pc_release = nfsd4_release_compoundargs,
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = NFSD_BUFSIZE/4,
-		.pc_name = "COMPOUND",
 	},
 };
 
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 189c622dde61..83c4e6883953 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -626,7 +626,7 @@ nfsd4_legacy_tracking_init(struct net *net)
 	status = nfsd4_load_reboot_recovery_data(net);
 	if (status)
 		goto err;
-	pr_info("NFSD: Using legacy client tracking operations.\n");
+	printk("NFSD: Using legacy client tracking operations.\n");
 	return 0;
 
 err:
@@ -807,17 +807,17 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 			if (get_user(namelen, &ci->cc_name.cn_len))
 				return -EFAULT;
 			name.data = memdup_user(&ci->cc_name.cn_id, namelen);
-			if (IS_ERR(name.data))
-				return PTR_ERR(name.data);
+			if (IS_ERR_OR_NULL(name.data))
+				return -EFAULT;
 			name.len = namelen;
 			get_user(princhashlen, &ci->cc_princhash.cp_len);
 			if (princhashlen > 0) {
 				princhash.data = memdup_user(
 						&ci->cc_princhash.cp_data,
 						princhashlen);
-				if (IS_ERR(princhash.data)) {
+				if (IS_ERR_OR_NULL(princhash.data)) {
 					kfree(name.data);
-					return PTR_ERR(princhash.data);
+					return -EFAULT;
 				}
 				princhash.len = princhashlen;
 			} else
@@ -829,8 +829,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 			if (get_user(namelen, &cnm->cn_len))
 				return -EFAULT;
 			name.data = memdup_user(&cnm->cn_id, namelen);
-			if (IS_ERR(name.data))
-				return PTR_ERR(name.data);
+			if (IS_ERR_OR_NULL(name.data))
+				return -EFAULT;
 			name.len = namelen;
 		}
 		if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
@@ -1030,7 +1030,7 @@ nfsd4_init_cld_pipe(struct net *net)
 
 	status = __nfsd4_init_cld_pipe(net);
 	if (!status)
-		pr_info("NFSD: Using old nfsdcld client tracking operations.\n");
+		printk("NFSD: Using old nfsdcld client tracking operations.\n");
 	return status;
 }
 
@@ -1607,7 +1607,7 @@ nfsd4_cld_tracking_init(struct net *net)
 		nfs4_release_reclaim(nn);
 		goto err_remove;
 	} else
-		pr_info("NFSD: Using nfsdcld client tracking operations.\n");
+		printk("NFSD: Using nfsdcld client tracking operations.\n");
 	return 0;
 
 err_remove:
@@ -1866,7 +1866,7 @@ nfsd4_umh_cltrack_init(struct net *net)
 	ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
 	kfree(grace_start);
 	if (!ret)
-		pr_info("NFSD: Using UMH upcall client tracking operations.\n");
+		printk("NFSD: Using UMH upcall client tracking operations.\n");
 	return ret;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 228560f3fd0e..d402ca0b535f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -43,10 +43,6 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/jhash.h>
 #include <linux/string_helpers.h>
-#include <linux/fsnotify.h>
-#include <linux/rhashtable.h>
-#include <linux/nfs_ssc.h>
-
 #include "xdr4.h"
 #include "xdr4cb.h"
 #include "vfs.h"
@@ -86,7 +82,6 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
 void nfsd4_end_grace(struct nfsd_net *nn);
 static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
-static void nfsd4_file_hash_remove(struct nfs4_file *fi);
 
 /* Locking: */
 
@@ -128,23 +123,6 @@ static void free_session(struct nfsd4_session *);
 static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
 static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
 
-static struct workqueue_struct *laundry_wq;
-
-int nfsd4_create_laundry_wq(void)
-{
-	int rc = 0;
-
-	laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
-	if (laundry_wq == NULL)
-		rc = -ENOMEM;
-	return rc;
-}
-
-void nfsd4_destroy_laundry_wq(void)
-{
-	destroy_workqueue(laundry_wq);
-}
-
 static bool is_session_dead(struct nfsd4_session *ses)
 {
 	return ses->se_flags & NFS4_SESSION_DEAD;
@@ -163,13 +141,6 @@ static bool is_client_expired(struct nfs4_client *clp)
 	return clp->cl_time == 0;
 }
 
-static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn,
-					struct nfs4_client *clp)
-{
-	if (clp->cl_state != NFSD4_ACTIVE)
-		atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0);
-}
-
 static __be32 get_client_locked(struct nfs4_client *clp)
 {
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -179,8 +150,6 @@ static __be32 get_client_locked(struct nfs4_client *clp)
 	if (is_client_expired(clp))
 		return nfserr_expired;
 	atomic_inc(&clp->cl_rpc_users);
-	nfsd4_dec_courtesy_client_count(nn, clp);
-	clp->cl_state = NFSD4_ACTIVE;
 	return nfs_ok;
 }
 
@@ -201,8 +170,6 @@ renew_client_locked(struct nfs4_client *clp)
 
 	list_move_tail(&clp->cl_lru, &nn->client_lru);
 	clp->cl_time = ktime_get_boottime_seconds();
-	nfsd4_dec_courtesy_client_count(nn, clp);
-	clp->cl_state = NFSD4_ACTIVE;
 }
 
 static void put_client_renew_locked(struct nfs4_client *clp)
@@ -277,7 +244,6 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
 	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
 		if (fh_match(fh, &cur->nbl_fh)) {
 			list_del_init(&cur->nbl_list);
-			WARN_ON(list_empty(&cur->nbl_lru));
 			list_del_init(&cur->nbl_lru);
 			found = cur;
 			break;
@@ -303,7 +269,6 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
 			INIT_LIST_HEAD(&nbl->nbl_lru);
 			fh_copy_shallow(&nbl->nbl_fh, fh);
 			locks_init_lock(&nbl->nbl_lock);
-			kref_init(&nbl->nbl_kref);
 			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
 					&nfsd4_cb_notify_lock_ops,
 					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
@@ -312,21 +277,12 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
 	return nbl;
 }
 
-static void
-free_nbl(struct kref *kref)
-{
-	struct nfsd4_blocked_lock *nbl;
-
-	nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
-	locks_release_private(&nbl->nbl_lock);
-	kfree(nbl);
-}
-
 static void
 free_blocked_lock(struct nfsd4_blocked_lock *nbl)
 {
 	locks_delete_block(&nbl->nbl_lock);
-	kref_put(&nbl->nbl_kref, free_nbl);
+	locks_release_private(&nbl->nbl_lock);
+	kfree(nbl);
 }
 
 static void
@@ -344,7 +300,6 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
 					struct nfsd4_blocked_lock,
 					nbl_list);
 		list_del_init(&nbl->nbl_list);
-		WARN_ON(list_empty(&nbl->nbl_lru));
 		list_move(&nbl->nbl_lru, &reaplist);
 	}
 	spin_unlock(&nn->blocked_locks_lock);
@@ -369,8 +324,6 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
 static int
 nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
 {
-	trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
-
 	/*
 	 * Since this is just an optimization, we don't try very hard if it
 	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
@@ -400,130 +353,6 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
 	.release	= nfsd4_cb_notify_lock_release,
 };
 
-/*
- * We store the NONE, READ, WRITE, and BOTH bits separately in the
- * st_{access,deny}_bmap field of the stateid, in order to track not
- * only what share bits are currently in force, but also what
- * combinations of share bits previous opens have used.  This allows us
- * to enforce the recommendation in
- * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
- * the server return an error if the client attempt to downgrade to a
- * combination of share bits not explicable by closing some of its
- * previous opens.
- *
- * This enforcement is arguably incomplete, since we don't keep
- * track of access/deny bit combinations; so, e.g., we allow:
- *
- *	OPEN allow read, deny write
- *	OPEN allow both, deny none
- *	DOWNGRADE allow read, deny none
- *
- * which we should reject.
- *
- * But you could also argue that our current code is already overkill,
- * since it only exists to return NFS4ERR_INVAL on incorrect client
- * behavior.
- */
-static unsigned int
-bmap_to_share_mode(unsigned long bmap)
-{
-	int i;
-	unsigned int access = 0;
-
-	for (i = 1; i < 4; i++) {
-		if (test_bit(i, &bmap))
-			access |= i;
-	}
-	return access;
-}
-
-/* set share access for a given stateid */
-static inline void
-set_access(u32 access, struct nfs4_ol_stateid *stp)
-{
-	unsigned char mask = 1 << access;
-
-	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
-	stp->st_access_bmap |= mask;
-}
-
-/* clear share access for a given stateid */
-static inline void
-clear_access(u32 access, struct nfs4_ol_stateid *stp)
-{
-	unsigned char mask = 1 << access;
-
-	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
-	stp->st_access_bmap &= ~mask;
-}
-
-/* test whether a given stateid has access */
-static inline bool
-test_access(u32 access, struct nfs4_ol_stateid *stp)
-{
-	unsigned char mask = 1 << access;
-
-	return (bool)(stp->st_access_bmap & mask);
-}
-
-/* set share deny for a given stateid */
-static inline void
-set_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
-	unsigned char mask = 1 << deny;
-
-	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
-	stp->st_deny_bmap |= mask;
-}
-
-/* clear share deny for a given stateid */
-static inline void
-clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
-	unsigned char mask = 1 << deny;
-
-	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
-	stp->st_deny_bmap &= ~mask;
-}
-
-/* test whether a given stateid is denying specific access */
-static inline bool
-test_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
-	unsigned char mask = 1 << deny;
-
-	return (bool)(stp->st_deny_bmap & mask);
-}
-
-static int nfs4_access_to_omode(u32 access)
-{
-	switch (access & NFS4_SHARE_ACCESS_BOTH) {
-	case NFS4_SHARE_ACCESS_READ:
-		return O_RDONLY;
-	case NFS4_SHARE_ACCESS_WRITE:
-		return O_WRONLY;
-	case NFS4_SHARE_ACCESS_BOTH:
-		return O_RDWR;
-	}
-	WARN_ON_ONCE(1);
-	return O_RDONLY;
-}
-
-static inline int
-access_permit_read(struct nfs4_ol_stateid *stp)
-{
-	return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
-		test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
-		test_access(NFS4_SHARE_ACCESS_WRITE, stp);
-}
-
-static inline int
-access_permit_write(struct nfs4_ol_stateid *stp)
-{
-	return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
-		test_access(NFS4_SHARE_ACCESS_BOTH, stp);
-}
-
 static inline struct nfs4_stateowner *
 nfs4_get_stateowner(struct nfs4_stateowner *sop)
 {
@@ -591,8 +420,11 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
 void
 put_nfs4_file(struct nfs4_file *fi)
 {
-	if (refcount_dec_and_test(&fi->fi_ref)) {
-		nfsd4_file_hash_remove(fi);
+	might_lock(&state_lock);
+
+	if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
+		hlist_del_rcu(&fi->fi_hash);
+		spin_unlock(&state_lock);
 		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
 		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
 		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
@@ -602,7 +434,9 @@ put_nfs4_file(struct nfs4_file *fi)
 static struct nfsd_file *
 __nfs4_get_fd(struct nfs4_file *f, int oflag)
 {
-	return nfsd_file_get(f->fi_fds[oflag]);
+	if (f->fi_fds[oflag])
+		return nfsd_file_get(f->fi_fds[oflag]);
+	return NULL;
 }
 
 static struct nfsd_file *
@@ -715,72 +549,22 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
 	return ret & OWNER_HASH_MASK;
 }
 
-static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
+/* hash table for nfs4_file */
+#define FILE_HASH_BITS                   8
+#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
 
-static const struct rhashtable_params nfs4_file_rhash_params = {
-	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
-	.key_offset		= offsetof(struct nfs4_file, fi_inode),
-	.head_offset		= offsetof(struct nfs4_file, fi_rlist),
-
-	/*
-	 * Start with a single page hash table to reduce resizing churn
-	 * on light workloads.
-	 */
-	.min_size		= 256,
-	.automatic_shrinking	= true,
-};
-
-/*
- * Check if courtesy clients have conflicting access and resolve it if possible
- *
- * access:  is op_share_access if share_access is true.
- *	    Check if access mode, op_share_access, would conflict with
- *	    the current deny mode of the file 'fp'.
- * access:  is op_share_deny if share_access is false.
- *	    Check if the deny mode, op_share_deny, would conflict with
- *	    current access of the file 'fp'.
- * stp:     skip checking this entry.
- * new_stp: normal open, not open upgrade.
- *
- * Function returns:
- *	false - access/deny mode conflict with normal client.
- *	true  - no conflict or conflict with courtesy client(s) is resolved.
- */
-static bool
-nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp,
-		struct nfs4_ol_stateid *stp, u32 access, bool share_access)
+static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
 {
-	struct nfs4_ol_stateid *st;
-	bool resolvable = true;
-	unsigned char bmap;
-	struct nfsd_net *nn;
-	struct nfs4_client *clp;
-
-	lockdep_assert_held(&fp->fi_lock);
-	list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
-		/* ignore lock stateid */
-		if (st->st_openstp)
-			continue;
-		if (st == stp && new_stp)
-			continue;
-		/* check file access against deny mode or vice versa */
-		bmap = share_access ? st->st_deny_bmap : st->st_access_bmap;
-		if (!(access & bmap_to_share_mode(bmap)))
-			continue;
-		clp = st->st_stid.sc_client;
-		if (try_to_expire_client(clp))
-			continue;
-		resolvable = false;
-		break;
-	}
-	if (resolvable) {
-		clp = stp->st_stid.sc_client;
-		nn = net_generic(clp->net, nfsd_net_id);
-		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
-	}
-	return resolvable;
+	return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
 }
 
+static unsigned int file_hashval(struct knfsd_fh *fh)
+{
+	return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
+}
+
+static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
+
 static void
 __nfs4_file_get_access(struct nfs4_file *fp, u32 access)
 {
@@ -984,23 +768,23 @@ out_free:
  * Create a unique stateid_t to represent each COPY.
  */
 static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
-			      unsigned char cs_type)
+			      unsigned char sc_type)
 {
 	int new_id;
 
-	stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
-	stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+	stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+	stid->sc_type = sc_type;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->s2s_cp_lock);
 	new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
-	stid->cs_stid.si_opaque.so_id = new_id;
-	stid->cs_stid.si_generation = 1;
+	stid->stid.si_opaque.so_id = new_id;
+	stid->stid.si_generation = 1;
 	spin_unlock(&nn->s2s_cp_lock);
 	idr_preload_end();
 	if (new_id < 0)
 		return 0;
-	stid->cs_type = cs_type;
 	return 1;
 }
 
@@ -1018,7 +802,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
 	if (!cps)
 		return NULL;
 	cps->cpntf_time = ktime_get_boottime_seconds();
-	refcount_set(&cps->cp_stateid.cs_count, 1);
+	refcount_set(&cps->cp_stateid.sc_count, 1);
 	if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
 		goto out_free;
 	spin_lock(&nn->s2s_cp_lock);
@@ -1034,12 +818,11 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy)
 {
 	struct nfsd_net *nn;
 
-	if (copy->cp_stateid.cs_type != NFS4_COPY_STID)
-		return;
+	WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
 	nn = net_generic(copy->cp_clp->net, nfsd_net_id);
 	spin_lock(&nn->s2s_cp_lock);
 	idr_remove(&nn->s2s_cp_stateids,
-		   copy->cp_stateid.cs_stid.si_opaque.so_id);
+		   copy->cp_stateid.stid.si_opaque.so_id);
 	spin_unlock(&nn->s2s_cp_lock);
 }
 
@@ -1071,12 +854,7 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
 
 static void nfs4_free_deleg(struct nfs4_stid *stid)
 {
-	struct nfs4_delegation *dp = delegstateid(stid);
-
-	WARN_ON_ONCE(!list_empty(&stid->sc_cp_list));
-	WARN_ON_ONCE(!list_empty(&dp->dl_perfile));
-	WARN_ON_ONCE(!list_empty(&dp->dl_perclnt));
-	WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru));
+	WARN_ON(!list_empty(&stid->sc_cp_list));
 	kmem_cache_free(deleg_slab, stid);
 	atomic_long_dec(&num_delegations);
 }
@@ -1126,7 +904,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
 		}
 		spin_unlock(&blocked_delegations_lock);
 	}
-	hash = jhash(&fh->fh_raw, fh->fh_size, 0);
+	hash = jhash(&fh->fh_base, fh->fh_size, 0);
 	if (test_bit(hash&255, bd->set[0]) &&
 	    test_bit((hash>>8)&255, bd->set[0]) &&
 	    test_bit((hash>>16)&255, bd->set[0]))
@@ -1145,7 +923,7 @@ static void block_delegations(struct knfsd_fh *fh)
 	u32 hash;
 	struct bloom_pair *bd = &blocked_delegations;
 
-	hash = jhash(&fh->fh_raw, fh->fh_size, 0);
+	hash = jhash(&fh->fh_base, fh->fh_size, 0);
 
 	spin_lock(&blocked_delegations_lock);
 	__set_bit(hash&255, bd->set[bd->new]);
@@ -1159,6 +937,7 @@ static void block_delegations(struct knfsd_fh *fh)
 
 static struct nfs4_delegation *
 alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+		 struct svc_fh *current_fh,
 		 struct nfs4_clnt_odstate *odstate)
 {
 	struct nfs4_delegation *dp;
@@ -1168,7 +947,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
 	n = atomic_long_inc_return(&num_delegations);
 	if (n < 0 || n > max_delegations)
 		goto out_dec;
-	if (delegation_blocked(&fp->fi_fhandle))
+	if (delegation_blocked(&current_fh->fh_handle))
 		goto out_dec;
 	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
 	if (dp == NULL)
@@ -1187,7 +966,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
 	get_clnt_odstate(odstate);
 	dp->dl_type = NFS4_OPEN_DELEGATE_READ;
 	dp->dl_retries = 1;
-	dp->dl_recalled = false;
 	nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
 		      &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
 	get_nfs4_file(fp);
@@ -1366,8 +1144,6 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
 	WARN_ON(!list_empty(&dp->dl_recall_lru));
 
-	trace_nfsd_stid_revoke(&dp->dl_stid);
-
 	if (clp->cl_minorversion) {
 		spin_lock(&clp->cl_lock);
 		dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
@@ -1392,6 +1168,108 @@ static unsigned int clientstr_hashval(struct xdr_netobj name)
 	return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
 }
 
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used.  This allows us
+ * to enforce the recommendation of rfc 3530 14.2.19 that the server
+ * return an error if the client attempt to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ *	OPEN allow read, deny write
+ *	OPEN allow both, deny none
+ *	DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
+static unsigned int
+bmap_to_share_mode(unsigned long bmap) {
+	int i;
+	unsigned int access = 0;
+
+	for (i = 1; i < 4; i++) {
+		if (test_bit(i, &bmap))
+			access |= i;
+	}
+	return access;
+}
+
+/* set share access for a given stateid */
+static inline void
+set_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+	unsigned char mask = 1 << access;
+
+	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+	stp->st_access_bmap |= mask;
+}
+
+/* clear share access for a given stateid */
+static inline void
+clear_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+	unsigned char mask = 1 << access;
+
+	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+	stp->st_access_bmap &= ~mask;
+}
+
+/* test whether a given stateid has access */
+static inline bool
+test_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+	unsigned char mask = 1 << access;
+
+	return (bool)(stp->st_access_bmap & mask);
+}
+
+/* set share deny for a given stateid */
+static inline void
+set_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+	unsigned char mask = 1 << deny;
+
+	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+	stp->st_deny_bmap |= mask;
+}
+
+/* clear share deny for a given stateid */
+static inline void
+clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+	unsigned char mask = 1 << deny;
+
+	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+	stp->st_deny_bmap &= ~mask;
+}
+
+/* test whether a given stateid is denying specific access */
+static inline bool
+test_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+	unsigned char mask = 1 << deny;
+
+	return (bool)(stp->st_deny_bmap & mask);
+}
+
+static int nfs4_access_to_omode(u32 access)
+{
+	switch (access & NFS4_SHARE_ACCESS_BOTH) {
+	case NFS4_SHARE_ACCESS_READ:
+		return O_RDONLY;
+	case NFS4_SHARE_ACCESS_WRITE:
+		return O_WRONLY;
+	case NFS4_SHARE_ACCESS_BOTH:
+		return O_RDWR;
+	}
+	WARN_ON_ONCE(1);
+	return O_RDONLY;
+}
+
 /*
  * A stateid that had a deny mode associated with it is being released
  * or downgraded. Recalculate the deny mode on the file.
@@ -1832,12 +1710,13 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	int numslots = fattrs->maxreqs;
 	int slotsize = slot_bytes(fattrs);
 	struct nfsd4_session *new;
-	int i;
+	int mem, i;
 
-	BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
-		     > PAGE_SIZE);
+	BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
+			+ sizeof(struct nfsd4_session) > PAGE_SIZE);
+	mem = numslots * sizeof(struct nfsd4_slot *);
 
-	new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+	new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
 	if (!new)
 		return NULL;
 	/* allocate each struct nfsd4_slot and data cache in one piece */
@@ -1869,8 +1748,6 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u)
 	struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
 	struct nfs4_client *clp = c->cn_session->se_client;
 
-	trace_nfsd_cb_lost(clp);
-
 	spin_lock(&clp->cl_lock);
 	if (!list_empty(&c->cn_persession)) {
 		list_del(&c->cn_persession);
@@ -2082,16 +1959,11 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
  * This type of memory management is somewhat inefficient, but we use it
  * anyway since SETCLIENTID is not a common operation.
  */
-static struct nfs4_client *alloc_client(struct xdr_netobj name,
-				struct nfsd_net *nn)
+static struct nfs4_client *alloc_client(struct xdr_netobj name)
 {
 	struct nfs4_client *clp;
 	int i;
 
-	if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) {
-		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
-		return NULL;
-	}
 	clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
 	if (clp == NULL)
 		return NULL;
@@ -2109,9 +1981,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
 	idr_init(&clp->cl_stateids);
 	atomic_set(&clp->cl_rpc_users, 0);
 	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
-	clp->cl_state = NFSD4_ACTIVE;
-	atomic_inc(&nn->nfs4_client_count);
-	atomic_set(&clp->cl_delegs_in_recall, 0);
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
 	INIT_LIST_HEAD(&clp->cl_delegations);
@@ -2143,7 +2012,6 @@ static void __free_client(struct kref *k)
 	kfree(clp->cl_nii_domain.data);
 	kfree(clp->cl_nii_name.data);
 	idr_destroy(&clp->cl_stateids);
-	kfree(clp->cl_ra);
 	kmem_cache_free(client_slab, clp);
 }
 
@@ -2219,7 +2087,6 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
 static void
 __destroy_client(struct nfs4_client *clp)
 {
-	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	int i;
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
@@ -2263,8 +2130,6 @@ __destroy_client(struct nfs4_client *clp)
 	nfsd4_shutdown_callback(clp);
 	if (clp->cl_cb_conn.cb_xprt)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
-	atomic_add_unless(&nn->nfs4_client_count, -1, 0);
-	nfsd4_dec_courtesy_client_count(nn, clp);
 	free_client(clp);
 	wake_up_all(&expiry_wq);
 }
@@ -2493,24 +2358,9 @@ static void seq_quote_mem(struct seq_file *m, char *data, int len)
 	seq_printf(m, "\"");
 }
 
-static const char *cb_state2str(int state)
-{
-	switch (state) {
-	case NFSD4_CB_UP:
-		return "UP";
-	case NFSD4_CB_UNKNOWN:
-		return "UNKNOWN";
-	case NFSD4_CB_DOWN:
-		return "DOWN";
-	case NFSD4_CB_FAULT:
-		return "FAULT";
-	}
-	return "UNDEFINED";
-}
-
 static int client_info_show(struct seq_file *m, void *v)
 {
-	struct inode *inode = file_inode(m->file);
+	struct inode *inode = m->private;
 	struct nfs4_client *clp;
 	u64 clid;
 
@@ -2520,17 +2370,6 @@ static int client_info_show(struct seq_file *m, void *v)
 	memcpy(&clid, &clp->cl_clientid, sizeof(clid));
 	seq_printf(m, "clientid: 0x%llx\n", clid);
 	seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
-
-	if (clp->cl_state == NFSD4_COURTESY)
-		seq_puts(m, "status: courtesy\n");
-	else if (clp->cl_state == NFSD4_EXPIRABLE)
-		seq_puts(m, "status: expirable\n");
-	else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
-		seq_puts(m, "status: confirmed\n");
-	else
-		seq_puts(m, "status: unconfirmed\n");
-	seq_printf(m, "seconds from last renew: %lld\n",
-		ktime_get_boottime_seconds() - clp->cl_time);
 	seq_printf(m, "name: ");
 	seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
 	seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
@@ -2543,14 +2382,22 @@ static int client_info_show(struct seq_file *m, void *v)
 		seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
 			clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
 	}
-	seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
-	seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
 	drop_client(clp);
 
 	return 0;
 }
 
-DEFINE_SHOW_ATTRIBUTE(client_info);
+static int client_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, client_info_show, inode);
+}
+
+static const struct file_operations client_info_fops = {
+	.open		= client_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
 static void *states_start(struct seq_file *s, loff_t *pos)
 	__acquires(&clp->cl_lock)
@@ -2593,7 +2440,7 @@ static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f)
 
 static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
 {
-	struct inode *inode = file_inode(f->nf_file);
+	struct inode *inode = f->nf_inode;
 
 	seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
 					MAJOR(inode->i_sb->s_dev),
@@ -2821,8 +2668,6 @@ static void force_expire_client(struct nfs4_client *clp)
 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
 	bool already_expired;
 
-	trace_nfsd_clid_admin_expired(&clp->cl_clientid);
-
 	spin_lock(&nn->client_lock);
 	clp->cl_time = 0;
 	spin_unlock(&nn->client_lock);
@@ -2871,36 +2716,6 @@ static const struct tree_descr client_files[] = {
 	[3] = {""},
 };
 
-static int
-nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
-				struct rpc_task *task)
-{
-	switch (task->tk_status) {
-	case -NFS4ERR_DELAY:
-		rpc_delay(task, 2 * HZ);
-		return 0;
-	default:
-		return 1;
-	}
-}
-
-static void
-nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
-{
-	struct nfs4_client *clp = cb->cb_clp;
-	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
-
-	spin_lock(&nn->client_lock);
-	clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
-	put_client_renew_locked(clp);
-	spin_unlock(&nn->client_lock);
-}
-
-static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
-	.done		= nfsd4_cb_recall_any_done,
-	.release	= nfsd4_cb_recall_any_release,
-};
-
 static struct nfs4_client *create_client(struct xdr_netobj name,
 		struct svc_rqst *rqstp, nfs4_verifier *verf)
 {
@@ -2909,9 +2724,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	int ret;
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct dentry *dentries[ARRAY_SIZE(client_files)];
 
-	clp = alloc_client(name, nn);
+	clp = alloc_client(name);
 	if (clp == NULL)
 		return NULL;
 
@@ -2929,23 +2743,13 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
 	memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
 	clp->cl_cb_session = NULL;
 	clp->net = net;
-	clp->cl_nfsd_dentry = nfsd_client_mkdir(
-		nn, &clp->cl_nfsdfs,
-		clp->cl_clientid.cl_id - nn->clientid_base,
-		client_files, dentries);
-	clp->cl_nfsd_info_dentry = dentries[0];
+	clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs,
+			clp->cl_clientid.cl_id - nn->clientid_base,
+			client_files);
 	if (!clp->cl_nfsd_dentry) {
 		free_client(clp);
 		return NULL;
 	}
-	clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
-	if (!clp->cl_ra) {
-		free_client(clp);
-		return NULL;
-	}
-	clp->cl_ra_time = 0;
-	nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
-			NFSPROC4_CLNT_CB_RECALL_ANY);
 	return clp;
 }
 
@@ -3012,11 +2816,11 @@ move_to_confirmed(struct nfs4_client *clp)
 
 	lockdep_assert_held(&nn->client_lock);
 
+	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
 	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
 	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
 	add_clp_to_name_tree(clp, &nn->conf_name_tree);
 	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
-	trace_nfsd_clid_confirmed(&clp->cl_clientid);
 	renew_client_locked(clp);
 }
 
@@ -3121,7 +2925,7 @@ out_err:
 static void
 nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 {
-	struct xdr_buf *buf = resp->xdr->buf;
+	struct xdr_buf *buf = resp->xdr.buf;
 	struct nfsd4_slot *slot = resp->cstate.slot;
 	unsigned int base;
 
@@ -3191,7 +2995,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 			 struct nfsd4_sequence *seq)
 {
 	struct nfsd4_slot *slot = resp->cstate.slot;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 	__be32 status;
 
@@ -3285,7 +3089,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	rpc_ntop(sa, addr_str, sizeof(addr_str));
 	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
-		"ip_addr=%s flags %x, spa_how %u\n",
+		"ip_addr=%s flags %x, spa_how %d\n",
 		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
 		addr_str, exid->flags, exid->spa_how);
 
@@ -3332,7 +3136,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out_nolock;
 		}
 		new->cl_mach_cred = true;
-		break;
 	case SP4_NONE:
 		break;
 	default:				/* checked by xdr code */
@@ -3369,24 +3172,20 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			}
 			/* case 6 */
 			exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
-			trace_nfsd_clid_confirmed_r(conf);
 			goto out_copy;
 		}
 		if (!creds_match) { /* case 3 */
 			if (client_has_state(conf)) {
 				status = nfserr_clid_inuse;
-				trace_nfsd_clid_cred_mismatch(conf, rqstp);
 				goto out;
 			}
 			goto out_new;
 		}
 		if (verfs_match) { /* case 2 */
 			conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
-			trace_nfsd_clid_confirmed_r(conf);
 			goto out_copy;
 		}
 		/* case 5, client reboot */
-		trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf);
 		conf = NULL;
 		goto out_new;
 	}
@@ -3396,19 +3195,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
 
-	unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
+	unconf  = find_unconfirmed_client_by_name(&exid->clname, nn);
 	if (unconf) /* case 4, possible retry or client restart */
 		unhash_client_locked(unconf);
 
-	/* case 1, new owner ID */
-	trace_nfsd_clid_fresh(new);
-
+	/* case 1 (normal case) */
 out_new:
 	if (conf) {
 		status = mark_client_expired_locked(conf);
 		if (status)
 			goto out;
-		trace_nfsd_clid_replaced(&conf->cl_clientid);
 	}
 	new->cl_minorversion = cstate->minorversion;
 	new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
@@ -3432,10 +3228,8 @@ out:
 out_nolock:
 	if (new)
 		expire_client(new);
-	if (unconf) {
-		trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
+	if (unconf)
 		expire_client(unconf);
-	}
 	return status;
 }
 
@@ -3627,10 +3421,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 			goto out_free_conn;
 		}
 	} else if (unconf) {
-		status = nfserr_clid_inuse;
 		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
 		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
-			trace_nfsd_clid_cred_mismatch(unconf, rqstp);
+			status = nfserr_clid_inuse;
 			goto out_free_conn;
 		}
 		status = nfserr_wrong_cred;
@@ -3650,7 +3443,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 				old = NULL;
 				goto out_free_conn;
 			}
-			trace_nfsd_clid_replaced(&old->cl_clientid);
 		}
 		move_to_confirmed(unconf);
 		conf = unconf;
@@ -3675,8 +3467,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	/* cache solo and embedded create sessions under the client_lock */
 	nfsd4_cache_create_session(cr_ses, cs_slot, status);
 	spin_unlock(&nn->client_lock);
-	if (conf == unconf)
-		fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
 	/* init connection and backchannel */
 	nfsd4_init_conn(rqstp, conn, new);
 	nfsd4_put_session(new);
@@ -3950,7 +3740,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfsd4_sequence *seq = &u->sequence;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_session *session;
 	struct nfs4_client *clp;
 	struct nfsd4_slot *slot;
@@ -4120,7 +3910,6 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp,
 		status = nfserr_wrong_cred;
 		goto out;
 	}
-	trace_nfsd_clid_destroyed(&clp->cl_clientid);
 	unhash_client_locked(clp);
 out:
 	spin_unlock(&nn->client_lock);
@@ -4134,7 +3923,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
 	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
-	struct nfs4_client *clp = cstate->clp;
 	__be32 status = 0;
 
 	if (rc->rca_one_fs) {
@@ -4148,11 +3936,12 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
 	}
 
 	status = nfserr_complete_already;
-	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
+	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
+			     &cstate->session->se_client->cl_flags))
 		goto out;
 
 	status = nfserr_stale_clientid;
-	if (is_client_expired(clp))
+	if (is_client_expired(cstate->session->se_client))
 		/*
 		 * The following error isn't really legal.
 		 * But we only get here if the client just explicitly
@@ -4163,9 +3952,8 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
 		goto out;
 
 	status = nfs_ok;
-	trace_nfsd_clid_reclaim_complete(&clp->cl_clientid);
-	nfsd4_client_record_create(clp);
-	inc_reclaim_complete(clp);
+	nfsd4_client_record_create(cstate->session->se_client);
+	inc_reclaim_complete(cstate->session->se_client);
 out:
 	return status;
 }
@@ -4185,29 +3973,27 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	new = create_client(clname, rqstp, &clverifier);
 	if (new == NULL)
 		return nfserr_jukebox;
+	/* Cases below refer to rfc 3530 section 14.2.33: */
 	spin_lock(&nn->client_lock);
 	conf = find_confirmed_client_by_name(&clname, nn);
 	if (conf && client_has_state(conf)) {
+		/* case 0: */
 		status = nfserr_clid_inuse;
 		if (clp_used_exchangeid(conf))
 			goto out;
 		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
-			trace_nfsd_clid_cred_mismatch(conf, rqstp);
+			trace_nfsd_clid_inuse_err(conf);
 			goto out;
 		}
 	}
 	unconf = find_unconfirmed_client_by_name(&clname, nn);
 	if (unconf)
 		unhash_client_locked(unconf);
-	if (conf) {
-		if (same_verf(&conf->cl_verifier, &clverifier)) {
-			copy_clid(new, conf);
-			gen_confirm(new, nn);
-		} else
-			trace_nfsd_clid_verf_mismatch(conf, rqstp,
-						      &clverifier);
-	} else
-		trace_nfsd_clid_fresh(new);
+	/* We need to handle only case 1: probable callback update */
+	if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
+		copy_clid(new, conf);
+		gen_confirm(new, nn);
+	}
 	new->cl_minorversion = 0;
 	gen_callback(new, setclid, rqstp);
 	add_to_unconfirmed(new);
@@ -4220,13 +4006,12 @@ out:
 	spin_unlock(&nn->client_lock);
 	if (new)
 		free_client(new);
-	if (unconf) {
-		trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
+	if (unconf)
 		expire_client(unconf);
-	}
 	return status;
 }
 
+
 __be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 			struct nfsd4_compound_state *cstate,
@@ -4255,27 +4040,25 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 	 * Nevertheless, RFC 7530 recommends INUSE for this case:
 	 */
 	status = nfserr_clid_inuse;
-	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
-		trace_nfsd_clid_cred_mismatch(unconf, rqstp);
+	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
 		goto out;
-	}
-	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
-		trace_nfsd_clid_cred_mismatch(conf, rqstp);
+	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
 		goto out;
-	}
+	/* cases below refer to rfc 3530 section 14.2.34: */
 	if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
 		if (conf && same_verf(&confirm, &conf->cl_confirm)) {
+			/* case 2: probable retransmit */
 			status = nfs_ok;
-		} else
+		} else /* case 4: client hasn't noticed we rebooted yet? */
 			status = nfserr_stale_clientid;
 		goto out;
 	}
 	status = nfs_ok;
-	if (conf) {
+	if (conf) { /* case 1: callback update */
 		old = unconf;
 		unhash_client_locked(old);
 		nfsd4_change_callback(conf, &unconf->cl_cb_conn);
-	} else {
+	} else { /* case 3: normal case; new or rebooted client */
 		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
 		if (old) {
 			status = nfserr_clid_inuse;
@@ -4290,15 +4073,12 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 				old = NULL;
 				goto out;
 			}
-			trace_nfsd_clid_replaced(&old->cl_clientid);
 		}
 		move_to_confirmed(unconf);
 		conf = unconf;
 	}
 	get_client_locked(conf);
 	spin_unlock(&nn->client_lock);
-	if (conf == unconf)
-		fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
 	nfsd4_probe_callback(conf);
 	spin_lock(&nn->client_lock);
 	put_client_renew_locked(conf);
@@ -4315,26 +4095,27 @@ static struct nfs4_file *nfsd4_alloc_file(void)
 }
 
 /* OPEN Share state helper functions */
-
-static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
+static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
+				struct nfs4_file *fp)
 {
+	lockdep_assert_held(&state_lock);
+
 	refcount_set(&fp->fi_ref, 1);
 	spin_lock_init(&fp->fi_lock);
 	INIT_LIST_HEAD(&fp->fi_stateids);
 	INIT_LIST_HEAD(&fp->fi_delegations);
 	INIT_LIST_HEAD(&fp->fi_clnt_odstate);
-	fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
+	fh_copy_shallow(&fp->fi_fhandle, fh);
 	fp->fi_deleg_file = NULL;
 	fp->fi_had_conflict = false;
 	fp->fi_share_deny = 0;
 	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
 	memset(fp->fi_access, 0, sizeof(fp->fi_access));
-	fp->fi_aliased = false;
-	fp->fi_inode = d_inode(fh->fh_dentry);
 #ifdef CONFIG_NFSD_PNFS
 	INIT_LIST_HEAD(&fp->fi_lo_states);
 	atomic_set(&fp->fi_lo_recalls, 0);
 #endif
+	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
 }
 
 void
@@ -4398,51 +4179,6 @@ out:
 	return -ENOMEM;
 }
 
-static unsigned long
-nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
-{
-	int count;
-	struct nfsd_net *nn = container_of(shrink,
-			struct nfsd_net, nfsd_client_shrinker);
-
-	count = atomic_read(&nn->nfsd_courtesy_clients);
-	if (!count)
-		count = atomic_long_read(&num_delegations);
-	if (count)
-		queue_work(laundry_wq, &nn->nfsd_shrinker_work);
-	return (unsigned long)count;
-}
-
-static unsigned long
-nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
-{
-	return SHRINK_STOP;
-}
-
-void
-nfsd4_init_leases_net(struct nfsd_net *nn)
-{
-	struct sysinfo si;
-	u64 max_clients;
-
-	nn->nfsd4_lease = 90;	/* default lease time */
-	nn->nfsd4_grace = 90;
-	nn->somebody_reclaimed = false;
-	nn->track_reclaim_completes = false;
-	nn->clverifier_counter = prandom_u32();
-	nn->clientid_base = prandom_u32();
-	nn->clientid_counter = nn->clientid_base + 1;
-	nn->s2s_cp_cl_id = nn->clientid_counter++;
-
-	atomic_set(&nn->nfs4_client_count, 0);
-	si_meminfo(&si);
-	max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024);
-	max_clients *= NFS4_CLIENTS_PER_GB;
-	nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
-
-	atomic_set(&nn->nfsd_courtesy_clients, 0);
-}
-
 static void init_nfs4_replay(struct nfs4_replay *rp)
 {
 	rp->rp_status = nfserr_serverfault;
@@ -4711,80 +4447,55 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 		nfs4_put_stid(&last->st_stid);
 }
 
-static noinline_for_stack struct nfs4_file *
-nfsd4_file_hash_lookup(const struct svc_fh *fhp)
+/* search file_hashtbl[] for file */
+static struct nfs4_file *
+find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
 {
-	struct inode *inode = d_inode(fhp->fh_dentry);
-	struct rhlist_head *tmp, *list;
-	struct nfs4_file *fi;
+	struct nfs4_file *fp;
 
-	rcu_read_lock();
-	list = rhltable_lookup(&nfs4_file_rhltable, &inode,
-			       nfs4_file_rhash_params);
-	rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
-		if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
-			if (refcount_inc_not_zero(&fi->fi_ref)) {
-				rcu_read_unlock();
-				return fi;
-			}
+	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
+				lockdep_is_held(&state_lock)) {
+		if (fh_match(&fp->fi_fhandle, fh)) {
+			if (refcount_inc_not_zero(&fp->fi_ref))
+				return fp;
 		}
 	}
-	rcu_read_unlock();
 	return NULL;
 }
 
-/*
- * On hash insertion, identify entries with the same inode but
- * distinct filehandles. They will all be on the list returned
- * by rhltable_lookup().
- *
- * inode->i_lock prevents racing insertions from adding an entry
- * for the same inode/fhp pair twice.
- */
-static noinline_for_stack struct nfs4_file *
-nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
+struct nfs4_file *
+find_file(struct knfsd_fh *fh)
 {
-	struct inode *inode = d_inode(fhp->fh_dentry);
-	struct rhlist_head *tmp, *list;
-	struct nfs4_file *ret = NULL;
-	bool alias_found = false;
-	struct nfs4_file *fi;
-	int err;
+	struct nfs4_file *fp;
+	unsigned int hashval = file_hashval(fh);
 
 	rcu_read_lock();
-	spin_lock(&inode->i_lock);
-
-	list = rhltable_lookup(&nfs4_file_rhltable, &inode,
-			       nfs4_file_rhash_params);
-	rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
-		if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
-			if (refcount_inc_not_zero(&fi->fi_ref))
-				ret = fi;
-		} else
-			fi->fi_aliased = alias_found = true;
-	}
-	if (ret)
-		goto out_unlock;
-
-	nfsd4_file_init(fhp, new);
-	err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
-			      nfs4_file_rhash_params);
-	if (err)
-		goto out_unlock;
-
-	new->fi_aliased = alias_found;
-	ret = new;
-
-out_unlock:
-	spin_unlock(&inode->i_lock);
+	fp = find_file_locked(fh, hashval);
 	rcu_read_unlock();
-	return ret;
+	return fp;
 }
 
-static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
+static struct nfs4_file *
+find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
 {
-	rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
-			nfs4_file_rhash_params);
+	struct nfs4_file *fp;
+	unsigned int hashval = file_hashval(fh);
+
+	rcu_read_lock();
+	fp = find_file_locked(fh, hashval);
+	rcu_read_unlock();
+	if (fp)
+		return fp;
+
+	spin_lock(&state_lock);
+	fp = find_file_locked(fh, hashval);
+	if (likely(fp == NULL)) {
+		nfsd4_init_file(fh, hashval, new);
+		fp = new;
+	}
+	spin_unlock(&state_lock);
+
+	return fp;
 }
 
 /*
@@ -4797,10 +4508,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	struct nfs4_file *fp;
 	__be32 ret = nfs_ok;
 
-	fp = nfsd4_file_hash_lookup(current_fh);
+	fp = find_file(&current_fh->fh_handle);
 	if (!fp)
 		return ret;
-
 	/* Check for conflicting share reservations */
 	spin_lock(&fp->fi_lock);
 	if (fp->fi_share_deny & deny_type)
@@ -4810,35 +4520,6 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	return ret;
 }
 
-static bool nfsd4_deleg_present(const struct inode *inode)
-{
-	struct file_lock_context *ctx = locks_inode_context(inode);
-
-	return ctx && !list_empty_careful(&ctx->flc_lease);
-}
-
-/**
- * nfsd_wait_for_delegreturn - wait for delegations to be returned
- * @rqstp: the RPC transaction being executed
- * @inode: in-core inode of the file being waited for
- *
- * The timeout prevents deadlock if all nfsd threads happen to be
- * tied up waiting for returning delegations.
- *
- * Return values:
- *   %true: delegation was returned
- *   %false: timed out waiting for delegreturn
- */
-bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
-{
-	long __maybe_unused timeo;
-
-	timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode),
-				       NFSD_DELEGRETURN_TIMEOUT);
-	trace_nfsd_delegret_wakeup(rqstp, inode, timeo);
-	return timeo > 0;
-}
-
 static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
 {
 	struct nfs4_delegation *dp = cb_to_delegation(cb);
@@ -4867,8 +4548,6 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
 {
 	struct nfs4_delegation *dp = cb_to_delegation(cb);
 
-	trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
-
 	if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
 	    dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
 	        return 1;
@@ -4914,30 +4593,22 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 	 * We're assuming the state code never drops its reference
 	 * without first removing the lease.  Since we're in this lease
 	 * callback (and since the lease code is serialized by the
-	 * flc_lock) we know the server hasn't removed the lease yet, and
+	 * i_lock) we know the server hasn't removed the lease yet, and
 	 * we know it's safe to take a reference.
 	 */
 	refcount_inc(&dp->dl_stid.sc_count);
-	WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+	nfsd4_run_cb(&dp->dl_recall);
 }
 
-/* Called from break_lease() with flc_lock held. */
+/* Called from break_lease() with i_lock held. */
 static bool
 nfsd_break_deleg_cb(struct file_lock *fl)
 {
+	bool ret = false;
 	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
-	struct nfs4_client *clp = dp->dl_stid.sc_client;
-	struct nfsd_net *nn;
 
-	trace_nfsd_cb_recall(&dp->dl_stid);
-
-	dp->dl_recalled = true;
-	atomic_inc(&clp->cl_delegs_in_recall);
-	if (try_to_expire_client(clp)) {
-		nn = net_generic(clp->net, nfsd_net_id);
-		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
-	}
+	trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid);
 
 	/*
 	 * We don't want the locks code to timeout the lease for us;
@@ -4946,9 +4617,11 @@ nfsd_break_deleg_cb(struct file_lock *fl)
 	 */
 	fl->fl_break_time = 0;
 
+	spin_lock(&fp->fi_lock);
 	fp->fi_had_conflict = true;
 	nfsd_break_one_deleg(dp);
-	return false;
+	spin_unlock(&fp->fi_lock);
+	return ret;
 }
 
 /**
@@ -4979,14 +4652,9 @@ static int
 nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
 		     struct list_head *dispose)
 {
-	struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner;
-	struct nfs4_client *clp = dp->dl_stid.sc_client;
-
-	if (arg & F_UNLCK) {
-		if (dp->dl_recalled)
-			atomic_dec(&clp->cl_delegs_in_recall);
+	if (arg & F_UNLCK)
 		return lease_modify(onlist, arg, dispose);
-	} else
+	else
 		return -EAGAIN;
 }
 
@@ -5007,37 +4675,40 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
 	return nfserr_bad_seqid;
 }
 
-static struct nfs4_client *lookup_clientid(clientid_t *clid, bool sessions,
-						struct nfsd_net *nn)
+static __be32 lookup_clientid(clientid_t *clid,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd_net *nn,
+		bool sessions)
 {
 	struct nfs4_client *found;
 
-	spin_lock(&nn->client_lock);
-	found = find_confirmed_client(clid, sessions, nn);
-	if (found)
-		atomic_inc(&found->cl_rpc_users);
-	spin_unlock(&nn->client_lock);
-	return found;
-}
-
-static __be32 set_client(clientid_t *clid,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd_net *nn)
-{
 	if (cstate->clp) {
-		if (!same_clid(&cstate->clp->cl_clientid, clid))
+		found = cstate->clp;
+		if (!same_clid(&found->cl_clientid, clid))
 			return nfserr_stale_clientid;
 		return nfs_ok;
 	}
+
 	if (STALE_CLIENTID(clid, nn))
 		return nfserr_stale_clientid;
+
 	/*
-	 * We're in the 4.0 case (otherwise the SEQUENCE op would have
-	 * set cstate->clp), so session = false:
+	 * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
+	 * cached already then we know this is for v4.0 and "sessions"
+	 * will be false.
 	 */
-	cstate->clp = lookup_clientid(clid, false, nn);
-	if (!cstate->clp)
+	WARN_ON_ONCE(cstate->session);
+	spin_lock(&nn->client_lock);
+	found = find_confirmed_client(clid, sessions, nn);
+	if (!found) {
+		spin_unlock(&nn->client_lock);
 		return nfserr_expired;
+	}
+	atomic_inc(&found->cl_rpc_users);
+	spin_unlock(&nn->client_lock);
+
+	/* Cache the nfs4_client in cstate! */
+	cstate->clp = found;
 	return nfs_ok;
 }
 
@@ -5051,6 +4722,8 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 	struct nfs4_openowner *oo = NULL;
 	__be32 status;
 
+	if (STALE_CLIENTID(&open->op_clientid, nn))
+		return nfserr_stale_clientid;
 	/*
 	 * In case we need it later, after we've already created the
 	 * file and don't want to risk a further failure:
@@ -5059,7 +4732,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
 	if (open->op_file == NULL)
 		return nfserr_jukebox;
 
-	status = set_client(clientid, cstate, nn);
+	status = lookup_clientid(clientid, cstate, nn, false);
 	if (status)
 		return status;
 	clp = cstate->clp;
@@ -5183,19 +4856,16 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
 		.ia_valid = ATTR_SIZE,
 		.ia_size = 0,
 	};
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &iattr,
-	};
 	if (!open->op_truncate)
 		return 0;
 	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
 		return nfserr_inval;
-	return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0);
+	return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
 }
 
 static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
-		struct nfsd4_open *open, bool new_stp)
+		struct nfsd4_open *open)
 {
 	struct nfsd_file *nf = NULL;
 	__be32 status;
@@ -5211,13 +4881,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 	 */
 	status = nfs4_file_check_deny(fp, open->op_share_deny);
 	if (status != nfs_ok) {
-		if (status != nfserr_share_denied) {
-			spin_unlock(&fp->fi_lock);
-			goto out;
-		}
-		if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
-				stp, open->op_share_deny, false))
-			status = nfserr_jukebox;
 		spin_unlock(&fp->fi_lock);
 		goto out;
 	}
@@ -5225,13 +4888,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 	/* set access to the file */
 	status = nfs4_file_get_access(fp, open->op_share_access);
 	if (status != nfs_ok) {
-		if (status != nfserr_share_denied) {
-			spin_unlock(&fp->fi_lock);
-			goto out;
-		}
-		if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
-				stp, open->op_share_access, true))
-			status = nfserr_jukebox;
 		spin_unlock(&fp->fi_lock);
 		goto out;
 	}
@@ -5247,12 +4903,9 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
 
 	if (!fp->fi_fds[oflag]) {
 		spin_unlock(&fp->fi_lock);
-
-		status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
-						  open->op_filp, &nf);
-		if (status != nfs_ok)
+		status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
+		if (status)
 			goto out_put_access;
-
 		spin_lock(&fp->fi_lock);
 		if (!fp->fi_fds[oflag]) {
 			fp->fi_fds[oflag] = nf;
@@ -5281,30 +4934,21 @@ out_put_access:
 }
 
 static __be32
-nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
-		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
-		struct nfsd4_open *open)
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
 {
 	__be32 status;
 	unsigned char old_deny_bmap = stp->st_deny_bmap;
 
 	if (!test_access(open->op_share_access, stp))
-		return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false);
+		return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
 
 	/* test and set deny mode */
 	spin_lock(&fp->fi_lock);
 	status = nfs4_file_check_deny(fp, open->op_share_deny);
-	switch (status) {
-	case nfs_ok:
+	if (status == nfs_ok) {
 		set_deny(open->op_share_deny, stp);
 		fp->fi_share_deny |=
-			(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
-		break;
-	case nfserr_share_denied:
-		if (nfs4_resolve_deny_conflicts_locked(fp, false,
-				stp, open->op_share_deny, false))
-			status = nfserr_jukebox;
-		break;
+				(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
 	}
 	spin_unlock(&fp->fi_lock);
 
@@ -5348,118 +4992,11 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
 	return fl;
 }
 
-static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
-					 struct nfs4_file *fp)
-{
-	struct nfs4_ol_stateid *st;
-	struct file *f = fp->fi_deleg_file->nf_file;
-	struct inode *ino = locks_inode(f);
-	int writes;
-
-	writes = atomic_read(&ino->i_writecount);
-	if (!writes)
-		return 0;
-	/*
-	 * There could be multiple filehandles (hence multiple
-	 * nfs4_files) referencing this file, but that's not too
-	 * common; let's just give up in that case rather than
-	 * trying to go look up all the clients using that other
-	 * nfs4_file as well:
-	 */
-	if (fp->fi_aliased)
-		return -EAGAIN;
-	/*
-	 * If there's a close in progress, make sure that we see it
-	 * clear any fi_fds[] entries before we see it decrement
-	 * i_writecount:
-	 */
-	smp_mb__after_atomic();
-
-	if (fp->fi_fds[O_WRONLY])
-		writes--;
-	if (fp->fi_fds[O_RDWR])
-		writes--;
-	if (writes > 0)
-		return -EAGAIN; /* There may be non-NFSv4 writers */
-	/*
-	 * It's possible there are non-NFSv4 write opens in progress,
-	 * but if they haven't incremented i_writecount yet then they
-	 * also haven't called break lease yet; so, they'll break this
-	 * lease soon enough.  So, all that's left to check for is NFSv4
-	 * opens:
-	 */
-	spin_lock(&fp->fi_lock);
-	list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
-		if (st->st_openstp == NULL /* it's an open */ &&
-		    access_permit_write(st) &&
-		    st->st_stid.sc_client != clp) {
-			spin_unlock(&fp->fi_lock);
-			return -EAGAIN;
-		}
-	}
-	spin_unlock(&fp->fi_lock);
-	/*
-	 * There's a small chance that we could be racing with another
-	 * NFSv4 open.  However, any open that hasn't added itself to
-	 * the fi_stateids list also hasn't called break_lease yet; so,
-	 * they'll break this lease soon enough.
-	 */
-	return 0;
-}
-
-/*
- * It's possible that between opening the dentry and setting the delegation,
- * that it has been renamed or unlinked. Redo the lookup to verify that this
- * hasn't happened.
- */
-static int
-nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
-			  struct svc_fh *parent)
-{
-	struct svc_export *exp;
-	struct dentry *child;
-	__be32 err;
-
-	err = nfsd_lookup_dentry(open->op_rqstp, parent,
-				 open->op_fname, open->op_fnamelen,
-				 &exp, &child);
-
-	if (err)
-		return -EAGAIN;
-
-	exp_put(exp);
-	dput(child);
-	if (child != file_dentry(fp->fi_deleg_file->nf_file))
-		return -EAGAIN;
-
-	return 0;
-}
-
-/*
- * We avoid breaking delegations held by a client due to its own activity, but
- * clearing setuid/setgid bits on a write is an implicit activity and the client
- * may not notice and continue using the old mode. Avoid giving out a delegation
- * on setuid/setgid files when the client is requesting an open for write.
- */
-static int
-nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
-{
-	struct inode *inode = file_inode(nf->nf_file);
-
-	if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) &&
-	    (inode->i_mode & (S_ISUID|S_ISGID)))
-		return -EAGAIN;
-	return 0;
-}
-
 static struct nfs4_delegation *
-nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
-		    struct svc_fh *parent)
+nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
+		    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
 {
 	int status = 0;
-	struct nfs4_client *clp = stp->st_stid.sc_client;
-	struct nfs4_file *fp = stp->st_stid.sc_file;
-	struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
 	struct nfs4_delegation *dp;
 	struct nfsd_file *nf;
 	struct file_lock *fl;
@@ -5474,19 +5011,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 
 	nf = find_readable_file(fp);
 	if (!nf) {
-		/*
-		 * We probably could attempt another open and get a read
-		 * delegation, but for now, don't bother until the
-		 * client actually sends us one.
-		 */
-		return ERR_PTR(-EAGAIN);
+		/* We should always have a readable file here */
+		WARN_ON_ONCE(1);
+		return ERR_PTR(-EBADF);
 	}
 	spin_lock(&state_lock);
 	spin_lock(&fp->fi_lock);
 	if (nfs4_delegation_exists(clp, fp))
 		status = -EAGAIN;
-	else if (nfsd4_verify_setuid_write(open, nf))
-		status = -EAGAIN;
 	else if (!fp->fi_deleg_file) {
 		fp->fi_deleg_file = nf;
 		/* increment early to prevent fi_deleg_file from being
@@ -5503,7 +5035,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 		return ERR_PTR(status);
 
 	status = -ENOMEM;
-	dp = alloc_init_deleg(clp, fp, odstate);
+	dp = alloc_init_deleg(clp, fp, fh, odstate);
 	if (!dp)
 		goto out_delegees;
 
@@ -5517,31 +5049,12 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 	if (status)
 		goto out_clnt_odstate;
 
-	if (parent) {
-		status = nfsd4_verify_deleg_dentry(open, fp, parent);
-		if (status)
-			goto out_unlock;
-	}
-
-	status = nfsd4_check_conflicting_opens(clp, fp);
-	if (status)
-		goto out_unlock;
-
-	/*
-	 * Now that the deleg is set, check again to ensure that nothing
-	 * raced in and changed the mode while we weren't lookng.
-	 */
-	status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
-	if (status)
-		goto out_unlock;
-
-	status = -EAGAIN;
-	if (fp->fi_had_conflict)
-		goto out_unlock;
-
 	spin_lock(&state_lock);
 	spin_lock(&fp->fi_lock);
-	status = hash_delegation_locked(dp, fp);
+	if (fp->fi_had_conflict)
+		status = -EAGAIN;
+	else
+		status = hash_delegation_locked(dp, fp);
 	spin_unlock(&fp->fi_lock);
 	spin_unlock(&state_lock);
 
@@ -5587,13 +5100,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
  * proper support for them.
  */
 static void
-nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
-		     struct svc_fh *currentfh)
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
+			struct nfs4_ol_stateid *stp)
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
 	struct nfs4_client *clp = stp->st_stid.sc_client;
-	struct svc_fh *parent = NULL;
 	int cb_up;
 	int status = 0;
 
@@ -5607,8 +5119,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 				goto out_no_deleg;
 			break;
 		case NFS4_OPEN_CLAIM_NULL:
-			parent = currentfh;
-			fallthrough;
 		case NFS4_OPEN_CLAIM_FH:
 			/*
 			 * Let's not give out any delegations till everyone's
@@ -5619,11 +5129,22 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
 				goto out_no_deleg;
 			if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
 				goto out_no_deleg;
+			/*
+			 * Also, if the file was opened for write or
+			 * create, there's a good chance the client's
+			 * about to write to it, resulting in an
+			 * immediate recall (since we don't support
+			 * write delegations):
+			 */
+			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+				goto out_no_deleg;
+			if (open->op_create == NFS4_OPEN_CREATE)
+				goto out_no_deleg;
 			break;
 		default:
 			goto out_no_deleg;
 	}
-	dp = nfs4_set_delegation(open, stp, parent);
+	dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
 	if (IS_ERR(dp))
 		goto out_no_deleg;
 
@@ -5665,18 +5186,6 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
 	 */
 }
 
-/**
- * nfsd4_process_open2 - finish open processing
- * @rqstp: the RPC transaction being executed
- * @current_fh: NFSv4 COMPOUND's current filehandle
- * @open: OPEN arguments
- *
- * If successful, (1) truncate the file if open->op_truncate was
- * set, (2) set open->op_stateid, (3) set open->op_delegation.
- *
- * Returns %nfs_ok on success; otherwise an nfs4stat value in
- * network byte order is returned.
- */
 __be32
 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
 {
@@ -5693,9 +5202,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	 * and check for delegations in the process of being recalled.
 	 * If not found, create the nfs4_file struct
 	 */
-	fp = nfsd4_file_hash_insert(open->op_file, current_fh);
-	if (unlikely(!fp))
-		return nfserr_jukebox;
+	fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
 	if (fp != open->op_file) {
 		status = nfs4_check_deleg(cl, open, &dp);
 		if (status)
@@ -5728,7 +5235,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 			goto out;
 		}
 	} else {
-		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
+		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
 		if (status) {
 			stp->st_stid.sc_type = NFS4_CLOSED_STID;
 			release_open_stateid(stp);
@@ -5757,7 +5264,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	* Attempt to hand out a delegation. No error return, because the
 	* OPEN succeeds even if we fail.
 	*/
-	nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+	nfs4_open_delegation(current_fh, open, stp);
 nodeleg:
 	status = nfs_ok;
 	trace_nfsd_open(&stp->st_stid.sc_stateid);
@@ -5815,14 +5322,17 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	trace_nfsd_clid_renew(clid);
-	status = set_client(clid, cstate, nn);
+	status = lookup_clientid(clid, cstate, nn, false);
 	if (status)
-		return status;
+		goto out;
 	clp = cstate->clp;
+	status = nfserr_cb_path_down;
 	if (!list_empty(&clp->cl_delegations)
 			&& clp->cl_cb_state != NFSD4_CB_UP)
-		return nfserr_cb_path_down;
-	return nfs_ok;
+		goto out;
+	status = nfs_ok;
+out:
+	return status;
 }
 
 void
@@ -5883,245 +5393,66 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
 	return true;
 }
 
-struct laundry_time {
-	time64_t cutoff;
-	time64_t new_timeo;
-};
-
-static bool state_expired(struct laundry_time *lt, time64_t last_refresh)
-{
-	time64_t time_remaining;
-
-	if (last_refresh < lt->cutoff)
-		return true;
-	time_remaining = last_refresh - lt->cutoff;
-	lt->new_timeo = min(lt->new_timeo, time_remaining);
-	return false;
-}
-
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
-void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
-{
-	spin_lock_init(&nn->nfsd_ssc_lock);
-	INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list);
-	init_waitqueue_head(&nn->nfsd_ssc_waitq);
-}
-EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
-
-/*
- * This is called when nfsd is being shutdown, after all inter_ssc
- * cleanup were done, to destroy the ssc delayed unmount list.
- */
-static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
-{
-	struct nfsd4_ssc_umount_item *ni = NULL;
-	struct nfsd4_ssc_umount_item *tmp;
-
-	spin_lock(&nn->nfsd_ssc_lock);
-	list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
-		list_del(&ni->nsui_list);
-		spin_unlock(&nn->nfsd_ssc_lock);
-		mntput(ni->nsui_vfsmount);
-		kfree(ni);
-		spin_lock(&nn->nfsd_ssc_lock);
-	}
-	spin_unlock(&nn->nfsd_ssc_lock);
-}
-
-static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
-{
-	bool do_wakeup = false;
-	struct nfsd4_ssc_umount_item *ni = NULL;
-	struct nfsd4_ssc_umount_item *tmp;
-
-	spin_lock(&nn->nfsd_ssc_lock);
-	list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
-		if (time_after(jiffies, ni->nsui_expire)) {
-			if (refcount_read(&ni->nsui_refcnt) > 1)
-				continue;
-
-			/* mark being unmount */
-			ni->nsui_busy = true;
-			spin_unlock(&nn->nfsd_ssc_lock);
-			mntput(ni->nsui_vfsmount);
-			spin_lock(&nn->nfsd_ssc_lock);
-
-			/* waiters need to start from begin of list */
-			list_del(&ni->nsui_list);
-			kfree(ni);
-
-			/* wakeup ssc_connect waiters */
-			do_wakeup = true;
-			continue;
-		}
-		break;
-	}
-	if (do_wakeup)
-		wake_up_all(&nn->nfsd_ssc_waitq);
-	spin_unlock(&nn->nfsd_ssc_lock);
-}
-#endif
-
-/* Check if any lock belonging to this lockowner has any blockers */
-static bool
-nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
-{
-	struct file_lock_context *ctx;
-	struct nfs4_ol_stateid *stp;
-	struct nfs4_file *nf;
-
-	list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
-		nf = stp->st_stid.sc_file;
-		ctx = locks_inode_context(nf->fi_inode);
-		if (!ctx)
-			continue;
-		if (locks_owner_has_blockers(ctx, lo))
-			return true;
-	}
-	return false;
-}
-
-static bool
-nfs4_anylock_blockers(struct nfs4_client *clp)
-{
-	int i;
-	struct nfs4_stateowner *so;
-	struct nfs4_lockowner *lo;
-
-	if (atomic_read(&clp->cl_delegs_in_recall))
-		return true;
-	spin_lock(&clp->cl_lock);
-	for (i = 0; i < OWNER_HASH_SIZE; i++) {
-		list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
-				so_strhash) {
-			if (so->so_is_open_owner)
-				continue;
-			lo = lockowner(so);
-			if (nfs4_lockowner_has_blockers(lo)) {
-				spin_unlock(&clp->cl_lock);
-				return true;
-			}
-		}
-	}
-	spin_unlock(&clp->cl_lock);
-	return false;
-}
-
-static void
-nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
-				struct laundry_time *lt)
-{
-	unsigned int maxreap, reapcnt = 0;
-	struct list_head *pos, *next;
-	struct nfs4_client *clp;
-
-	maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ?
-			NFSD_CLIENT_MAX_TRIM_PER_RUN : 0;
-	INIT_LIST_HEAD(reaplist);
-	spin_lock(&nn->client_lock);
-	list_for_each_safe(pos, next, &nn->client_lru) {
-		clp = list_entry(pos, struct nfs4_client, cl_lru);
-		if (clp->cl_state == NFSD4_EXPIRABLE)
-			goto exp_client;
-		if (!state_expired(lt, clp->cl_time))
-			break;
-		if (!atomic_read(&clp->cl_rpc_users)) {
-			if (clp->cl_state == NFSD4_ACTIVE)
-				atomic_inc(&nn->nfsd_courtesy_clients);
-			clp->cl_state = NFSD4_COURTESY;
-		}
-		if (!client_has_state(clp))
-			goto exp_client;
-		if (!nfs4_anylock_blockers(clp))
-			if (reapcnt >= maxreap)
-				continue;
-exp_client:
-		if (!mark_client_expired_locked(clp)) {
-			list_add(&clp->cl_lru, reaplist);
-			reapcnt++;
-		}
-	}
-	spin_unlock(&nn->client_lock);
-}
-
-static void
-nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn,
-				struct list_head *reaplist)
-{
-	unsigned int maxreap = 0, reapcnt = 0;
-	struct list_head *pos, *next;
-	struct nfs4_client *clp;
-
-	maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN;
-	INIT_LIST_HEAD(reaplist);
-
-	spin_lock(&nn->client_lock);
-	list_for_each_safe(pos, next, &nn->client_lru) {
-		clp = list_entry(pos, struct nfs4_client, cl_lru);
-		if (clp->cl_state == NFSD4_ACTIVE)
-			break;
-		if (reapcnt >= maxreap)
-			break;
-		if (!mark_client_expired_locked(clp)) {
-			list_add(&clp->cl_lru, reaplist);
-			reapcnt++;
-		}
-	}
-	spin_unlock(&nn->client_lock);
-}
-
-static void
-nfs4_process_client_reaplist(struct list_head *reaplist)
-{
-	struct list_head *pos, *next;
-	struct nfs4_client *clp;
-
-	list_for_each_safe(pos, next, reaplist) {
-		clp = list_entry(pos, struct nfs4_client, cl_lru);
-		trace_nfsd_clid_purged(&clp->cl_clientid);
-		list_del_init(&clp->cl_lru);
-		expire_client(clp);
-	}
-}
-
 static time64_t
 nfs4_laundromat(struct nfsd_net *nn)
 {
+	struct nfs4_client *clp;
 	struct nfs4_openowner *oo;
 	struct nfs4_delegation *dp;
 	struct nfs4_ol_stateid *stp;
 	struct nfsd4_blocked_lock *nbl;
 	struct list_head *pos, *next, reaplist;
-	struct laundry_time lt = {
-		.cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease,
-		.new_timeo = nn->nfsd4_lease
-	};
+	time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease;
+	time64_t t, new_timeo = nn->nfsd4_lease;
 	struct nfs4_cpntf_state *cps;
 	copy_stateid_t *cps_t;
 	int i;
 
 	if (clients_still_reclaiming(nn)) {
-		lt.new_timeo = 0;
+		new_timeo = 0;
 		goto out;
 	}
 	nfsd4_end_grace(nn);
+	INIT_LIST_HEAD(&reaplist);
 
 	spin_lock(&nn->s2s_cp_lock);
 	idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
 		cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
-		if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
-				state_expired(&lt, cps->cpntf_time))
+		if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
+				cps->cpntf_time < cutoff)
 			_free_cpntf_state_locked(nn, cps);
 	}
 	spin_unlock(&nn->s2s_cp_lock);
-	nfs4_get_client_reaplist(nn, &reaplist, &lt);
-	nfs4_process_client_reaplist(&reaplist);
 
+	spin_lock(&nn->client_lock);
+	list_for_each_safe(pos, next, &nn->client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (clp->cl_time > cutoff) {
+			t = clp->cl_time - cutoff;
+			new_timeo = min(new_timeo, t);
+			break;
+		}
+		if (mark_client_expired_locked(clp)) {
+			trace_nfsd_clid_expired(&clp->cl_clientid);
+			continue;
+		}
+		list_add(&clp->cl_lru, &reaplist);
+	}
+	spin_unlock(&nn->client_lock);
+	list_for_each_safe(pos, next, &reaplist) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		trace_nfsd_clid_purged(&clp->cl_clientid);
+		list_del_init(&clp->cl_lru);
+		expire_client(clp);
+	}
 	spin_lock(&state_lock);
 	list_for_each_safe(pos, next, &nn->del_recall_lru) {
 		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-		if (!state_expired(&lt, dp->dl_time))
+		if (dp->dl_time > cutoff) {
+			t = dp->dl_time - cutoff;
+			new_timeo = min(new_timeo, t);
 			break;
+		}
 		WARN_ON(!unhash_delegation_locked(dp));
 		list_add(&dp->dl_recall_lru, &reaplist);
 	}
@@ -6137,8 +5468,11 @@ nfs4_laundromat(struct nfsd_net *nn)
 	while (!list_empty(&nn->close_lru)) {
 		oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
 					oo_close_lru);
-		if (!state_expired(&lt, oo->oo_time))
+		if (oo->oo_time > cutoff) {
+			t = oo->oo_time - cutoff;
+			new_timeo = min(new_timeo, t);
 			break;
+		}
 		list_del_init(&oo->oo_close_lru);
 		stp = oo->oo_last_closed_stid;
 		oo->oo_last_closed_stid = NULL;
@@ -6164,8 +5498,11 @@ nfs4_laundromat(struct nfsd_net *nn)
 	while (!list_empty(&nn->blocked_locks_lru)) {
 		nbl = list_first_entry(&nn->blocked_locks_lru,
 					struct nfsd4_blocked_lock, nbl_lru);
-		if (!state_expired(&lt, nbl->nbl_time))
+		if (nbl->nbl_time > cutoff) {
+			t = nbl->nbl_time - cutoff;
+			new_timeo = min(new_timeo, t);
 			break;
+		}
 		list_move(&nbl->nbl_lru, &reaplist);
 		list_del_init(&nbl->nbl_list);
 	}
@@ -6177,14 +5514,12 @@ nfs4_laundromat(struct nfsd_net *nn)
 		list_del_init(&nbl->nbl_lru);
 		free_blocked_lock(nbl);
 	}
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
-	/* service the server-to-server copy delayed unmount list */
-	nfsd4_ssc_expire_umount(nn);
-#endif
 out:
-	return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
+	new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
+	return new_timeo;
 }
 
+static struct workqueue_struct *laundry_wq;
 static void laundromat_main(struct work_struct *);
 
 static void
@@ -6199,63 +5534,6 @@ laundromat_main(struct work_struct *laundry)
 	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
 }
 
-static void
-courtesy_client_reaper(struct nfsd_net *nn)
-{
-	struct list_head reaplist;
-
-	nfs4_get_courtesy_client_reaplist(nn, &reaplist);
-	nfs4_process_client_reaplist(&reaplist);
-}
-
-static void
-deleg_reaper(struct nfsd_net *nn)
-{
-	struct list_head *pos, *next;
-	struct nfs4_client *clp;
-	struct list_head cblist;
-
-	INIT_LIST_HEAD(&cblist);
-	spin_lock(&nn->client_lock);
-	list_for_each_safe(pos, next, &nn->client_lru) {
-		clp = list_entry(pos, struct nfs4_client, cl_lru);
-		if (clp->cl_state != NFSD4_ACTIVE ||
-			list_empty(&clp->cl_delegations) ||
-			atomic_read(&clp->cl_delegs_in_recall) ||
-			test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
-			(ktime_get_boottime_seconds() -
-				clp->cl_ra_time < 5)) {
-			continue;
-		}
-		list_add(&clp->cl_ra_cblist, &cblist);
-
-		/* release in nfsd4_cb_recall_any_release */
-		atomic_inc(&clp->cl_rpc_users);
-		set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
-		clp->cl_ra_time = ktime_get_boottime_seconds();
-	}
-	spin_unlock(&nn->client_lock);
-
-	while (!list_empty(&cblist)) {
-		clp = list_first_entry(&cblist, struct nfs4_client,
-					cl_ra_cblist);
-		list_del_init(&clp->cl_ra_cblist);
-		clp->cl_ra->ra_keep = 0;
-		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
-		nfsd4_run_cb(&clp->cl_ra->ra_cb);
-	}
-}
-
-static void
-nfsd4_state_shrinker_worker(struct work_struct *work)
-{
-	struct nfsd_net *nn = container_of(work, struct nfsd_net,
-				nfsd_shrinker_work);
-
-	courtesy_client_reaper(nn);
-	deleg_reaper(nn);
-}
-
 static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
 {
 	if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -6263,6 +5541,21 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
 	return nfs_ok;
 }
 
+static inline int
+access_permit_read(struct nfs4_ol_stateid *stp)
+{
+	return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
+		test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
+		test_access(NFS4_SHARE_ACCESS_WRITE, stp);
+}
+
+static inline int
+access_permit_write(struct nfs4_ol_stateid *stp)
+{
+	return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
+		test_access(NFS4_SHARE_ACCESS_BOTH, stp);
+}
+
 static
 __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
 {
@@ -6399,7 +5692,6 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     struct nfs4_stid **s, struct nfsd_net *nn)
 {
 	__be32 status;
-	struct nfs4_stid *stid;
 	bool return_revoked = false;
 
 	/*
@@ -6414,7 +5706,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
 		CLOSE_STATEID(stateid))
 		return nfserr_bad_stateid;
-	status = set_client(&stateid->si_opaque.so_clid, cstate, nn);
+	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn,
+				 false);
 	if (status == nfserr_stale_clientid) {
 		if (cstate->session)
 			return nfserr_bad_stateid;
@@ -6422,16 +5715,15 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 	}
 	if (status)
 		return status;
-	stid = find_stateid_by_type(cstate->clp, stateid, typemask);
-	if (!stid)
+	*s = find_stateid_by_type(cstate->clp, stateid, typemask);
+	if (!*s)
 		return nfserr_bad_stateid;
-	if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
-		nfs4_put_stid(stid);
+	if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+		nfs4_put_stid(*s);
 		if (cstate->minorversion)
 			return nfserr_deleg_revoked;
 		return nfserr_bad_stateid;
 	}
-	*s = stid;
 	return nfs_ok;
 }
 
@@ -6496,12 +5788,12 @@ out:
 static void
 _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
 {
-	WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID);
-	if (!refcount_dec_and_test(&cps->cp_stateid.cs_count))
+	WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
+	if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
 		return;
 	list_del(&cps->cp_list);
 	idr_remove(&nn->s2s_cp_stateids,
-		   cps->cp_stateid.cs_stid.si_opaque.so_id);
+		   cps->cp_stateid.stid.si_opaque.so_id);
 	kfree(cps);
 }
 /*
@@ -6523,12 +5815,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
 	if (cps_t) {
 		state = container_of(cps_t, struct nfs4_cpntf_state,
 				     cp_stateid);
-		if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) {
+		if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) {
 			state = NULL;
 			goto unlock;
 		}
 		if (!clp)
-			refcount_inc(&state->cp_stateid.cs_count);
+			refcount_inc(&state->cp_stateid.sc_count);
 		else
 			_free_cpntf_state_locked(nn, state);
 	}
@@ -6546,27 +5838,21 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
 {
 	__be32 status;
 	struct nfs4_cpntf_state *cps = NULL;
-	struct nfs4_client *found;
+	struct nfsd4_compound_state cstate;
 
 	status = manage_cpntf_state(nn, st, NULL, &cps);
 	if (status)
 		return status;
 
 	cps->cpntf_time = ktime_get_boottime_seconds();
-
-	status = nfserr_expired;
-	found = lookup_clientid(&cps->cp_p_clid, true, nn);
-	if (!found)
+	memset(&cstate, 0, sizeof(cstate));
+	status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true);
+	if (status)
 		goto out;
-
-	*stid = find_stateid_by_type(found, &cps->cp_p_stateid,
-			NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
-	if (*stid)
-		status = nfs_ok;
-	else
-		status = nfserr_bad_stateid;
-
-	put_client_renew(found);
+	status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid,
+				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+				stid, nn);
+	put_client_renew(cstate.clp);
 out:
 	nfs4_put_cpntf_state(nn, cps);
 	return status;
@@ -6601,11 +5887,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 		return nfserr_grace;
 
 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
-		if (cstid)
-			status = nfserr_bad_stateid;
-		else
-			status = check_special_stateids(net, fhp, stateid,
-									flags);
+		status = check_special_stateids(net, fhp, stateid, flags);
 		goto done;
 	}
 
@@ -6659,7 +5941,7 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
 	struct nfsd4_test_stateid_id *stateid;
-	struct nfs4_client *cl = cstate->clp;
+	struct nfs4_client *cl = cstate->session->se_client;
 
 	list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
 		stateid->ts_id_status =
@@ -6705,7 +5987,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	stateid_t *stateid = &free_stateid->fr_stateid;
 	struct nfs4_stid *s;
 	struct nfs4_delegation *dp;
-	struct nfs4_client *cl = cstate->clp;
+	struct nfs4_client *cl = cstate->session->se_client;
 	__be32 ret = nfserr_bad_stateid;
 
 	spin_lock(&cl->cl_lock);
@@ -7034,8 +6316,6 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto put_stateid;
 
-	trace_nfsd_deleg_return(stateid);
-	wake_up_var(d_inode(cstate->current_fh.fh_dentry));
 	destroy_delegation(dp);
 put_stateid:
 	nfs4_put_stid(&dp->dl_stid);
@@ -7043,6 +6323,15 @@ out:
 	return status;
 }
 
+static inline u64
+end_offset(u64 start, u64 len)
+{
+	u64 end;
+
+	end = start + len;
+	return end >= start ? end: NFS4_MAX_UINT64;
+}
+
 /* last octet in a range */
 static inline u64
 last_byte_offset(u64 start, u64 len)
@@ -7072,7 +6361,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 }
 
 static fl_owner_t
-nfsd4_lm_get_owner(fl_owner_t owner)
+nfsd4_fl_get_owner(fl_owner_t owner)
 {
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
 
@@ -7081,7 +6370,7 @@ nfsd4_lm_get_owner(fl_owner_t owner)
 }
 
 static void
-nfsd4_lm_put_owner(fl_owner_t owner)
+nfsd4_fl_put_owner(fl_owner_t owner)
 {
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
 
@@ -7089,29 +6378,6 @@ nfsd4_lm_put_owner(fl_owner_t owner)
 		nfs4_put_stateowner(&lo->lo_owner);
 }
 
-/* return pointer to struct nfs4_client if client is expirable */
-static bool
-nfsd4_lm_lock_expirable(struct file_lock *cfl)
-{
-	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner;
-	struct nfs4_client *clp = lo->lo_owner.so_client;
-	struct nfsd_net *nn;
-
-	if (try_to_expire_client(clp)) {
-		nn = net_generic(clp->net, nfsd_net_id);
-		mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
-		return true;
-	}
-	return false;
-}
-
-/* schedule laundromat to run immediately and wait for it to complete */
-static void
-nfsd4_lm_expire_lock(void)
-{
-	flush_workqueue(laundry_wq);
-}
-
 static void
 nfsd4_lm_notify(struct file_lock *fl)
 {
@@ -7131,19 +6397,14 @@ nfsd4_lm_notify(struct file_lock *fl)
 	}
 	spin_unlock(&nn->blocked_locks_lock);
 
-	if (queue) {
-		trace_nfsd_cb_notify_lock(lo, nbl);
+	if (queue)
 		nfsd4_run_cb(&nbl->nbl_cb);
-	}
 }
 
 static const struct lock_manager_operations nfsd_posix_mng_ops  = {
-	.lm_mod_owner = THIS_MODULE,
 	.lm_notify = nfsd4_lm_notify,
-	.lm_get_owner = nfsd4_lm_get_owner,
-	.lm_put_owner = nfsd4_lm_put_owner,
-	.lm_lock_expirable = nfsd4_lm_lock_expirable,
-	.lm_expire_lock = nfsd4_lm_expire_lock,
+	.lm_get_owner = nfsd4_fl_get_owner,
+	.lm_put_owner = nfsd4_fl_put_owner,
 };
 
 static inline void
@@ -7458,9 +6719,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		if (nfsd4_has_session(cstate))
 			/* See rfc 5661 18.10.3: given clientid is ignored: */
 			memcpy(&lock->lk_new_clientid,
-				&cstate->clp->cl_clientid,
+				&cstate->session->se_client->cl_clientid,
 				sizeof(clientid_t));
 
+		status = nfserr_stale_clientid;
+		if (STALE_CLIENTID(&lock->lk_new_clientid, nn))
+			goto out;
+
 		/* validate and update open stateid and open seqid */
 		status = nfs4_preprocess_confirmed_seqid_op(cstate,
 				        lock->lk_new_open_seqid,
@@ -7498,9 +6763,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (!locks_in_grace(net) && lock->lk_reclaim)
 		goto out;
 
-	if (lock->lk_reclaim)
-		fl_flags |= FL_RECLAIM;
-
 	fp = lock_stp->st_stid.sc_file;
 	switch (lock->lk_type) {
 		case NFS4_READW_LT:
@@ -7537,16 +6799,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	}
 
-	/*
-	 * Most filesystems with their own ->lock operations will block
-	 * the nfsd thread waiting to acquire the lock.  That leads to
-	 * deadlocks (we don't want every nfsd thread tied up waiting
-	 * for file locks), so don't attempt blocking lock notifications
-	 * on those filesystems:
-	 */
-	if (nf->nf_file->f_op->lock)
-		fl_flags &= ~FL_SLEEP;
-
 	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
 	if (!nbl) {
 		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
@@ -7577,7 +6829,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		spin_lock(&nn->blocked_locks_lock);
 		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
 		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
-		kref_get(&nbl->nbl_kref);
 		spin_unlock(&nn->blocked_locks_lock);
 	}
 
@@ -7590,7 +6841,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			nn->somebody_reclaimed = true;
 		break;
 	case FILE_LOCK_DEFERRED:
-		kref_put(&nbl->nbl_kref, free_nbl);
 		nbl = NULL;
 		fallthrough;
 	case -EAGAIN:		/* conflock holds conflicting lock */
@@ -7611,13 +6861,8 @@ out:
 		/* dequeue it if we queued it before */
 		if (fl_flags & FL_SLEEP) {
 			spin_lock(&nn->blocked_locks_lock);
-			if (!list_empty(&nbl->nbl_list) &&
-			    !list_empty(&nbl->nbl_lru)) {
-				list_del_init(&nbl->nbl_list);
-				list_del_init(&nbl->nbl_lru);
-				kref_put(&nbl->nbl_kref, free_nbl);
-			}
-			/* nbl can use one of lists to be linked to reaplist */
+			list_del_init(&nbl->nbl_list);
+			list_del_init(&nbl->nbl_lru);
 			spin_unlock(&nn->blocked_locks_lock);
 		}
 		free_blocked_lock(nbl);
@@ -7658,22 +6903,21 @@ out:
 static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
 	struct nfsd_file *nf;
-	struct inode *inode;
 	__be32 err;
 
 	err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 	if (err)
 		return err;
-	inode = fhp->fh_dentry->d_inode;
-	inode_lock(inode); /* to block new leases till after test_lock: */
-	err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+	fh_lock(fhp); /* to block new leases till after test_lock: */
+	err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode,
+							NFSD_MAY_READ));
 	if (err)
 		goto out;
 	lock->fl_file = nf->nf_file;
 	err = nfserrno(vfs_test_lock(nf->nf_file, lock));
 	lock->fl_file = NULL;
 out:
-	inode_unlock(inode);
+	fh_unlock(fhp);
 	nfsd_file_put(nf);
 	return err;
 }
@@ -7698,7 +6942,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 return nfserr_inval;
 
 	if (!nfsd4_has_session(cstate)) {
-		status = set_client(&lockt->lt_clientid, cstate, nn);
+		status = lookup_clientid(&lockt->lt_clientid, cstate, nn,
+					 false);
 		if (status)
 			goto out;
 	}
@@ -7835,20 +7080,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 {
 	struct file_lock *fl;
 	int status = false;
-	struct nfsd_file *nf;
+	struct nfsd_file *nf = find_any_file(fp);
 	struct inode *inode;
 	struct file_lock_context *flctx;
 
-	spin_lock(&fp->fi_lock);
-	nf = find_any_file_locked(fp);
 	if (!nf) {
 		/* Any valid lock stateid should have some sort of access */
 		WARN_ON_ONCE(1);
-		goto out;
+		return status;
 	}
 
 	inode = locks_inode(nf->nf_file);
-	flctx = locks_inode_context(inode);
+	flctx = inode->i_flctx;
 
 	if (flctx && !list_empty_careful(&flctx->flc_posix)) {
 		spin_lock(&flctx->flc_lock);
@@ -7860,62 +7103,57 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 		}
 		spin_unlock(&flctx->flc_lock);
 	}
-out:
-	spin_unlock(&fp->fi_lock);
+	nfsd_file_put(nf);
 	return status;
 }
 
-/**
- * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations
- * @rqstp: RPC transaction
- * @cstate: NFSv4 COMPOUND state
- * @u: RELEASE_LOCKOWNER arguments
- *
- * Check if theree are any locks still held and if not - free the lockowner
- * and any lock state that is owned.
- *
- * Return values:
- *   %nfs_ok: lockowner released or not found
- *   %nfserr_locks_held: lockowner still in use
- *   %nfserr_stale_clientid: clientid no longer active
- *   %nfserr_expired: clientid not recognized
- */
 __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 			struct nfsd4_compound_state *cstate,
 			union nfsd4_op_u *u)
 {
 	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
-	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	clientid_t *clid = &rlockowner->rl_clientid;
+	struct nfs4_stateowner *sop;
+	struct nfs4_lockowner *lo = NULL;
 	struct nfs4_ol_stateid *stp;
-	struct nfs4_lockowner *lo;
-	struct nfs4_client *clp;
-	LIST_HEAD(reaplist);
+	struct xdr_netobj *owner = &rlockowner->rl_owner;
+	unsigned int hashval = ownerstr_hashval(owner);
 	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+	struct nfs4_client *clp;
+	LIST_HEAD (reaplist);
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
 		clid->cl_boot, clid->cl_id);
 
-	status = set_client(clid, cstate, nn);
+	status = lookup_clientid(clid, cstate, nn, false);
 	if (status)
 		return status;
+
 	clp = cstate->clp;
-
+	/* Find the matching lock stateowner */
 	spin_lock(&clp->cl_lock);
-	lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
-	if (!lo) {
-		spin_unlock(&clp->cl_lock);
-		return nfs_ok;
-	}
+	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
+			    so_strhash) {
 
-	list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
-		if (check_for_locks(stp->st_stid.sc_file, lo)) {
+		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
+			continue;
+
+		if (atomic_read(&sop->so_count) != 1) {
 			spin_unlock(&clp->cl_lock);
-			nfs4_put_stateowner(&lo->lo_owner);
 			return nfserr_locks_held;
 		}
+
+		lo = lockowner(sop);
+		nfs4_get_stateowner(sop);
+		break;
 	}
+	if (!lo) {
+		spin_unlock(&clp->cl_lock);
+		return status;
+	}
+
 	unhash_lockowner_locked(lo);
 	while (!list_empty(&lo->lo_owner.so_stateids)) {
 		stp = list_first_entry(&lo->lo_owner.so_stateids,
@@ -7925,11 +7163,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		put_ol_stateid_locked(stp, &reaplist);
 	}
 	spin_unlock(&clp->cl_lock);
-
 	free_ol_stateid_reaplist(&reaplist);
 	remove_blocked_locks(lo);
 	nfs4_put_stateowner(&lo->lo_owner);
-	return nfs_ok;
+
+	return status;
 }
 
 static inline struct nfs4_client_reclaim *
@@ -8018,13 +7256,25 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
 	return NULL;
 }
 
+/*
+* Called from OPEN. Look for clientid in reclaim list.
+*/
 __be32
-nfs4_check_open_reclaim(struct nfs4_client *clp)
+nfs4_check_open_reclaim(clientid_t *clid,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd_net *nn)
 {
-	if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
+	__be32 status;
+
+	/* find clientid in conf_id_hashtbl */
+	status = lookup_clientid(clid, cstate, nn, false);
+	if (status)
+		return nfserr_reclaim_bad;
+
+	if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
 		return nfserr_no_grace;
 
-	if (nfsd4_client_record_check(clp))
+	if (nfsd4_client_record_check(cstate->clp))
 		return nfserr_reclaim_bad;
 
 	return nfs_ok;
@@ -8095,20 +7345,10 @@ static int nfs4_state_create_net(struct net *net)
 	INIT_LIST_HEAD(&nn->blocked_locks_lru);
 
 	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
-	INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
 	get_net(net);
 
-	nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
-	nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
-	nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
-
-	if (register_shrinker(&nn->nfsd_client_shrinker))
-		goto err_shrinker;
 	return 0;
 
-err_shrinker:
-	put_net(net);
-	kfree(nn->sessionid_hashtbl);
 err_sessionid:
 	kfree(nn->unconf_id_hashtbl);
 err_unconf_id:
@@ -8180,18 +7420,22 @@ nfs4_state_start(void)
 {
 	int ret;
 
-	ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
-	if (ret)
-		return ret;
-
-	ret = nfsd4_create_callback_queue();
-	if (ret) {
-		rhltable_destroy(&nfs4_file_rhltable);
-		return ret;
+	laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
+	if (laundry_wq == NULL) {
+		ret = -ENOMEM;
+		goto out;
 	}
+	ret = nfsd4_create_callback_queue();
+	if (ret)
+		goto out_free_laundry;
 
 	set_max_delegations();
 	return 0;
+
+out_free_laundry:
+	destroy_workqueue(laundry_wq);
+out:
+	return ret;
 }
 
 void
@@ -8201,8 +7445,6 @@ nfs4_state_shutdown_net(struct net *net)
 	struct list_head *pos, *next, reaplist;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	unregister_shrinker(&nn->nfsd_client_shrinker);
-	cancel_work(&nn->nfsd_shrinker_work);
 	cancel_delayed_work_sync(&nn->laundromat_work);
 	locks_end_grace(&nn->nfsd4_manager);
 
@@ -8222,16 +7464,13 @@ nfs4_state_shutdown_net(struct net *net)
 
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
-	nfsd4_ssc_shutdown_umount(nn);
-#endif
 }
 
 void
 nfs4_state_shutdown(void)
 {
+	destroy_workqueue(laundry_wq);
 	nfsd4_destroy_callback_queue();
-	rhltable_destroy(&nfs4_file_rhltable);
 }
 
 static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5a68c6286492..dbfa24cf3390 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -42,8 +42,6 @@
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/xattr.h>
-#include <linux/vmalloc.h>
-
 #include <uapi/linux/xattr.h>
 
 #include "idmap.h"
@@ -56,8 +54,6 @@
 #include "pnfs.h"
 #include "filecache.h"
 
-#include "trace.h"
-
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
 #endif
@@ -94,8 +90,6 @@ check_filename(char *str, int len)
 
 	if (len == 0)
 		return nfserr_inval;
-	if (len > NFS4_MAXNAMLEN)
-		return nfserr_nametoolong;
 	if (isdotent(str, len))
 		return nfserr_badname;
 	for (i = 0; i < len; i++)
@@ -104,6 +98,122 @@ check_filename(char *str, int len)
 	return 0;
 }
 
+#define DECODE_HEAD				\
+	__be32 *p;				\
+	__be32 status
+#define DECODE_TAIL				\
+	status = 0;				\
+out:						\
+	return status;				\
+xdr_error:					\
+	dprintk("NFSD: xdr error (%s:%d)\n",	\
+			__FILE__, __LINE__);	\
+	status = nfserr_bad_xdr;		\
+	goto out
+
+#define READMEM(x,nbytes) do {			\
+	x = (char *)p;				\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+#define SAVEMEM(x,nbytes) do {			\
+	if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
+ 		savemem(argp, p, nbytes) :	\
+ 		(char *)p)) {			\
+		dprintk("NFSD: xdr error (%s:%d)\n", \
+				__FILE__, __LINE__); \
+		goto xdr_error;			\
+		}				\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+#define COPYMEM(x,nbytes) do {			\
+	memcpy((x), p, nbytes);			\
+	p += XDR_QUADLEN(nbytes);		\
+} while (0)
+
+/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
+#define READ_BUF(nbytes)  do {			\
+	if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) {	\
+		p = argp->p;			\
+		argp->p += XDR_QUADLEN(nbytes);	\
+	} else if (!(p = read_buf(argp, nbytes))) { \
+		dprintk("NFSD: xdr error (%s:%d)\n", \
+				__FILE__, __LINE__); \
+		goto xdr_error;			\
+	}					\
+} while (0)
+
+static void next_decode_page(struct nfsd4_compoundargs *argp)
+{
+	argp->p = page_address(argp->pagelist[0]);
+	argp->pagelist++;
+	if (argp->pagelen < PAGE_SIZE) {
+		argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
+		argp->pagelen = 0;
+	} else {
+		argp->end = argp->p + (PAGE_SIZE>>2);
+		argp->pagelen -= PAGE_SIZE;
+	}
+}
+
+static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
+{
+	/* We want more bytes than seem to be available.
+	 * Maybe we need a new page, maybe we have just run out
+	 */
+	unsigned int avail = (char *)argp->end - (char *)argp->p;
+	__be32 *p;
+
+	if (argp->pagelen == 0) {
+		struct kvec *vec = &argp->rqstp->rq_arg.tail[0];
+
+		if (!argp->tail) {
+			argp->tail = true;
+			avail = vec->iov_len;
+			argp->p = vec->iov_base;
+			argp->end = vec->iov_base + avail;
+		}
+
+		if (avail < nbytes)
+			return NULL;
+
+		p = argp->p;
+		argp->p += XDR_QUADLEN(nbytes);
+		return p;
+	}
+
+	if (avail + argp->pagelen < nbytes)
+		return NULL;
+	if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
+		return NULL;
+	/* ok, we can do it with the current plus the next page */
+	if (nbytes <= sizeof(argp->tmp))
+		p = argp->tmp;
+	else {
+		kfree(argp->tmpp);
+		p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
+		if (!p)
+			return NULL;
+		
+	}
+	/*
+	 * The following memcpy is safe because read_buf is always
+	 * called with nbytes > avail, and the two cases above both
+	 * guarantee p points to at least nbytes bytes.
+	 */
+	memcpy(p, argp->p, avail);
+	next_decode_page(argp);
+	memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
+	argp->p += XDR_QUADLEN(nbytes - avail);
+	return p;
+}
+
+static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
+{
+	unsigned int this = (char *)argp->end - (char *)argp->p;
+
+	return this + argp->pagelen;
+}
+
 static int zero_clientid(clientid_t *clid)
 {
 	return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -149,246 +259,118 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
 	return p;
 }
 
-static void *
-svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len)
+static __be32
+svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
+			struct page ***pagelist, u32 buflen)
 {
-	__be32 *tmp;
+	int avail;
+	int len;
+	int pages;
 
-	/*
-	 * The location of the decoded data item is stable,
-	 * so @p is OK to use. This is the common case.
+	/* Sorry .. no magic macros for this.. *
+	 * READ_BUF(write->wr_buflen);
+	 * SAVEMEM(write->wr_buf, write->wr_buflen);
 	 */
-	if (p != argp->xdr->scratch.iov_base)
-		return p;
+	avail = (char *)argp->end - (char *)argp->p;
+	if (avail + argp->pagelen < buflen) {
+		dprintk("NFSD: xdr error (%s:%d)\n",
+			       __FILE__, __LINE__);
+		return nfserr_bad_xdr;
+	}
+	head->iov_base = argp->p;
+	head->iov_len = avail;
+	*pagelist = argp->pagelist;
 
-	tmp = svcxdr_tmpalloc(argp, len);
-	if (!tmp)
+	len = XDR_QUADLEN(buflen) << 2;
+	if (len >= avail) {
+		len -= avail;
+
+		pages = len >> PAGE_SHIFT;
+		argp->pagelist += pages;
+		argp->pagelen -= pages * PAGE_SIZE;
+		len -= pages * PAGE_SIZE;
+
+		next_decode_page(argp);
+	}
+	argp->p += XDR_QUADLEN(len);
+
+	return 0;
+}
+
+/**
+ * savemem - duplicate a chunk of memory for later processing
+ * @argp: NFSv4 compound argument structure to be freed with
+ * @p: pointer to be duplicated
+ * @nbytes: length to be duplicated
+ *
+ * Returns a pointer to a copy of @nbytes bytes of memory at @p
+ * that are preserved until processing of the NFSv4 compound
+ * operation described by @argp finishes.
+ */
+static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
+{
+	void *ret;
+
+	ret = svcxdr_tmpalloc(argp, nbytes);
+	if (!ret)
 		return NULL;
-	memcpy(tmp, p, len);
-	return tmp;
-}
-
-/*
- * NFSv4 basic data type decoders
- */
-
-/*
- * This helper handles variable-length opaques which belong to protocol
- * elements that this implementation does not support.
- */
-static __be32
-nfsd4_decode_ignored_string(struct nfsd4_compoundargs *argp, u32 maxlen)
-{
-	u32 len;
-
-	if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
-		return nfserr_bad_xdr;
-	if (maxlen && len > maxlen)
-		return nfserr_bad_xdr;
-	if (!xdr_inline_decode(argp->xdr, len))
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
+	memcpy(ret, p, nbytes);
+	return ret;
 }
 
 static __be32
-nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
+nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
 {
-	__be32 *p;
-	u32 len;
+	DECODE_HEAD;
 
-	if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
-		return nfserr_bad_xdr;
-	if (len == 0 || len > NFS4_OPAQUE_LIMIT)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, len);
-	if (!p)
-		return nfserr_bad_xdr;
-	o->data = svcxdr_savemem(argp, p, len);
-	if (!o->data)
-		return nfserr_jukebox;
-	o->len = len;
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp)
-{
-	__be32 *p, status;
-
-	if (xdr_stream_decode_u32(argp->xdr, lenp) < 0)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, *lenp);
-	if (!p)
-		return nfserr_bad_xdr;
-	status = check_filename((char *)p, *lenp);
-	if (status)
-		return status;
-	*namp = svcxdr_savemem(argp, p, *lenp);
-	if (!*namp)
-		return nfserr_jukebox;
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(argp->xdr, XDR_UNIT * 3);
-	if (!p)
-		return nfserr_bad_xdr;
+	READ_BUF(12);
 	p = xdr_decode_hyper(p, &tv->tv_sec);
 	tv->tv_nsec = be32_to_cpup(p++);
 	if (tv->tv_nsec >= (u32)1000000000)
 		return nfserr_inval;
-	return nfs_ok;
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
+nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 {
-	__be32 *p;
+	u32 bmlen;
+	DECODE_HEAD;
 
-	p = xdr_inline_decode(argp->xdr, NFS4_VERIFIER_SIZE);
-	if (!p)
-		return nfserr_bad_xdr;
-	memcpy(verf->data, p, sizeof(verf->data));
-	return nfs_ok;
-}
+	bmval[0] = 0;
+	bmval[1] = 0;
+	bmval[2] = 0;
 
-/**
- * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4
- * @argp: NFSv4 compound argument structure
- * @bmval: pointer to an array of u32's to decode into
- * @bmlen: size of the @bmval array
- *
- * The server needs to return nfs_ok rather than nfserr_bad_xdr when
- * encountering bitmaps containing bits it does not recognize. This
- * includes bits in bitmap words past WORDn, where WORDn is the last
- * bitmap WORD the implementation currently supports. Thus we are
- * careful here to simply ignore bits in bitmap words that this
- * implementation has yet to support explicitly.
- *
- * Return values:
- *   %nfs_ok: @bmval populated successfully
- *   %nfserr_bad_xdr: the encoded bitmap was invalid
- */
-static __be32
-nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
-{
-	ssize_t status;
+	READ_BUF(4);
+	bmlen = be32_to_cpup(p++);
+	if (bmlen > 1000)
+		goto xdr_error;
 
-	status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen);
-	return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok;
+	READ_BUF(bmlen << 2);
+	if (bmlen > 0)
+		bmval[0] = be32_to_cpup(p++);
+	if (bmlen > 1)
+		bmval[1] = be32_to_cpup(p++);
+	if (bmlen > 2)
+		bmval[2] = be32_to_cpup(p++);
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace)
+nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
+		   struct iattr *iattr, struct nfs4_acl **acl,
+		   struct xdr_netobj *label, int *umask)
 {
-	__be32 *p, status;
-	u32 length;
-
-	if (xdr_stream_decode_u32(argp->xdr, &ace->type) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &ace->flag) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &ace->access_mask) < 0)
-		return nfserr_bad_xdr;
-
-	if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, length);
-	if (!p)
-		return nfserr_bad_xdr;
-	ace->whotype = nfs4_acl_get_whotype((char *)p, length);
-	if (ace->whotype != NFS4_ACL_WHO_NAMED)
-		status = nfs_ok;
-	else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
-		status = nfsd_map_name_to_gid(argp->rqstp,
-				(char *)p, length, &ace->who_gid);
-	else
-		status = nfsd_map_name_to_uid(argp->rqstp,
-				(char *)p, length, &ace->who_uid);
-
-	return status;
-}
-
-/* A counted array of nfsace4's */
-static noinline __be32
-nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl)
-{
-	struct nfs4_ace *ace;
-	__be32 status;
-	u32 count;
-
-	if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
-		return nfserr_bad_xdr;
-
-	if (count > xdr_stream_remaining(argp->xdr) / 20)
-		/*
-		 * Even with 4-byte names there wouldn't be
-		 * space for that many aces; something fishy is
-		 * going on:
-		 */
-		return nfserr_fbig;
-
-	*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(count));
-	if (*acl == NULL)
-		return nfserr_jukebox;
-
-	(*acl)->naces = count;
-	for (ace = (*acl)->aces; ace < (*acl)->aces + count; ace++) {
-		status = nfsd4_decode_nfsace4(argp, ace);
-		if (status)
-			return status;
-	}
-
-	return nfs_ok;
-}
-
-static noinline __be32
-nfsd4_decode_security_label(struct nfsd4_compoundargs *argp,
-			    struct xdr_netobj *label)
-{
-	u32 lfs, pi, length;
-	__be32 *p;
-
-	if (xdr_stream_decode_u32(argp->xdr, &lfs) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &pi) < 0)
-		return nfserr_bad_xdr;
-
-	if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
-		return nfserr_bad_xdr;
-	if (length > NFS4_MAXLABELLEN)
-		return nfserr_badlabel;
-	p = xdr_inline_decode(argp->xdr, length);
-	if (!p)
-		return nfserr_bad_xdr;
-	label->len = length;
-	label->data = svcxdr_dupstr(argp, p, length);
-	if (!label->data)
-		return nfserr_jukebox;
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
-		    struct iattr *iattr, struct nfs4_acl **acl,
-		    struct xdr_netobj *label, int *umask)
-{
-	unsigned int starting_pos;
-	u32 attrlist4_count;
-	__be32 *p, status;
+	int expected_len, len = 0;
+	u32 dummy32;
+	char *buf;
 
+	DECODE_HEAD;
 	iattr->ia_valid = 0;
-	status = nfsd4_decode_bitmap4(argp, bmval, bmlen);
-	if (status)
-		return nfserr_bad_xdr;
+	if ((status = nfsd4_decode_bitmap(argp, bmval)))
+		return status;
 
 	if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
 	    || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
@@ -398,69 +380,96 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
 		return nfserr_attrnotsupp;
 	}
 
-	if (xdr_stream_decode_u32(argp->xdr, &attrlist4_count) < 0)
-		return nfserr_bad_xdr;
-	starting_pos = xdr_stream_pos(argp->xdr);
+	READ_BUF(4);
+	expected_len = be32_to_cpup(p++);
 
 	if (bmval[0] & FATTR4_WORD0_SIZE) {
-		u64 size;
-
-		if (xdr_stream_decode_u64(argp->xdr, &size) < 0)
-			return nfserr_bad_xdr;
-		iattr->ia_size = size;
+		READ_BUF(8);
+		len += 8;
+		p = xdr_decode_hyper(p, &iattr->ia_size);
 		iattr->ia_valid |= ATTR_SIZE;
 	}
 	if (bmval[0] & FATTR4_WORD0_ACL) {
-		status = nfsd4_decode_acl(argp, acl);
-		if (status)
-			return status;
+		u32 nace;
+		struct nfs4_ace *ace;
+
+		READ_BUF(4); len += 4;
+		nace = be32_to_cpup(p++);
+
+		if (nace > compoundargs_bytes_left(argp)/20)
+			/*
+			 * Even with 4-byte names there wouldn't be
+			 * space for that many aces; something fishy is
+			 * going on:
+			 */
+			return nfserr_fbig;
+
+		*acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
+		if (*acl == NULL)
+			return nfserr_jukebox;
+
+		(*acl)->naces = nace;
+		for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
+			READ_BUF(16); len += 16;
+			ace->type = be32_to_cpup(p++);
+			ace->flag = be32_to_cpup(p++);
+			ace->access_mask = be32_to_cpup(p++);
+			dummy32 = be32_to_cpup(p++);
+			READ_BUF(dummy32);
+			len += XDR_QUADLEN(dummy32) << 2;
+			READMEM(buf, dummy32);
+			ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
+			status = nfs_ok;
+			if (ace->whotype != NFS4_ACL_WHO_NAMED)
+				;
+			else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+				status = nfsd_map_name_to_gid(argp->rqstp,
+						buf, dummy32, &ace->who_gid);
+			else
+				status = nfsd_map_name_to_uid(argp->rqstp,
+						buf, dummy32, &ace->who_uid);
+			if (status)
+				return status;
+		}
 	} else
 		*acl = NULL;
 	if (bmval[1] & FATTR4_WORD1_MODE) {
-		u32 mode;
-
-		if (xdr_stream_decode_u32(argp->xdr, &mode) < 0)
-			return nfserr_bad_xdr;
-		iattr->ia_mode = mode;
+		READ_BUF(4);
+		len += 4;
+		iattr->ia_mode = be32_to_cpup(p++);
 		iattr->ia_mode &= (S_IFMT | S_IALLUGO);
 		iattr->ia_valid |= ATTR_MODE;
 	}
 	if (bmval[1] & FATTR4_WORD1_OWNER) {
-		u32 length;
-
-		if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
-			return nfserr_bad_xdr;
-		p = xdr_inline_decode(argp->xdr, length);
-		if (!p)
-			return nfserr_bad_xdr;
-		status = nfsd_map_name_to_uid(argp->rqstp, (char *)p, length,
-					      &iattr->ia_uid);
-		if (status)
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++);
+		READ_BUF(dummy32);
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
 			return status;
 		iattr->ia_valid |= ATTR_UID;
 	}
 	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
-		u32 length;
-
-		if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
-			return nfserr_bad_xdr;
-		p = xdr_inline_decode(argp->xdr, length);
-		if (!p)
-			return nfserr_bad_xdr;
-		status = nfsd_map_name_to_gid(argp->rqstp, (char *)p, length,
-					      &iattr->ia_gid);
-		if (status)
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++);
+		READ_BUF(dummy32);
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
 			return status;
 		iattr->ia_valid |= ATTR_GID;
 	}
 	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
-		u32 set_it;
-
-		if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0)
-			return nfserr_bad_xdr;
-		switch (set_it) {
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++);
+		switch (dummy32) {
 		case NFS4_SET_TO_CLIENT_TIME:
-			status = nfsd4_decode_nfstime4(argp, &iattr->ia_atime);
+			len += 12;
+			status = nfsd4_decode_time(argp, &iattr->ia_atime);
 			if (status)
 				return status;
 			iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -469,26 +478,17 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
 			iattr->ia_valid |= ATTR_ATIME;
 			break;
 		default:
-			return nfserr_bad_xdr;
+			goto xdr_error;
 		}
 	}
-	if (bmval[1] & FATTR4_WORD1_TIME_CREATE) {
-		struct timespec64 ts;
-
-		/* No Linux filesystem supports setting this attribute. */
-		bmval[1] &= ~FATTR4_WORD1_TIME_CREATE;
-		status = nfsd4_decode_nfstime4(argp, &ts);
-		if (status)
-			return status;
-	}
 	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
-		u32 set_it;
-
-		if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0)
-			return nfserr_bad_xdr;
-		switch (set_it) {
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++);
+		switch (dummy32) {
 		case NFS4_SET_TO_CLIENT_TIME:
-			status = nfsd4_decode_nfstime4(argp, &iattr->ia_mtime);
+			len += 12;
+			status = nfsd4_decode_time(argp, &iattr->ia_mtime);
 			if (status)
 				return status;
 			iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -497,335 +497,222 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
 			iattr->ia_valid |= ATTR_MTIME;
 			break;
 		default:
-			return nfserr_bad_xdr;
+			goto xdr_error;
 		}
 	}
+
 	label->len = 0;
 	if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) &&
 	    bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
-		status = nfsd4_decode_security_label(argp, label);
-		if (status)
-			return status;
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */
+		READ_BUF(4);
+		len += 4;
+		dummy32 = be32_to_cpup(p++);
+		READ_BUF(dummy32);
+		if (dummy32 > NFS4_MAXLABELLEN)
+			return nfserr_badlabel;
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		label->len = dummy32;
+		label->data = svcxdr_dupstr(argp, buf, dummy32);
+		if (!label->data)
+			return nfserr_jukebox;
 	}
 	if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
-		u32 mode, mask;
-
 		if (!umask)
-			return nfserr_bad_xdr;
-		if (xdr_stream_decode_u32(argp->xdr, &mode) < 0)
-			return nfserr_bad_xdr;
-		iattr->ia_mode = mode & (S_IFMT | S_IALLUGO);
-		if (xdr_stream_decode_u32(argp->xdr, &mask) < 0)
-			return nfserr_bad_xdr;
-		*umask = mask & S_IRWXUGO;
+			goto xdr_error;
+		READ_BUF(8);
+		len += 8;
+		dummy32 = be32_to_cpup(p++);
+		iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO);
+		dummy32 = be32_to_cpup(p++);
+		*umask = dummy32 & S_IRWXUGO;
 		iattr->ia_valid |= ATTR_MODE;
 	}
+	if (len != expected_len)
+		goto xdr_error;
 
-	/* request sanity: did attrlist4 contain the expected number of words? */
-	if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos)
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid)
+nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)
 {
-	__be32 *p;
+	DECODE_HEAD;
 
-	p = xdr_inline_decode(argp->xdr, NFS4_STATEID_SIZE);
-	if (!p)
-		return nfserr_bad_xdr;
+	READ_BUF(sizeof(stateid_t));
 	sid->si_generation = be32_to_cpup(p++);
-	memcpy(&sid->si_opaque, p, sizeof(sid->si_opaque));
-	return nfs_ok;
+	COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_clientid4(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
 {
-	__be32 *p;
+	DECODE_HEAD;
 
-	p = xdr_inline_decode(argp->xdr, sizeof(__be64));
-	if (!p)
-		return nfserr_bad_xdr;
-	memcpy(clientid, p, sizeof(*clientid));
-	return nfs_ok;
+	READ_BUF(4);
+	access->ac_req_access = be32_to_cpup(p++);
+
+	DECODE_TAIL;
 }
 
-static __be32
-nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp,
-			  clientid_t *clientid, struct xdr_netobj *owner)
+static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
 {
-	__be32 status;
-
-	status = nfsd4_decode_clientid4(argp, clientid);
-	if (status)
-		return status;
-	return nfsd4_decode_opaque(argp, owner);
-}
-
-#ifdef CONFIG_NFSD_PNFS
-static __be32
-nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp,
-		       struct nfsd4_deviceid *devid)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE);
-	if (!p)
-		return nfserr_bad_xdr;
-	memcpy(devid, p, sizeof(*devid));
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
-			   struct nfsd4_layoutcommit *lcp)
-{
-	if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
-		return nfserr_bad_xdr;
-	if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
-		return nfserr_bad_xdr;
-	if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
-		return nfserr_bad_xdr;
-
-	if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
-		return nfserr_bad_xdr;
-	if (lcp->lc_up_len > 0) {
-		lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
-		if (!lcp->lc_up_layout)
-			return nfserr_bad_xdr;
-	}
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_layoutreturn4(struct nfsd4_compoundargs *argp,
-			   struct nfsd4_layoutreturn *lrp)
-{
-	__be32 status;
-
-	if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_return_type) < 0)
-		return nfserr_bad_xdr;
-	switch (lrp->lr_return_type) {
-	case RETURN_FILE:
-		if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.offset) < 0)
-			return nfserr_bad_xdr;
-		if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.length) < 0)
-			return nfserr_bad_xdr;
-		status = nfsd4_decode_stateid4(argp, &lrp->lr_sid);
-		if (status)
-			return status;
-		if (xdr_stream_decode_u32(argp->xdr, &lrp->lrf_body_len) < 0)
-			return nfserr_bad_xdr;
-		if (lrp->lrf_body_len > 0) {
-			lrp->lrf_body = xdr_inline_decode(argp->xdr, lrp->lrf_body_len);
-			if (!lrp->lrf_body)
-				return nfserr_bad_xdr;
-		}
-		break;
-	case RETURN_FSID:
-	case RETURN_ALL:
-		lrp->lr_seg.offset = 0;
-		lrp->lr_seg.length = NFS4_MAX_UINT64;
-		break;
-	default:
-		return nfserr_bad_xdr;
-	}
-
-	return nfs_ok;
-}
-
-#endif /* CONFIG_NFSD_PNFS */
-
-static __be32
-nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp,
-			struct nfs4_sessionid *sessionid)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(argp->xdr, NFS4_MAX_SESSIONID_LEN);
-	if (!p)
-		return nfserr_bad_xdr;
-	memcpy(sessionid->data, p, sizeof(sessionid->data));
-	return nfs_ok;
-}
-
-/* Defined in Appendix A of RFC 5531 */
-static __be32
-nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp,
-			   struct nfsd4_cb_sec *cbs)
-{
-	u32 stamp, gidcount, uid, gid;
-	__be32 *p, status;
-
-	if (xdr_stream_decode_u32(argp->xdr, &stamp) < 0)
-		return nfserr_bad_xdr;
-	/* machine name */
-	status = nfsd4_decode_ignored_string(argp, 255);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &uid) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &gid) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &gidcount) < 0)
-		return nfserr_bad_xdr;
-	if (gidcount > 16)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, gidcount << 2);
-	if (!p)
-		return nfserr_bad_xdr;
-	if (cbs->flavor == (u32)(-1)) {
-		struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
-
-		kuid_t kuid = make_kuid(userns, uid);
-		kgid_t kgid = make_kgid(userns, gid);
-		if (uid_valid(kuid) && gid_valid(kgid)) {
-			cbs->uid = kuid;
-			cbs->gid = kgid;
-			cbs->flavor = RPC_AUTH_UNIX;
-		} else {
-			dprintk("RPC_AUTH_UNIX with invalid uid or gid, ignoring!\n");
-		}
-	}
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_gss_cb_handles4(struct nfsd4_compoundargs *argp,
-			     struct nfsd4_cb_sec *cbs)
-{
-	__be32 status;
-	u32 service;
-
-	dprintk("RPC_AUTH_GSS callback secflavor not supported!\n");
-
-	if (xdr_stream_decode_u32(argp->xdr, &service) < 0)
-		return nfserr_bad_xdr;
-	if (service < RPC_GSS_SVC_NONE || service > RPC_GSS_SVC_PRIVACY)
-		return nfserr_bad_xdr;
-	/* gcbp_handle_from_server */
-	status = nfsd4_decode_ignored_string(argp, 0);
-	if (status)
-		return status;
-	/* gcbp_handle_from_client */
-	status = nfsd4_decode_ignored_string(argp, 0);
-	if (status)
-		return status;
-
-	return nfs_ok;
-}
-
-/* a counted array of callback_sec_parms4 items */
-static __be32
-nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
-{
-	u32 i, secflavor, nr_secflavs;
-	__be32 status;
+	DECODE_HEAD;
+	struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
+	u32 dummy, uid, gid;
+	char *machine_name;
+	int i;
+	int nr_secflavs;
 
 	/* callback_sec_params4 */
-	if (xdr_stream_decode_u32(argp->xdr, &nr_secflavs) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	nr_secflavs = be32_to_cpup(p++);
 	if (nr_secflavs)
 		cbs->flavor = (u32)(-1);
 	else
 		/* Is this legal? Be generous, take it to mean AUTH_NONE: */
 		cbs->flavor = 0;
-
 	for (i = 0; i < nr_secflavs; ++i) {
-		if (xdr_stream_decode_u32(argp->xdr, &secflavor) < 0)
-			return nfserr_bad_xdr;
-		switch (secflavor) {
+		READ_BUF(4);
+		dummy = be32_to_cpup(p++);
+		switch (dummy) {
 		case RPC_AUTH_NULL:
-			/* void */
+			/* Nothing to read */
 			if (cbs->flavor == (u32)(-1))
 				cbs->flavor = RPC_AUTH_NULL;
 			break;
 		case RPC_AUTH_UNIX:
-			status = nfsd4_decode_authsys_parms(argp, cbs);
-			if (status)
-				return status;
+			READ_BUF(8);
+			/* stamp */
+			dummy = be32_to_cpup(p++);
+
+			/* machine name */
+			dummy = be32_to_cpup(p++);
+			READ_BUF(dummy);
+			SAVEMEM(machine_name, dummy);
+
+			/* uid, gid */
+			READ_BUF(8);
+			uid = be32_to_cpup(p++);
+			gid = be32_to_cpup(p++);
+
+			/* more gids */
+			READ_BUF(4);
+			dummy = be32_to_cpup(p++);
+			READ_BUF(dummy * 4);
+			if (cbs->flavor == (u32)(-1)) {
+				kuid_t kuid = make_kuid(userns, uid);
+				kgid_t kgid = make_kgid(userns, gid);
+				if (uid_valid(kuid) && gid_valid(kgid)) {
+					cbs->uid = kuid;
+					cbs->gid = kgid;
+					cbs->flavor = RPC_AUTH_UNIX;
+				} else {
+					dprintk("RPC_AUTH_UNIX with invalid"
+						"uid or gid ignoring!\n");
+				}
+			}
 			break;
 		case RPC_AUTH_GSS:
-			status = nfsd4_decode_gss_cb_handles4(argp, cbs);
-			if (status)
-				return status;
+			dprintk("RPC_AUTH_GSS callback secflavor "
+				"not supported!\n");
+			READ_BUF(8);
+			/* gcbp_service */
+			dummy = be32_to_cpup(p++);
+			/* gcbp_handle_from_server */
+			dummy = be32_to_cpup(p++);
+			READ_BUF(dummy);
+			p += XDR_QUADLEN(dummy);
+			/* gcbp_handle_from_client */
+			READ_BUF(4);
+			dummy = be32_to_cpup(p++);
+			READ_BUF(dummy);
 			break;
 		default:
+			dprintk("Illegal callback secflavor\n");
 			return nfserr_inval;
 		}
 	}
-
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
-
-/*
- * NFSv4 operation argument decoders
- */
-
-static __be32
-nfsd4_decode_access(struct nfsd4_compoundargs *argp,
-		    union nfsd4_op_u *u)
+static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
 {
-	struct nfsd4_access *access = &u->access;
-	if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0)
-		return nfserr_bad_xdr;
-	return nfs_ok;
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	bc->bc_cb_program = be32_to_cpup(p++);
+	nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+
+	DECODE_TAIL;
+}
+
+static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+{
+	DECODE_HEAD;
+
+	READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
+	COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+	bcts->dir = be32_to_cpup(p++);
+	/* XXX: skipping ctsa_use_conn_in_rdma_mode.  Perhaps Tom Tucker
+	 * could help us figure out whether we should be using it. */
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
 {
-	struct nfsd4_close *close = &u->close;
-	if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_stateid4(argp, &close->cl_stateid);
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	close->cl_seqid = be32_to_cpup(p++);
+	return nfsd4_decode_stateid(argp, &close->cl_stateid);
+
+	DECODE_TAIL;
 }
 
 
 static __be32
-nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
 {
-	struct nfsd4_commit *commit = &u->commit;
-	if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
-		return nfserr_bad_xdr;
-	memset(&commit->co_verf, 0, sizeof(commit->co_verf));
-	return nfs_ok;
+	DECODE_HEAD;
+
+	READ_BUF(12);
+	p = xdr_decode_hyper(p, &commit->co_offset);
+	commit->co_count = be32_to_cpup(p++);
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
 {
-	struct nfsd4_create *create = &u->create;
-	__be32 *p, status;
+	DECODE_HEAD;
 
-	memset(create, 0, sizeof(*create));
-	if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	create->cr_type = be32_to_cpup(p++);
 	switch (create->cr_type) {
 	case NF4LNK:
-		if (xdr_stream_decode_u32(argp->xdr, &create->cr_datalen) < 0)
-			return nfserr_bad_xdr;
-		p = xdr_inline_decode(argp->xdr, create->cr_datalen);
-		if (!p)
-			return nfserr_bad_xdr;
+		READ_BUF(4);
+		create->cr_datalen = be32_to_cpup(p++);
+		READ_BUF(create->cr_datalen);
 		create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
 		if (!create->cr_data)
 			return nfserr_jukebox;
 		break;
 	case NF4BLK:
 	case NF4CHR:
-		if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata1) < 0)
-			return nfserr_bad_xdr;
-		if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata2) < 0)
-			return nfserr_bad_xdr;
+		READ_BUF(8);
+		create->cr_specdata1 = be32_to_cpup(p++);
+		create->cr_specdata2 = be32_to_cpup(p++);
 		break;
 	case NF4SOCK:
 	case NF4FIFO:
@@ -833,221 +720,151 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
 	default:
 		break;
 	}
-	status = nfsd4_decode_component4(argp, &create->cr_name,
-					 &create->cr_namelen);
-	if (status)
-		return status;
-	status = nfsd4_decode_fattr4(argp, create->cr_bmval,
-				    ARRAY_SIZE(create->cr_bmval),
-				    &create->cr_iattr, &create->cr_acl,
-				    &create->cr_label, &create->cr_umask);
-	if (status)
+
+	READ_BUF(4);
+	create->cr_namelen = be32_to_cpup(p++);
+	READ_BUF(create->cr_namelen);
+	SAVEMEM(create->cr_name, create->cr_namelen);
+	if ((status = check_filename(create->cr_name, create->cr_namelen)))
 		return status;
 
-	return nfs_ok;
+	status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
+				    &create->cr_acl, &create->cr_label,
+				    &create->cr_umask);
+	if (status)
+		goto out;
+
+	DECODE_TAIL;
 }
 
 static inline __be32
-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
 {
-	struct nfsd4_delegreturn *dr = &u->delegreturn;
-	return nfsd4_decode_stateid4(argp, &dr->dr_stateid);
+	return nfsd4_decode_stateid(argp, &dr->dr_stateid);
 }
 
 static inline __be32
-nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
 {
-	struct nfsd4_getattr *getattr = &u->getattr;
-	memset(getattr, 0, sizeof(*getattr));
-	return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
-				    ARRAY_SIZE(getattr->ga_bmval));
+	return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
 }
 
 static __be32
-nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
 {
-	struct nfsd4_link *link = &u->link;
-	memset(link, 0, sizeof(*link));
-	return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
-}
+	DECODE_HEAD;
 
-static __be32
-nfsd4_decode_open_to_lock_owner4(struct nfsd4_compoundargs *argp,
-				 struct nfsd4_lock *lock)
-{
-	__be32 status;
-
-	if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_open_seqid) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_stateid4(argp, &lock->lk_new_open_stateid);
-	if (status)
+	READ_BUF(4);
+	link->li_namelen = be32_to_cpup(p++);
+	READ_BUF(link->li_namelen);
+	SAVEMEM(link->li_name, link->li_namelen);
+	if ((status = check_filename(link->li_name, link->li_namelen)))
 		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_lock_seqid) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid,
-					 &lock->lk_new_owner);
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_exist_lock_owner4(struct nfsd4_compoundargs *argp,
-			       struct nfsd4_lock *lock)
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
 {
-	__be32 status;
+	DECODE_HEAD;
 
-	status = nfsd4_decode_stateid4(argp, &lock->lk_old_lock_stateid);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &lock->lk_old_lock_seqid) < 0)
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
-{
-	if (xdr_stream_decode_bool(argp->xdr, &lock->lk_is_new) < 0)
-		return nfserr_bad_xdr;
-	if (lock->lk_is_new)
-		return nfsd4_decode_open_to_lock_owner4(argp, lock);
-	return nfsd4_decode_exist_lock_owner4(argp, lock);
-}
-
-static __be32
-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
-{
-	struct nfsd4_lock *lock = &u->lock;
-	memset(lock, 0, sizeof(*lock));
-	if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
-		return nfserr_bad_xdr;
+	/*
+	 * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
+	 */
+	READ_BUF(28);
+	lock->lk_type = be32_to_cpup(p++);
 	if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_bool(argp->xdr, &lock->lk_reclaim) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lock->lk_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lock->lk_length) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_locker4(argp, lock);
+		goto xdr_error;
+	lock->lk_reclaim = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &lock->lk_offset);
+	p = xdr_decode_hyper(p, &lock->lk_length);
+	lock->lk_is_new = be32_to_cpup(p++);
+
+	if (lock->lk_is_new) {
+		READ_BUF(4);
+		lock->lk_new_open_seqid = be32_to_cpup(p++);
+		status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
+		if (status)
+			return status;
+		READ_BUF(8 + sizeof(clientid_t));
+		lock->lk_new_lock_seqid = be32_to_cpup(p++);
+		COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
+		lock->lk_new_owner.len = be32_to_cpup(p++);
+		READ_BUF(lock->lk_new_owner.len);
+		READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
+	} else {
+		status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid);
+		if (status)
+			return status;
+		READ_BUF(4);
+		lock->lk_old_lock_seqid = be32_to_cpup(p++);
+	}
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
 {
-	struct nfsd4_lockt *lockt = &u->lockt;
-	memset(lockt, 0, sizeof(*lockt));
-	if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
-		return nfserr_bad_xdr;
-	if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_length) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_state_owner4(argp, &lockt->lt_clientid,
-					 &lockt->lt_owner);
+	DECODE_HEAD;
+		        
+	READ_BUF(32);
+	lockt->lt_type = be32_to_cpup(p++);
+	if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+		goto xdr_error;
+	p = xdr_decode_hyper(p, &lockt->lt_offset);
+	p = xdr_decode_hyper(p, &lockt->lt_length);
+	COPYMEM(&lockt->lt_clientid, 8);
+	lockt->lt_owner.len = be32_to_cpup(p++);
+	READ_BUF(lockt->lt_owner.len);
+	READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
 {
-	struct nfsd4_locku *locku = &u->locku;
-	__be32 status;
+	DECODE_HEAD;
 
-	if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(8);
+	locku->lu_type = be32_to_cpup(p++);
 	if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &locku->lu_seqid) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_stateid4(argp, &locku->lu_stateid);
+		goto xdr_error;
+	locku->lu_seqid = be32_to_cpup(p++);
+	status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &locku->lu_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &locku->lu_length) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(16);
+	p = xdr_decode_hyper(p, &locku->lu_offset);
+	p = xdr_decode_hyper(p, &locku->lu_length);
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
 {
-	struct nfsd4_lookup *lookup = &u->lookup;
-	return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len);
-}
+	DECODE_HEAD;
 
-static __be32
-nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
-{
-	__be32 status;
+	READ_BUF(4);
+	lookup->lo_len = be32_to_cpup(p++);
+	READ_BUF(lookup->lo_len);
+	SAVEMEM(lookup->lo_name, lookup->lo_len);
+	if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
+		return status;
 
-	if (xdr_stream_decode_u32(argp->xdr, &open->op_createmode) < 0)
-		return nfserr_bad_xdr;
-	switch (open->op_createmode) {
-	case NFS4_CREATE_UNCHECKED:
-	case NFS4_CREATE_GUARDED:
-		status = nfsd4_decode_fattr4(argp, open->op_bmval,
-					     ARRAY_SIZE(open->op_bmval),
-					     &open->op_iattr, &open->op_acl,
-					     &open->op_label, &open->op_umask);
-		if (status)
-			return status;
-		break;
-	case NFS4_CREATE_EXCLUSIVE:
-		status = nfsd4_decode_verifier4(argp, &open->op_verf);
-		if (status)
-			return status;
-		break;
-	case NFS4_CREATE_EXCLUSIVE4_1:
-		if (argp->minorversion < 1)
-			return nfserr_bad_xdr;
-		status = nfsd4_decode_verifier4(argp, &open->op_verf);
-		if (status)
-			return status;
-		status = nfsd4_decode_fattr4(argp, open->op_bmval,
-					     ARRAY_SIZE(open->op_bmval),
-					     &open->op_iattr, &open->op_acl,
-					     &open->op_label, &open->op_umask);
-		if (status)
-			return status;
-		break;
-	default:
-		return nfserr_bad_xdr;
-	}
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
-{
-	__be32 status;
-
-	if (xdr_stream_decode_u32(argp->xdr, &open->op_create) < 0)
-		return nfserr_bad_xdr;
-	switch (open->op_create) {
-	case NFS4_OPEN_NOCREATE:
-		break;
-	case NFS4_OPEN_CREATE:
-		status = nfsd4_decode_createhow4(argp, open);
-		if (status)
-			return status;
-		break;
-	default:
-		return nfserr_bad_xdr;
-	}
-
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when)
 {
+	__be32 *p;
 	u32 w;
 
-	if (xdr_stream_decode_u32(argp->xdr, &w) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	w = be32_to_cpup(p++);
 	*share_access = w & NFS4_SHARE_ACCESS_MASK;
 	*deleg_want = w & NFS4_SHARE_WANT_MASK;
 	if (deleg_when)
@@ -1090,163 +907,210 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
 	      NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED):
 		return nfs_ok;
 	}
+xdr_error:
 	return nfserr_bad_xdr;
 }
 
 static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
 {
-	if (xdr_stream_decode_u32(argp->xdr, x) < 0)
-		return nfserr_bad_xdr;
-	/* Note: unlike access bits, deny bits may be zero. */
+	__be32 *p;
+
+	READ_BUF(4);
+	*x = be32_to_cpup(p++);
+	/* Note: unlike access bits, deny bits may be zero. */
 	if (*x & ~NFS4_SHARE_DENY_BOTH)
 		return nfserr_bad_xdr;
-
 	return nfs_ok;
+xdr_error:
+	return nfserr_bad_xdr;
+}
+
+static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
+{
+	__be32 *p;
+
+	READ_BUF(4);
+	o->len = be32_to_cpup(p++);
+
+	if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
+		return nfserr_bad_xdr;
+
+	READ_BUF(o->len);
+	SAVEMEM(o->data, o->len);
+	return nfs_ok;
+xdr_error:
+	return nfserr_bad_xdr;
 }
 
 static __be32
-nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp,
-			 struct nfsd4_open *open)
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
 {
-	__be32 status;
+	DECODE_HEAD;
+	u32 dummy;
 
-	if (xdr_stream_decode_u32(argp->xdr, &open->op_claim_type) < 0)
-		return nfserr_bad_xdr;
+	memset(open->op_bmval, 0, sizeof(open->op_bmval));
+	open->op_iattr.ia_valid = 0;
+	open->op_openowner = NULL;
+
+	open->op_xdr_error = 0;
+	/* seqid, share_access, share_deny, clientid, ownerlen */
+	READ_BUF(4);
+	open->op_seqid = be32_to_cpup(p++);
+	/* decode, yet ignore deleg_when until supported */
+	status = nfsd4_decode_share_access(argp, &open->op_share_access,
+					   &open->op_deleg_want, &dummy);
+	if (status)
+		goto xdr_error;
+	status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
+	if (status)
+		goto xdr_error;
+	READ_BUF(sizeof(clientid_t));
+	COPYMEM(&open->op_clientid, sizeof(clientid_t));
+	status = nfsd4_decode_opaque(argp, &open->op_owner);
+	if (status)
+		goto xdr_error;
+	READ_BUF(4);
+	open->op_create = be32_to_cpup(p++);
+	switch (open->op_create) {
+	case NFS4_OPEN_NOCREATE:
+		break;
+	case NFS4_OPEN_CREATE:
+		READ_BUF(4);
+		open->op_createmode = be32_to_cpup(p++);
+		switch (open->op_createmode) {
+		case NFS4_CREATE_UNCHECKED:
+		case NFS4_CREATE_GUARDED:
+			status = nfsd4_decode_fattr(argp, open->op_bmval,
+				&open->op_iattr, &open->op_acl, &open->op_label,
+				&open->op_umask);
+			if (status)
+				goto out;
+			break;
+		case NFS4_CREATE_EXCLUSIVE:
+			READ_BUF(NFS4_VERIFIER_SIZE);
+			COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
+			break;
+		case NFS4_CREATE_EXCLUSIVE4_1:
+			if (argp->minorversion < 1)
+				goto xdr_error;
+			READ_BUF(NFS4_VERIFIER_SIZE);
+			COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
+			status = nfsd4_decode_fattr(argp, open->op_bmval,
+				&open->op_iattr, &open->op_acl, &open->op_label,
+				&open->op_umask);
+			if (status)
+				goto out;
+			break;
+		default:
+			goto xdr_error;
+		}
+		break;
+	default:
+		goto xdr_error;
+	}
+
+	/* open_claim */
+	READ_BUF(4);
+	open->op_claim_type = be32_to_cpup(p++);
 	switch (open->op_claim_type) {
 	case NFS4_OPEN_CLAIM_NULL:
 	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
-		status = nfsd4_decode_component4(argp, &open->op_fname,
-						 &open->op_fnamelen);
-		if (status)
+		READ_BUF(4);
+		open->op_fname.len = be32_to_cpup(p++);
+		READ_BUF(open->op_fname.len);
+		SAVEMEM(open->op_fname.data, open->op_fname.len);
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
 			return status;
 		break;
 	case NFS4_OPEN_CLAIM_PREVIOUS:
-		if (xdr_stream_decode_u32(argp->xdr, &open->op_delegate_type) < 0)
-			return nfserr_bad_xdr;
+		READ_BUF(4);
+		open->op_delegate_type = be32_to_cpup(p++);
 		break;
 	case NFS4_OPEN_CLAIM_DELEGATE_CUR:
-		status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid);
+		status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
 		if (status)
 			return status;
-		status = nfsd4_decode_component4(argp, &open->op_fname,
-						 &open->op_fnamelen);
-		if (status)
+		READ_BUF(4);
+		open->op_fname.len = be32_to_cpup(p++);
+		READ_BUF(open->op_fname.len);
+		SAVEMEM(open->op_fname.data, open->op_fname.len);
+		if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
 			return status;
 		break;
 	case NFS4_OPEN_CLAIM_FH:
 	case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
 		if (argp->minorversion < 1)
-			return nfserr_bad_xdr;
+			goto xdr_error;
 		/* void */
 		break;
 	case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
 		if (argp->minorversion < 1)
-			return nfserr_bad_xdr;
-		status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid);
+			goto xdr_error;
+		status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
 		if (status)
 			return status;
 		break;
 	default:
-		return nfserr_bad_xdr;
+		goto xdr_error;
 	}
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
 {
-	struct nfsd4_open *open = &u->open;
-	__be32 status;
-	u32 dummy;
-
-	memset(open, 0, sizeof(*open));
-
-	if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0)
-		return nfserr_bad_xdr;
-	/* deleg_want is ignored */
-	status = nfsd4_decode_share_access(argp, &open->op_share_access,
-					   &open->op_deleg_want, &dummy);
-	if (status)
-		return status;
-	status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
-	if (status)
-		return status;
-	status = nfsd4_decode_state_owner4(argp, &open->op_clientid,
-					   &open->op_owner);
-	if (status)
-		return status;
-	status = nfsd4_decode_openflag4(argp, open);
-	if (status)
-		return status;
-	return nfsd4_decode_open_claim4(argp, open);
-}
-
-static __be32
-nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp,
-			  union nfsd4_op_u *u)
-{
-	struct nfsd4_open_confirm *open_conf = &u->open_confirm;
-	__be32 status;
+	DECODE_HEAD;
 
 	if (argp->minorversion >= 1)
 		return nfserr_notsupp;
 
-	status = nfsd4_decode_stateid4(argp, &open_conf->oc_req_stateid);
+	status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	open_conf->oc_seqid = be32_to_cpup(p++);
 
-	memset(&open_conf->oc_resp_stateid, 0,
-	       sizeof(open_conf->oc_resp_stateid));
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp,
-			    union nfsd4_op_u *u)
+nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
 {
-	struct nfsd4_open_downgrade *open_down = &u->open_downgrade;
-	__be32 status;
-
-	memset(open_down, 0, sizeof(*open_down));
-	status = nfsd4_decode_stateid4(argp, &open_down->od_stateid);
+	DECODE_HEAD;
+		    
+	status = nfsd4_decode_stateid(argp, &open_down->od_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &open_down->od_seqid) < 0)
-		return nfserr_bad_xdr;
-	/* deleg_want is ignored */
+	READ_BUF(4);
+	open_down->od_seqid = be32_to_cpup(p++);
 	status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
 					   &open_down->od_deleg_want, NULL);
 	if (status)
 		return status;
-	return nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
+	status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
+	if (status)
+		return status;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
 {
-	struct nfsd4_putfh *putfh = &u->putfh;
-	__be32 *p;
+	DECODE_HEAD;
 
-	if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	putfh->pf_fhlen = be32_to_cpup(p++);
 	if (putfh->pf_fhlen > NFS4_FHSIZE)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen);
-	if (!p)
-		return nfserr_bad_xdr;
-	putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen);
-	if (!putfh->pf_fhval)
-		return nfserr_jukebox;
+		goto xdr_error;
+	READ_BUF(putfh->pf_fhlen);
+	SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen);
 
-	putfh->no_verify = false;
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
 {
 	if (argp->minorversion == 0)
 		return nfs_ok;
@@ -1254,771 +1118,719 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
 }
 
 static __be32
-nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
 {
-	struct nfsd4_read *read = &u->read;
-	__be32 status;
+	DECODE_HEAD;
 
-	memset(read, 0, sizeof(*read));
-	status = nfsd4_decode_stateid4(argp, &read->rd_stateid);
+	status = nfsd4_decode_stateid(argp, &read->rd_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &read->rd_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &read->rd_length) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(12);
+	p = xdr_decode_hyper(p, &read->rd_offset);
+	read->rd_length = be32_to_cpup(p++);
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
 {
-	struct nfsd4_readdir *readdir = &u->readdir;
-	__be32 status;
+	DECODE_HEAD;
 
-	memset(readdir, 0, sizeof(*readdir));
-	if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_verifier4(argp, &readdir->rd_verf);
-	if (status)
+	READ_BUF(24);
+	p = xdr_decode_hyper(p, &readdir->rd_cookie);
+	COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
+	readdir->rd_dircount = be32_to_cpup(p++);
+	readdir->rd_maxcount = be32_to_cpup(p++);
+	if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
+		goto out;
+
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	remove->rm_namelen = be32_to_cpup(p++);
+	READ_BUF(remove->rm_namelen);
+	SAVEMEM(remove->rm_name, remove->rm_namelen);
+	if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
 		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_dircount) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_maxcount) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_uint32_array(argp->xdr, readdir->rd_bmval,
-					   ARRAY_SIZE(readdir->rd_bmval)) < 0)
-		return nfserr_bad_xdr;
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
 {
-	struct nfsd4_remove *remove = &u->remove;
-	memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
-	return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
-}
+	DECODE_HEAD;
 
-static __be32
-nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
-{
-	struct nfsd4_rename *rename = &u->rename;
-	__be32 status;
-
-	memset(rename, 0, sizeof(*rename));
-	status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen);
-	if (status)
+	READ_BUF(4);
+	rename->rn_snamelen = be32_to_cpup(p++);
+	READ_BUF(rename->rn_snamelen);
+	SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+	READ_BUF(4);
+	rename->rn_tnamelen = be32_to_cpup(p++);
+	READ_BUF(rename->rn_tnamelen);
+	SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
+	if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
 		return status;
-	return nfsd4_decode_component4(argp, &rename->rn_tname, &rename->rn_tnamelen);
+	if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen)))
+		return status;
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
 {
-	clientid_t *clientid = &u->renew;
-	return nfsd4_decode_clientid4(argp, clientid);
+	DECODE_HEAD;
+
+	if (argp->minorversion >= 1)
+		return nfserr_notsupp;
+
+	READ_BUF(sizeof(clientid_t));
+	COPYMEM(clientid, sizeof(clientid_t));
+
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
-		     union nfsd4_op_u *u)
+		     struct nfsd4_secinfo *secinfo)
 {
-	struct nfsd4_secinfo *secinfo = &u->secinfo;
-	secinfo->si_exp = NULL;
-	return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
-}
+	DECODE_HEAD;
 
-static __be32
-nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
-{
-	struct nfsd4_setattr *setattr = &u->setattr;
-	__be32 status;
-
-	memset(setattr, 0, sizeof(*setattr));
-	status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid);
+	READ_BUF(4);
+	secinfo->si_namelen = be32_to_cpup(p++);
+	READ_BUF(secinfo->si_namelen);
+	SAVEMEM(secinfo->si_name, secinfo->si_namelen);
+	status = check_filename(secinfo->si_name, secinfo->si_namelen);
 	if (status)
 		return status;
-	return nfsd4_decode_fattr4(argp, setattr->sa_bmval,
-				   ARRAY_SIZE(setattr->sa_bmval),
-				   &setattr->sa_iattr, &setattr->sa_acl,
-				   &setattr->sa_label, NULL);
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+		     struct nfsd4_secinfo_no_name *sin)
 {
-	struct nfsd4_setclientid *setclientid = &u->setclientid;
-	__be32 *p, status;
+	DECODE_HEAD;
 
-	memset(setclientid, 0, sizeof(*setclientid));
+	READ_BUF(4);
+	sin->sin_style = be32_to_cpup(p++);
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+{
+	__be32 status;
+
+	status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
+	if (status)
+		return status;
+	return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
+				  &setattr->sa_acl, &setattr->sa_label, NULL);
+}
+
+static __be32
+nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
+{
+	DECODE_HEAD;
 
 	if (argp->minorversion >= 1)
 		return nfserr_notsupp;
 
-	status = nfsd4_decode_verifier4(argp, &setclientid->se_verf);
-	if (status)
-		return status;
+	READ_BUF(NFS4_VERIFIER_SIZE);
+	COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE);
+
 	status = nfsd4_decode_opaque(argp, &setclientid->se_name);
 	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_prog) < 0)
 		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_netid_len) < 0)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len);
-	if (!p)
-		return nfserr_bad_xdr;
-	setclientid->se_callback_netid_val = svcxdr_savemem(argp, p,
-						setclientid->se_callback_netid_len);
-	if (!setclientid->se_callback_netid_val)
-		return nfserr_jukebox;
+	READ_BUF(8);
+	setclientid->se_callback_prog = be32_to_cpup(p++);
+	setclientid->se_callback_netid_len = be32_to_cpup(p++);
+	READ_BUF(setclientid->se_callback_netid_len);
+	SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+	READ_BUF(4);
+	setclientid->se_callback_addr_len = be32_to_cpup(p++);
 
-	if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len);
-	if (!p)
-		return nfserr_bad_xdr;
-	setclientid->se_callback_addr_val = svcxdr_savemem(argp, p,
-						setclientid->se_callback_addr_len);
-	if (!setclientid->se_callback_addr_val)
-		return nfserr_jukebox;
-	if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(setclientid->se_callback_addr_len);
+	SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+	READ_BUF(4);
+	setclientid->se_callback_ident = be32_to_cpup(p++);
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp,
-				 union nfsd4_op_u *u)
+nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
 {
-	struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm;
-	__be32 status;
+	DECODE_HEAD;
 
 	if (argp->minorversion >= 1)
 		return nfserr_notsupp;
 
-	status = nfsd4_decode_clientid4(argp, &scd_c->sc_clientid);
-	if (status)
-		return status;
-	return nfsd4_decode_verifier4(argp, &scd_c->sc_confirm);
+	READ_BUF(8 + NFS4_VERIFIER_SIZE);
+	COPYMEM(&scd_c->sc_clientid, 8);
+	COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE);
+
+	DECODE_TAIL;
 }
 
 /* Also used for NVERIFY */
 static __be32
-nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
 {
-	struct nfsd4_verify *verify = &u->verify;
-	__be32 *p, status;
+	DECODE_HEAD;
 
-	memset(verify, 0, sizeof(*verify));
-
-	status = nfsd4_decode_bitmap4(argp, verify->ve_bmval,
-				      ARRAY_SIZE(verify->ve_bmval));
-	if (status)
-		return status;
+	if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
+		goto out;
 
 	/* For convenience's sake, we compare raw xdr'd attributes in
 	 * nfsd4_proc_verify */
 
-	if (xdr_stream_decode_u32(argp->xdr, &verify->ve_attrlen) < 0)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, verify->ve_attrlen);
-	if (!p)
-		return nfserr_bad_xdr;
-	verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen);
-	if (!verify->ve_attrval)
-		return nfserr_jukebox;
+	READ_BUF(4);
+	verify->ve_attrlen = be32_to_cpup(p++);
+	READ_BUF(verify->ve_attrlen);
+	SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 {
-	struct nfsd4_write *write = &u->write;
-	__be32 status;
+	DECODE_HEAD;
 
-	status = nfsd4_decode_stateid4(argp, &write->wr_stateid);
+	status = nfsd4_decode_stateid(argp, &write->wr_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &write->wr_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &write->wr_stable_how) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(16);
+	p = xdr_decode_hyper(p, &write->wr_offset);
+	write->wr_stable_how = be32_to_cpup(p++);
 	if (write->wr_stable_how > NFS_FILE_SYNC)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &write->wr_buflen) < 0)
-		return nfserr_bad_xdr;
-	if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen))
-		return nfserr_bad_xdr;
+		goto xdr_error;
+	write->wr_buflen = be32_to_cpup(p++);
 
-	write->wr_bytes_written = 0;
-	write->wr_how_written = 0;
-	memset(&write->wr_verifier, 0, sizeof(write->wr_verifier));
-	return nfs_ok;
+	status = svcxdr_construct_vector(argp, &write->wr_head,
+					 &write->wr_pagelist, write->wr_buflen);
+	if (status)
+		return status;
+
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp,
-			       union nfsd4_op_u *u)
+nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
 {
-	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
-	__be32 status;
+	DECODE_HEAD;
 
 	if (argp->minorversion >= 1)
 		return nfserr_notsupp;
 
-	status = nfsd4_decode_state_owner4(argp, &rlockowner->rl_clientid,
-					   &rlockowner->rl_owner);
-	if (status)
-		return status;
+	READ_BUF(12);
+	COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
+	rlockowner->rl_owner.len = be32_to_cpup(p++);
+	READ_BUF(rlockowner->rl_owner.len);
+	READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
 
 	if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid))
 		return nfserr_inval;
-
-	return nfs_ok;
-}
-
-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp,
-					   union nfsd4_op_u *u)
-{
-	struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
-	memset(bc, 0, sizeof(*bc));
-	if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
-}
-
-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
-						union nfsd4_op_u *u)
-{
-	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
-	u32 use_conn_in_rdma_mode;
-	__be32 status;
-
-	memset(bcts, 0, sizeof(*bcts));
-	status = nfsd4_decode_sessionid4(argp, &bcts->sessionid);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &bcts->dir) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &use_conn_in_rdma_mode) < 0)
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp,
-			       struct nfsd4_exchange_id *exid)
-{
-	__be32 status;
-
-	status = nfsd4_decode_bitmap4(argp, exid->spo_must_enforce,
-				      ARRAY_SIZE(exid->spo_must_enforce));
-	if (status)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_bitmap4(argp, exid->spo_must_allow,
-				      ARRAY_SIZE(exid->spo_must_allow));
-	if (status)
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
-}
-
-/*
- * This implementation currently does not support SP4_SSV.
- * This decoder simply skips over these arguments.
- */
-static noinline __be32
-nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp,
-			  struct nfsd4_exchange_id *exid)
-{
-	u32 count, window, num_gss_handles;
-	__be32 status;
-
-	/* ssp_ops */
-	status = nfsd4_decode_state_protect_ops(argp, exid);
-	if (status)
-		return status;
-
-	/* ssp_hash_algs<> */
-	if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
-		return nfserr_bad_xdr;
-	while (count--) {
-		status = nfsd4_decode_ignored_string(argp, 0);
-		if (status)
-			return status;
-	}
-
-	/* ssp_encr_algs<> */
-	if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
-		return nfserr_bad_xdr;
-	while (count--) {
-		status = nfsd4_decode_ignored_string(argp, 0);
-		if (status)
-			return status;
-	}
-
-	if (xdr_stream_decode_u32(argp->xdr, &window) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &num_gss_handles) < 0)
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp,
-			      struct nfsd4_exchange_id *exid)
-{
-	__be32 status;
-
-	if (xdr_stream_decode_u32(argp->xdr, &exid->spa_how) < 0)
-		return nfserr_bad_xdr;
-	switch (exid->spa_how) {
-	case SP4_NONE:
-		break;
-	case SP4_MACH_CRED:
-		status = nfsd4_decode_state_protect_ops(argp, exid);
-		if (status)
-			return status;
-		break;
-	case SP4_SSV:
-		status = nfsd4_decode_ssv_sp_parms(argp, exid);
-		if (status)
-			return status;
-		break;
-	default:
-		return nfserr_bad_xdr;
-	}
-
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp,
-			  struct nfsd4_exchange_id *exid)
-{
-	__be32 status;
-	u32 count;
-
-	if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
-		return nfserr_bad_xdr;
-	switch (count) {
-	case 0:
-		break;
-	case 1:
-		/* Note that RFC 8881 places no length limit on
-		 * nii_domain, but this implementation permits no
-		 * more than NFS4_OPAQUE_LIMIT bytes */
-		status = nfsd4_decode_opaque(argp, &exid->nii_domain);
-		if (status)
-			return status;
-		/* Note that RFC 8881 places no length limit on
-		 * nii_name, but this implementation permits no
-		 * more than NFS4_OPAQUE_LIMIT bytes */
-		status = nfsd4_decode_opaque(argp, &exid->nii_name);
-		if (status)
-			return status;
-		status = nfsd4_decode_nfstime4(argp, &exid->nii_time);
-		if (status)
-			return status;
-		break;
-	default:
-		return nfserr_bad_xdr;
-	}
-
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
-			 union nfsd4_op_u *u)
+			 struct nfsd4_exchange_id *exid)
 {
-	struct nfsd4_exchange_id *exid = &u->exchange_id;
-	__be32 status;
+	int dummy, tmp;
+	DECODE_HEAD;
+
+	READ_BUF(NFS4_VERIFIER_SIZE);
+	COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE);
 
-	memset(exid, 0, sizeof(*exid));
-	status = nfsd4_decode_verifier4(argp, &exid->verifier);
-	if (status)
-		return status;
 	status = nfsd4_decode_opaque(argp, &exid->clname);
 	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &exid->flags) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_state_protect4_a(argp, exid);
-	if (status)
-		return status;
-	return nfsd4_decode_nfs_impl_id4(argp, exid);
-}
-
-static __be32
-nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp,
-			    struct nfsd4_channel_attrs *ca)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(argp->xdr, XDR_UNIT * 7);
-	if (!p)
 		return nfserr_bad_xdr;
 
-	/* headerpadsz is ignored */
-	p++;
-	ca->maxreq_sz = be32_to_cpup(p++);
-	ca->maxresp_sz = be32_to_cpup(p++);
-	ca->maxresp_cached = be32_to_cpup(p++);
-	ca->maxops = be32_to_cpup(p++);
-	ca->maxreqs = be32_to_cpup(p++);
-	ca->nr_rdma_attrs = be32_to_cpup(p);
-	switch (ca->nr_rdma_attrs) {
-	case 0:
+	READ_BUF(4);
+	exid->flags = be32_to_cpup(p++);
+
+	/* Ignore state_protect4_a */
+	READ_BUF(4);
+	exid->spa_how = be32_to_cpup(p++);
+	switch (exid->spa_how) {
+	case SP4_NONE:
 		break;
-	case 1:
-		if (xdr_stream_decode_u32(argp->xdr, &ca->rdma_attrs) < 0)
-			return nfserr_bad_xdr;
+	case SP4_MACH_CRED:
+		/* spo_must_enforce */
+		status = nfsd4_decode_bitmap(argp,
+					exid->spo_must_enforce);
+		if (status)
+			goto out;
+		/* spo_must_allow */
+		status = nfsd4_decode_bitmap(argp, exid->spo_must_allow);
+		if (status)
+			goto out;
+		break;
+	case SP4_SSV:
+		/* ssp_ops */
+		READ_BUF(4);
+		dummy = be32_to_cpup(p++);
+		READ_BUF(dummy * 4);
+		p += dummy;
+
+		READ_BUF(4);
+		dummy = be32_to_cpup(p++);
+		READ_BUF(dummy * 4);
+		p += dummy;
+
+		/* ssp_hash_algs<> */
+		READ_BUF(4);
+		tmp = be32_to_cpup(p++);
+		while (tmp--) {
+			READ_BUF(4);
+			dummy = be32_to_cpup(p++);
+			READ_BUF(dummy);
+			p += XDR_QUADLEN(dummy);
+		}
+
+		/* ssp_encr_algs<> */
+		READ_BUF(4);
+		tmp = be32_to_cpup(p++);
+		while (tmp--) {
+			READ_BUF(4);
+			dummy = be32_to_cpup(p++);
+			READ_BUF(dummy);
+			p += XDR_QUADLEN(dummy);
+		}
+
+		/* ignore ssp_window and ssp_num_gss_handles: */
+		READ_BUF(8);
 		break;
 	default:
-		return nfserr_bad_xdr;
+		goto xdr_error;
 	}
 
-	return nfs_ok;
+	READ_BUF(4);    /* nfs_impl_id4 array length */
+	dummy = be32_to_cpup(p++);
+
+	if (dummy > 1)
+		goto xdr_error;
+
+	if (dummy == 1) {
+		status = nfsd4_decode_opaque(argp, &exid->nii_domain);
+		if (status)
+			goto xdr_error;
+
+		/* nii_name */
+		status = nfsd4_decode_opaque(argp, &exid->nii_name);
+		if (status)
+			goto xdr_error;
+
+		/* nii_date */
+		status = nfsd4_decode_time(argp, &exid->nii_time);
+		if (status)
+			goto xdr_error;
+	}
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
-			    union nfsd4_op_u *u)
+			    struct nfsd4_create_session *sess)
 {
-	struct nfsd4_create_session *sess = &u->create_session;
-	__be32 status;
+	DECODE_HEAD;
 
-	memset(sess, 0, sizeof(*sess));
-	status = nfsd4_decode_clientid4(argp, &sess->clientid);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &sess->seqid) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &sess->flags) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel);
-	if (status)
-		return status;
-	status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_cb_sec(argp, &sess->cb_sec);
+	READ_BUF(16);
+	COPYMEM(&sess->clientid, 8);
+	sess->seqid = be32_to_cpup(p++);
+	sess->flags = be32_to_cpup(p++);
+
+	/* Fore channel attrs */
+	READ_BUF(28);
+	p++; /* headerpadsz is always 0 */
+	sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
+	sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
+	sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
+	sess->fore_channel.maxops = be32_to_cpup(p++);
+	sess->fore_channel.maxreqs = be32_to_cpup(p++);
+	sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++);
+	if (sess->fore_channel.nr_rdma_attrs == 1) {
+		READ_BUF(4);
+		sess->fore_channel.rdma_attrs = be32_to_cpup(p++);
+	} else if (sess->fore_channel.nr_rdma_attrs > 1) {
+		dprintk("Too many fore channel attr bitmaps!\n");
+		goto xdr_error;
+	}
+
+	/* Back channel attrs */
+	READ_BUF(28);
+	p++; /* headerpadsz is always 0 */
+	sess->back_channel.maxreq_sz = be32_to_cpup(p++);
+	sess->back_channel.maxresp_sz = be32_to_cpup(p++);
+	sess->back_channel.maxresp_cached = be32_to_cpup(p++);
+	sess->back_channel.maxops = be32_to_cpup(p++);
+	sess->back_channel.maxreqs = be32_to_cpup(p++);
+	sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++);
+	if (sess->back_channel.nr_rdma_attrs == 1) {
+		READ_BUF(4);
+		sess->back_channel.rdma_attrs = be32_to_cpup(p++);
+	} else if (sess->back_channel.nr_rdma_attrs > 1) {
+		dprintk("Too many back channel attr bitmaps!\n");
+		goto xdr_error;
+	}
+
+	READ_BUF(4);
+	sess->callback_prog = be32_to_cpup(p++);
+	nfsd4_decode_cb_sec(argp, &sess->cb_sec);
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
-			     union nfsd4_op_u *u)
+			     struct nfsd4_destroy_session *destroy_session)
 {
-	struct nfsd4_destroy_session *destroy_session = &u->destroy_session;
-	return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid);
+	DECODE_HEAD;
+	READ_BUF(NFS4_MAX_SESSIONID_LEN);
+	COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
-			  union nfsd4_op_u *u)
+			  struct nfsd4_free_stateid *free_stateid)
 {
-	struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
-	return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
+	DECODE_HEAD;
+
+	READ_BUF(sizeof(stateid_t));
+	free_stateid->fr_stateid.si_generation = be32_to_cpup(p++);
+	COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+		      struct nfsd4_sequence *seq)
+{
+	DECODE_HEAD;
+
+	READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
+	COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+	seq->seqid = be32_to_cpup(p++);
+	seq->slotid = be32_to_cpup(p++);
+	seq->maxslots = be32_to_cpup(p++);
+	seq->cachethis = be32_to_cpup(p++);
+
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
+{
+	int i;
+	__be32 *p, status;
+	struct nfsd4_test_stateid_id *stateid;
+
+	READ_BUF(4);
+	test_stateid->ts_num_ids = ntohl(*p++);
+
+	INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
+
+	for (i = 0; i < test_stateid->ts_num_ids; i++) {
+		stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
+		if (!stateid) {
+			status = nfserrno(-ENOMEM);
+			goto out;
+		}
+
+		INIT_LIST_HEAD(&stateid->ts_id_list);
+		list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
+
+		status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid);
+		if (status)
+			goto out;
+	}
+
+	status = 0;
+out:
+	return status;
+xdr_error:
+	dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__);
+	status = nfserr_bad_xdr;
+	goto out;
+}
+
+static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc)
+{
+	DECODE_HEAD;
+
+	READ_BUF(8);
+	COPYMEM(&dc->clientid, 8);
+
+	DECODE_TAIL;
+}
+
+static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
+{
+	DECODE_HEAD;
+
+	READ_BUF(4);
+	rc->rca_one_fs = be32_to_cpup(p++);
+
+	DECODE_TAIL;
 }
 
 #ifdef CONFIG_NFSD_PNFS
 static __be32
 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
-		union nfsd4_op_u *u)
+		struct nfsd4_getdeviceinfo *gdev)
 {
-	struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
-	__be32 status;
+	DECODE_HEAD;
+	u32 num, i;
 
-	memset(gdev, 0, sizeof(*gdev));
-	status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid);
+	READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
+	COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
+	gdev->gd_layout_type = be32_to_cpup(p++);
+	gdev->gd_maxcount = be32_to_cpup(p++);
+	num = be32_to_cpup(p++);
+	if (num) {
+		if (num > 1000)
+			goto xdr_error;
+		READ_BUF(4 * num);
+		gdev->gd_notify_types = be32_to_cpup(p++);
+		for (i = 1; i < num; i++) {
+			if (be32_to_cpup(p++)) {
+				status = nfserr_inval;
+				goto out;
+			}
+		}
+	}
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+		struct nfsd4_layoutget *lgp)
+{
+	DECODE_HEAD;
+
+	READ_BUF(36);
+	lgp->lg_signal = be32_to_cpup(p++);
+	lgp->lg_layout_type = be32_to_cpup(p++);
+	lgp->lg_seg.iomode = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
+	p = xdr_decode_hyper(p, &lgp->lg_seg.length);
+	p = xdr_decode_hyper(p, &lgp->lg_minlength);
+
+	status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_maxcount) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_uint32_array(argp->xdr,
-					   &gdev->gd_notify_types, 1) < 0)
-		return nfserr_bad_xdr;
 
-	return nfs_ok;
+	READ_BUF(4);
+	lgp->lg_maxcount = be32_to_cpup(p++);
+
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
-			  union nfsd4_op_u *u)
+		struct nfsd4_layoutcommit *lcp)
 {
-	struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
-	__be32 *p, status;
+	DECODE_HEAD;
+	u32 timechange;
 
-	memset(lcp, 0, sizeof(*lcp));
-	if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_reclaim) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_stateid4(argp, &lcp->lc_sid);
+	READ_BUF(20);
+	p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
+	p = xdr_decode_hyper(p, &lcp->lc_seg.length);
+	lcp->lc_reclaim = be32_to_cpup(p++);
+
+	status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_newoffset) < 0)
-		return nfserr_bad_xdr;
+
+	READ_BUF(4);
+	lcp->lc_newoffset = be32_to_cpup(p++);
 	if (lcp->lc_newoffset) {
-		if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0)
-			return nfserr_bad_xdr;
+		READ_BUF(8);
+		p = xdr_decode_hyper(p, &lcp->lc_last_wr);
 	} else
 		lcp->lc_last_wr = 0;
-	p = xdr_inline_decode(argp->xdr, XDR_UNIT);
-	if (!p)
-		return nfserr_bad_xdr;
-	if (xdr_item_is_present(p)) {
-		status = nfsd4_decode_nfstime4(argp, &lcp->lc_mtime);
+	READ_BUF(4);
+	timechange = be32_to_cpup(p++);
+	if (timechange) {
+		status = nfsd4_decode_time(argp, &lcp->lc_mtime);
 		if (status)
 			return status;
 	} else {
 		lcp->lc_mtime.tv_nsec = UTIME_NOW;
 	}
-	return nfsd4_decode_layoutupdate4(argp, lcp);
-}
+	READ_BUF(8);
+	lcp->lc_layout_type = be32_to_cpup(p++);
 
-static __be32
-nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
-		union nfsd4_op_u *u)
-{
-	struct nfsd4_layoutget *lgp = &u->layoutget;
-	__be32 status;
+	/*
+	 * Save the layout update in XDR format and let the layout driver deal
+	 * with it later.
+	 */
+	lcp->lc_up_len = be32_to_cpup(p++);
+	if (lcp->lc_up_len > 0) {
+		READ_BUF(lcp->lc_up_len);
+		READMEM(lcp->lc_up_layout, lcp->lc_up_len);
+	}
 
-	memset(lgp, 0, sizeof(*lgp));
-	if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_seg.iomode) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.length) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_minlength) < 0)
-		return nfserr_bad_xdr;
-	status = nfsd4_decode_stateid4(argp, &lgp->lg_sid);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_maxcount) < 0)
-		return nfserr_bad_xdr;
-
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
-		union nfsd4_op_u *u)
+		struct nfsd4_layoutreturn *lrp)
 {
-	struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
-	memset(lrp, 0, sizeof(*lrp));
-	if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_seg.iomode) < 0)
-		return nfserr_bad_xdr;
-	return nfsd4_decode_layoutreturn4(argp, lrp);
+	DECODE_HEAD;
+
+	READ_BUF(16);
+	lrp->lr_reclaim = be32_to_cpup(p++);
+	lrp->lr_layout_type = be32_to_cpup(p++);
+	lrp->lr_seg.iomode = be32_to_cpup(p++);
+	lrp->lr_return_type = be32_to_cpup(p++);
+	if (lrp->lr_return_type == RETURN_FILE) {
+		READ_BUF(16);
+		p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
+		p = xdr_decode_hyper(p, &lrp->lr_seg.length);
+
+		status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
+		if (status)
+			return status;
+
+		READ_BUF(4);
+		lrp->lrf_body_len = be32_to_cpup(p++);
+		if (lrp->lrf_body_len > 0) {
+			READ_BUF(lrp->lrf_body_len);
+			READMEM(lrp->lrf_body, lrp->lrf_body_len);
+		}
+	} else {
+		lrp->lr_seg.offset = 0;
+		lrp->lr_seg.length = NFS4_MAX_UINT64;
+	}
+
+	DECODE_TAIL;
 }
 #endif /* CONFIG_NFSD_PNFS */
 
-static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
-					   union nfsd4_op_u *u)
-{
-	struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name;
-	if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
-		return nfserr_bad_xdr;
-
-	sin->sin_exp = NULL;
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
-		      union nfsd4_op_u *u)
-{
-	struct nfsd4_sequence *seq = &u->sequence;
-	__be32 *p, status;
-
-	status = nfsd4_decode_sessionid4(argp, &seq->sessionid);
-	if (status)
-		return status;
-	p = xdr_inline_decode(argp->xdr, XDR_UNIT * 4);
-	if (!p)
-		return nfserr_bad_xdr;
-	seq->seqid = be32_to_cpup(p++);
-	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
-	seq->cachethis = be32_to_cpup(p);
-
-	seq->status_flags = 0;
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp,
-			  union nfsd4_op_u *u)
-{
-	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
-	struct nfsd4_test_stateid_id *stateid;
-	__be32 status;
-	u32 i;
-
-	memset(test_stateid, 0, sizeof(*test_stateid));
-	if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0)
-		return nfserr_bad_xdr;
-
-	INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
-	for (i = 0; i < test_stateid->ts_num_ids; i++) {
-		stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
-		if (!stateid)
-			return nfserr_jukebox;
-		INIT_LIST_HEAD(&stateid->ts_id_list);
-		list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
-		status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid);
-		if (status)
-			return status;
-	}
-
-	return nfs_ok;
-}
-
-static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp,
-					    union nfsd4_op_u *u)
-{
-	struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
-	return nfsd4_decode_clientid4(argp, &dc->clientid);
-}
-
-static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
-					    union nfsd4_op_u *u)
-{
-	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
-	if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0)
-		return nfserr_bad_xdr;
-	return nfs_ok;
-}
-
 static __be32
 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
-		       union nfsd4_op_u *u)
+		       struct nfsd4_fallocate *fallocate)
 {
-	struct nfsd4_fallocate *fallocate = &u->allocate;
-	__be32 status;
+	DECODE_HEAD;
 
-	status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid);
+	status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_length) < 0)
-		return nfserr_bad_xdr;
 
-	return nfs_ok;
+	READ_BUF(16);
+	p = xdr_decode_hyper(p, &fallocate->falloc_offset);
+	xdr_decode_hyper(p, &fallocate->falloc_length);
+
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+{
+	DECODE_HEAD;
+
+	status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
+	if (status)
+		return status;
+	status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
+	if (status)
+		return status;
+
+	READ_BUF(8 + 8 + 8);
+	p = xdr_decode_hyper(p, &clone->cl_src_pos);
+	p = xdr_decode_hyper(p, &clone->cl_dst_pos);
+	p = xdr_decode_hyper(p, &clone->cl_count);
+	DECODE_TAIL;
 }
 
 static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
 				      struct nl4_server *ns)
 {
+	DECODE_HEAD;
 	struct nfs42_netaddr *naddr;
-	__be32 *p;
 
-	if (xdr_stream_decode_u32(argp->xdr, &ns->nl4_type) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	ns->nl4_type = be32_to_cpup(p++);
 
 	/* currently support for 1 inter-server source server */
 	switch (ns->nl4_type) {
 	case NL4_NETADDR:
 		naddr = &ns->u.nl4_addr;
 
-		if (xdr_stream_decode_u32(argp->xdr, &naddr->netid_len) < 0)
-			return nfserr_bad_xdr;
+		READ_BUF(4);
+		naddr->netid_len = be32_to_cpup(p++);
 		if (naddr->netid_len > RPCBIND_MAXNETIDLEN)
-			return nfserr_bad_xdr;
+			goto xdr_error;
 
-		p = xdr_inline_decode(argp->xdr, naddr->netid_len);
-		if (!p)
-			return nfserr_bad_xdr;
-		memcpy(naddr->netid, p, naddr->netid_len);
+		READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */
+		COPYMEM(naddr->netid, naddr->netid_len);
 
-		if (xdr_stream_decode_u32(argp->xdr, &naddr->addr_len) < 0)
-			return nfserr_bad_xdr;
+		naddr->addr_len = be32_to_cpup(p++);
 		if (naddr->addr_len > RPCBIND_MAXUADDRLEN)
-			return nfserr_bad_xdr;
+			goto xdr_error;
 
-		p = xdr_inline_decode(argp->xdr, naddr->addr_len);
-		if (!p)
-			return nfserr_bad_xdr;
-		memcpy(naddr->addr, p, naddr->addr_len);
+		READ_BUF(naddr->addr_len);
+		COPYMEM(naddr->addr, naddr->addr_len);
 		break;
 	default:
-		return nfserr_bad_xdr;
+		goto xdr_error;
 	}
-
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
 {
-	struct nfsd4_copy *copy = &u->copy;
-	u32 consecutive, i, count, sync;
+	DECODE_HEAD;
 	struct nl4_server *ns_dummy;
-	__be32 status;
+	int i, count;
 
-	memset(copy, 0, sizeof(*copy));
-	status = nfsd4_decode_stateid4(argp, &copy->cp_src_stateid);
+	status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
 	if (status)
 		return status;
-	status = nfsd4_decode_stateid4(argp, &copy->cp_dst_stateid);
+	status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &copy->cp_src_pos) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &copy->cp_dst_pos) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &copy->cp_count) < 0)
-		return nfserr_bad_xdr;
-	/* ca_consecutive: we always do consecutive copies */
-	if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_bool(argp->xdr, &sync) < 0)
-		return nfserr_bad_xdr;
-	nfsd4_copy_set_sync(copy, sync);
 
-	if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
-		return nfserr_bad_xdr;
-	copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src));
-	if (copy->cp_src == NULL)
-		return nfserr_jukebox;
+	READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
+	p = xdr_decode_hyper(p, &copy->cp_src_pos);
+	p = xdr_decode_hyper(p, &copy->cp_dst_pos);
+	p = xdr_decode_hyper(p, &copy->cp_count);
+	p++; /* ca_consecutive: we always do consecutive copies */
+	copy->cp_synchronous = be32_to_cpup(p++);
+
+	count = be32_to_cpup(p++);
+
+	copy->cp_intra = false;
 	if (count == 0) { /* intra-server copy */
-		__set_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
-		return nfs_ok;
+		copy->cp_intra = true;
+		goto intra;
 	}
 
-	/* decode all the supplied server addresses but use only the first */
-	status = nfsd4_decode_nl4_server(argp, copy->cp_src);
+	/* decode all the supplied server addresses but use first */
+	status = nfsd4_decode_nl4_server(argp, &copy->cp_src);
 	if (status)
 		return status;
 
 	ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
 	if (ns_dummy == NULL)
-		return nfserr_jukebox;
+		return nfserrno(-ENOMEM);
 	for (i = 0; i < count - 1; i++) {
 		status = nfsd4_decode_nl4_server(argp, ns_dummy);
 		if (status) {
@@ -2027,80 +1839,44 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
 		}
 	}
 	kfree(ns_dummy);
+intra:
 
-	return nfs_ok;
-}
-
-static __be32
-nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
-			 union nfsd4_op_u *u)
-{
-	struct nfsd4_copy_notify *cn = &u->copy_notify;
-	__be32 status;
-
-	memset(cn, 0, sizeof(*cn));
-	cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src));
-	if (cn->cpn_src == NULL)
-		return nfserr_jukebox;
-	cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst));
-	if (cn->cpn_dst == NULL)
-		return nfserr_jukebox;
-
-	status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid);
-	if (status)
-		return status;
-	return nfsd4_decode_nl4_server(argp, cn->cpn_dst);
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
-			    union nfsd4_op_u *u)
+			    struct nfsd4_offload_status *os)
 {
-	struct nfsd4_offload_status *os = &u->offload_status;
-	os->count = 0;
-	os->status = 0;
-	return nfsd4_decode_stateid4(argp, &os->stateid);
+	return nfsd4_decode_stateid(argp, &os->stateid);
 }
 
 static __be32
-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+			 struct nfsd4_copy_notify *cn)
 {
-	struct nfsd4_seek *seek = &u->seek;
 	__be32 status;
 
-	status = nfsd4_decode_stateid4(argp, &seek->seek_stateid);
+	status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
 	if (status)
 		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &seek->seek_offset) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0)
-		return nfserr_bad_xdr;
-
-	seek->seek_eof = 0;
-	seek->seek_pos = 0;
-	return nfs_ok;
+	return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
 }
 
 static __be32
-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
-	struct nfsd4_clone *clone = &u->clone;
-	__be32 status;
+	DECODE_HEAD;
 
-	status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid);
+	status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
 	if (status)
 		return status;
-	status = nfsd4_decode_stateid4(argp, &clone->cl_dst_stateid);
-	if (status)
-		return status;
-	if (xdr_stream_decode_u64(argp->xdr, &clone->cl_src_pos) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &clone->cl_dst_pos) < 0)
-		return nfserr_bad_xdr;
-	if (xdr_stream_decode_u64(argp->xdr, &clone->cl_count) < 0)
-		return nfserr_bad_xdr;
 
-	return nfs_ok;
+	READ_BUF(8 + 4);
+	p = xdr_decode_hyper(p, &seek->seek_offset);
+	seek->seek_whence = be32_to_cpup(p);
+
+	DECODE_TAIL;
 }
 
 /*
@@ -2113,14 +1889,13 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
  */
 
 /*
- * Decode data into buffer.
+ * Decode data into buffer. Uses head and pages constructed by
+ * svcxdr_construct_vector.
  */
 static __be32
-nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr,
-		       char **bufp, u32 buflen)
+nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
+		       struct page **pages, char **bufp, u32 buflen)
 {
-	struct page **pages = xdr->pages;
-	struct kvec *head = xdr->head;
 	char *tmp, *dp;
 	u32 len;
 
@@ -2163,22 +1938,25 @@ nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr,
 static __be32
 nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
 {
+	DECODE_HEAD;
 	char *name, *sp, *dp;
 	u32 namelen, cnt;
-	__be32 *p;
 
-	if (xdr_stream_decode_u32(argp->xdr, &namelen) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	namelen = be32_to_cpup(p++);
+
 	if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN))
 		return nfserr_nametoolong;
+
 	if (namelen == 0)
-		return nfserr_bad_xdr;
-	p = xdr_inline_decode(argp->xdr, namelen);
-	if (!p)
-		return nfserr_bad_xdr;
+		goto xdr_error;
+
+	READ_BUF(namelen);
+
 	name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1);
 	if (!name)
 		return nfserr_jukebox;
+
 	memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
 
 	/*
@@ -2191,14 +1969,14 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
 
 	while (cnt-- > 0) {
 		if (*sp == '\0')
-			return nfserr_bad_xdr;
+			goto xdr_error;
 		*dp++ = *sp++;
 	}
 	*dp = '\0';
 
 	*namep = name;
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 /*
@@ -2209,13 +1987,11 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
  */
 static __be32
 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
-		      union nfsd4_op_u *u)
+		      struct nfsd4_getxattr *getxattr)
 {
-	struct nfsd4_getxattr *getxattr = &u->getxattr;
 	__be32 status;
 	u32 maxcount;
 
-	memset(getxattr, 0, sizeof(*getxattr));
 	status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name);
 	if (status)
 		return status;
@@ -2224,21 +2000,21 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
 	maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
 
 	getxattr->getxa_len = maxcount;
-	return nfs_ok;
+
+	return status;
 }
 
 static __be32
 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
-		      union nfsd4_op_u *u)
+		      struct nfsd4_setxattr *setxattr)
 {
-	struct nfsd4_setxattr *setxattr = &u->setxattr;
+	DECODE_HEAD;
 	u32 flags, maxcount, size;
-	__be32 status;
+	struct kvec head;
+	struct page **pagelist;
 
-	memset(setxattr, 0, sizeof(*setxattr));
-
-	if (xdr_stream_decode_u32(argp->xdr, &flags) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	flags = be32_to_cpup(p++);
 
 	if (flags > SETXATTR4_REPLACE)
 		return nfserr_inval;
@@ -2251,35 +2027,33 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
 	maxcount = svc_max_payload(argp->rqstp);
 	maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
 
-	if (xdr_stream_decode_u32(argp->xdr, &size) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(4);
+	size = be32_to_cpup(p++);
 	if (size > maxcount)
 		return nfserr_xattr2big;
 
 	setxattr->setxa_len = size;
 	if (size > 0) {
-		struct xdr_buf payload;
+		status = svcxdr_construct_vector(argp, &head, &pagelist, size);
+		if (status)
+			return status;
 
-		if (!xdr_stream_subsegment(argp->xdr, &payload, size))
-			return nfserr_bad_xdr;
-		status = nfsd4_vbuf_from_vector(argp, &payload,
-						&setxattr->setxa_buf, size);
+		status = nfsd4_vbuf_from_vector(argp, &head, pagelist,
+		    &setxattr->setxa_buf, size);
 	}
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
-			union nfsd4_op_u *u)
+			struct nfsd4_listxattrs *listxattrs)
 {
-	struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+	DECODE_HEAD;
 	u32 maxcount;
 
-	memset(listxattrs, 0, sizeof(*listxattrs));
-
-	if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0)
-		return nfserr_bad_xdr;
+	READ_BUF(12);
+	p = xdr_decode_hyper(p, &listxattrs->lsxa_cookie);
 
 	/*
 	 * If the cookie  is too large to have even one user.x attribute
@@ -2289,8 +2063,7 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
 	    (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2)))
 		return nfserr_badcookie;
 
-	if (xdr_stream_decode_u32(argp->xdr, &maxcount) < 0)
-		return nfserr_bad_xdr;
+	maxcount = be32_to_cpup(p++);
 	if (maxcount < 8)
 		/* Always need at least 2 words (length and one character) */
 		return nfserr_inval;
@@ -2298,119 +2071,117 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
 	maxcount = min(maxcount, svc_max_payload(argp->rqstp));
 	listxattrs->lsxa_maxcount = maxcount;
 
-	return nfs_ok;
+	DECODE_TAIL;
 }
 
 static __be32
 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
-			 union nfsd4_op_u *u)
+			 struct nfsd4_removexattr *removexattr)
 {
-	struct nfsd4_removexattr *removexattr = &u->removexattr;
-	memset(removexattr, 0, sizeof(*removexattr));
 	return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
 }
 
 static __be32
-nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
 {
 	return nfs_ok;
 }
 
 static __be32
-nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
 {
 	return nfserr_notsupp;
 }
 
-typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u);
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
 
 static const nfsd4_dec nfsd4_dec_ops[] = {
-	[OP_ACCESS]		= nfsd4_decode_access,
-	[OP_CLOSE]		= nfsd4_decode_close,
-	[OP_COMMIT]		= nfsd4_decode_commit,
-	[OP_CREATE]		= nfsd4_decode_create,
-	[OP_DELEGPURGE]		= nfsd4_decode_notsupp,
-	[OP_DELEGRETURN]	= nfsd4_decode_delegreturn,
-	[OP_GETATTR]		= nfsd4_decode_getattr,
-	[OP_GETFH]		= nfsd4_decode_noop,
-	[OP_LINK]		= nfsd4_decode_link,
-	[OP_LOCK]		= nfsd4_decode_lock,
-	[OP_LOCKT]		= nfsd4_decode_lockt,
-	[OP_LOCKU]		= nfsd4_decode_locku,
-	[OP_LOOKUP]		= nfsd4_decode_lookup,
-	[OP_LOOKUPP]		= nfsd4_decode_noop,
-	[OP_NVERIFY]		= nfsd4_decode_verify,
-	[OP_OPEN]		= nfsd4_decode_open,
-	[OP_OPENATTR]		= nfsd4_decode_notsupp,
-	[OP_OPEN_CONFIRM]	= nfsd4_decode_open_confirm,
-	[OP_OPEN_DOWNGRADE]	= nfsd4_decode_open_downgrade,
-	[OP_PUTFH]		= nfsd4_decode_putfh,
-	[OP_PUTPUBFH]		= nfsd4_decode_putpubfh,
-	[OP_PUTROOTFH]		= nfsd4_decode_noop,
-	[OP_READ]		= nfsd4_decode_read,
-	[OP_READDIR]		= nfsd4_decode_readdir,
-	[OP_READLINK]		= nfsd4_decode_noop,
-	[OP_REMOVE]		= nfsd4_decode_remove,
-	[OP_RENAME]		= nfsd4_decode_rename,
-	[OP_RENEW]		= nfsd4_decode_renew,
-	[OP_RESTOREFH]		= nfsd4_decode_noop,
-	[OP_SAVEFH]		= nfsd4_decode_noop,
-	[OP_SECINFO]		= nfsd4_decode_secinfo,
-	[OP_SETATTR]		= nfsd4_decode_setattr,
-	[OP_SETCLIENTID]	= nfsd4_decode_setclientid,
-	[OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm,
-	[OP_VERIFY]		= nfsd4_decode_verify,
-	[OP_WRITE]		= nfsd4_decode_write,
-	[OP_RELEASE_LOCKOWNER]	= nfsd4_decode_release_lockowner,
+	[OP_ACCESS]		= (nfsd4_dec)nfsd4_decode_access,
+	[OP_CLOSE]		= (nfsd4_dec)nfsd4_decode_close,
+	[OP_COMMIT]		= (nfsd4_dec)nfsd4_decode_commit,
+	[OP_CREATE]		= (nfsd4_dec)nfsd4_decode_create,
+	[OP_DELEGPURGE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_DELEGRETURN]	= (nfsd4_dec)nfsd4_decode_delegreturn,
+	[OP_GETATTR]		= (nfsd4_dec)nfsd4_decode_getattr,
+	[OP_GETFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_LINK]		= (nfsd4_dec)nfsd4_decode_link,
+	[OP_LOCK]		= (nfsd4_dec)nfsd4_decode_lock,
+	[OP_LOCKT]		= (nfsd4_dec)nfsd4_decode_lockt,
+	[OP_LOCKU]		= (nfsd4_dec)nfsd4_decode_locku,
+	[OP_LOOKUP]		= (nfsd4_dec)nfsd4_decode_lookup,
+	[OP_LOOKUPP]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_NVERIFY]		= (nfsd4_dec)nfsd4_decode_verify,
+	[OP_OPEN]		= (nfsd4_dec)nfsd4_decode_open,
+	[OP_OPENATTR]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OPEN_CONFIRM]	= (nfsd4_dec)nfsd4_decode_open_confirm,
+	[OP_OPEN_DOWNGRADE]	= (nfsd4_dec)nfsd4_decode_open_downgrade,
+	[OP_PUTFH]		= (nfsd4_dec)nfsd4_decode_putfh,
+	[OP_PUTPUBFH]		= (nfsd4_dec)nfsd4_decode_putpubfh,
+	[OP_PUTROOTFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_READ]		= (nfsd4_dec)nfsd4_decode_read,
+	[OP_READDIR]		= (nfsd4_dec)nfsd4_decode_readdir,
+	[OP_READLINK]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_REMOVE]		= (nfsd4_dec)nfsd4_decode_remove,
+	[OP_RENAME]		= (nfsd4_dec)nfsd4_decode_rename,
+	[OP_RENEW]		= (nfsd4_dec)nfsd4_decode_renew,
+	[OP_RESTOREFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_SAVEFH]		= (nfsd4_dec)nfsd4_decode_noop,
+	[OP_SECINFO]		= (nfsd4_dec)nfsd4_decode_secinfo,
+	[OP_SETATTR]		= (nfsd4_dec)nfsd4_decode_setattr,
+	[OP_SETCLIENTID]	= (nfsd4_dec)nfsd4_decode_setclientid,
+	[OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+	[OP_VERIFY]		= (nfsd4_dec)nfsd4_decode_verify,
+	[OP_WRITE]		= (nfsd4_dec)nfsd4_decode_write,
+	[OP_RELEASE_LOCKOWNER]	= (nfsd4_dec)nfsd4_decode_release_lockowner,
 
 	/* new operations for NFSv4.1 */
-	[OP_BACKCHANNEL_CTL]	= nfsd4_decode_backchannel_ctl,
-	[OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session,
-	[OP_EXCHANGE_ID]	= nfsd4_decode_exchange_id,
-	[OP_CREATE_SESSION]	= nfsd4_decode_create_session,
-	[OP_DESTROY_SESSION]	= nfsd4_decode_destroy_session,
-	[OP_FREE_STATEID]	= nfsd4_decode_free_stateid,
-	[OP_GET_DIR_DELEGATION]	= nfsd4_decode_notsupp,
+	[OP_BACKCHANNEL_CTL]	= (nfsd4_dec)nfsd4_decode_backchannel_ctl,
+	[OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
+	[OP_EXCHANGE_ID]	= (nfsd4_dec)nfsd4_decode_exchange_id,
+	[OP_CREATE_SESSION]	= (nfsd4_dec)nfsd4_decode_create_session,
+	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
+	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_free_stateid,
+	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
 #ifdef CONFIG_NFSD_PNFS
-	[OP_GETDEVICEINFO]	= nfsd4_decode_getdeviceinfo,
-	[OP_GETDEVICELIST]	= nfsd4_decode_notsupp,
-	[OP_LAYOUTCOMMIT]	= nfsd4_decode_layoutcommit,
-	[OP_LAYOUTGET]		= nfsd4_decode_layoutget,
-	[OP_LAYOUTRETURN]	= nfsd4_decode_layoutreturn,
+	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdeviceinfo,
+	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
+	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
+	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
 #else
-	[OP_GETDEVICEINFO]	= nfsd4_decode_notsupp,
-	[OP_GETDEVICELIST]	= nfsd4_decode_notsupp,
-	[OP_LAYOUTCOMMIT]	= nfsd4_decode_notsupp,
-	[OP_LAYOUTGET]		= nfsd4_decode_notsupp,
-	[OP_LAYOUTRETURN]	= nfsd4_decode_notsupp,
+	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
 #endif
-	[OP_SECINFO_NO_NAME]	= nfsd4_decode_secinfo_no_name,
-	[OP_SEQUENCE]		= nfsd4_decode_sequence,
-	[OP_SET_SSV]		= nfsd4_decode_notsupp,
-	[OP_TEST_STATEID]	= nfsd4_decode_test_stateid,
-	[OP_WANT_DELEGATION]	= nfsd4_decode_notsupp,
-	[OP_DESTROY_CLIENTID]	= nfsd4_decode_destroy_clientid,
-	[OP_RECLAIM_COMPLETE]	= nfsd4_decode_reclaim_complete,
+	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_secinfo_no_name,
+	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
+	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_TEST_STATEID]	= (nfsd4_dec)nfsd4_decode_test_stateid,
+	[OP_WANT_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_DESTROY_CLIENTID]	= (nfsd4_dec)nfsd4_decode_destroy_clientid,
+	[OP_RECLAIM_COMPLETE]	= (nfsd4_dec)nfsd4_decode_reclaim_complete,
 
 	/* new operations for NFSv4.2 */
-	[OP_ALLOCATE]		= nfsd4_decode_fallocate,
-	[OP_COPY]		= nfsd4_decode_copy,
-	[OP_COPY_NOTIFY]	= nfsd4_decode_copy_notify,
-	[OP_DEALLOCATE]		= nfsd4_decode_fallocate,
-	[OP_IO_ADVISE]		= nfsd4_decode_notsupp,
-	[OP_LAYOUTERROR]	= nfsd4_decode_notsupp,
-	[OP_LAYOUTSTATS]	= nfsd4_decode_notsupp,
-	[OP_OFFLOAD_CANCEL]	= nfsd4_decode_offload_status,
-	[OP_OFFLOAD_STATUS]	= nfsd4_decode_offload_status,
-	[OP_READ_PLUS]		= nfsd4_decode_read,
-	[OP_SEEK]		= nfsd4_decode_seek,
-	[OP_WRITE_SAME]		= nfsd4_decode_notsupp,
-	[OP_CLONE]		= nfsd4_decode_clone,
+	[OP_ALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
+	[OP_COPY]		= (nfsd4_dec)nfsd4_decode_copy,
+	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_copy_notify,
+	[OP_DEALLOCATE]		= (nfsd4_dec)nfsd4_decode_fallocate,
+	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTERROR]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTSTATS]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_OFFLOAD_CANCEL]	= (nfsd4_dec)nfsd4_decode_offload_status,
+	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_offload_status,
+	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_read,
+	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_seek,
+	[OP_WRITE_SAME]		= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_CLONE]		= (nfsd4_dec)nfsd4_decode_clone,
 	/* RFC 8276 extended atributes operations */
-	[OP_GETXATTR]		= nfsd4_decode_getxattr,
-	[OP_SETXATTR]		= nfsd4_decode_setxattr,
-	[OP_LISTXATTRS]		= nfsd4_decode_listxattrs,
-	[OP_REMOVEXATTR]	= nfsd4_decode_removexattr,
+	[OP_GETXATTR]		= (nfsd4_dec)nfsd4_decode_getxattr,
+	[OP_SETXATTR]		= (nfsd4_dec)nfsd4_decode_setxattr,
+	[OP_LISTXATTRS]		= (nfsd4_dec)nfsd4_decode_listxattrs,
+	[OP_REMOVEXATTR]	= (nfsd4_dec)nfsd4_decode_removexattr,
 };
 
 static inline bool
@@ -2427,46 +2198,43 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
 	return true;
 }
 
-static bool
+static __be32
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
+	DECODE_HEAD;
 	struct nfsd4_op *op;
 	bool cachethis = false;
 	int auth_slack= argp->rqstp->rq_auth_slack;
 	int max_reply = auth_slack + 8; /* opcnt, status */
 	int readcount = 0;
 	int readbytes = 0;
-	__be32 *p;
 	int i;
 
-	if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0)
-		return false;
-	max_reply += XDR_UNIT;
-	argp->tag = NULL;
-	if (unlikely(argp->taglen)) {
-		if (argp->taglen > NFSD4_MAX_TAGLEN)
-			return false;
-		p = xdr_inline_decode(argp->xdr, argp->taglen);
-		if (!p)
-			return false;
-		argp->tag = svcxdr_savemem(argp, p, argp->taglen);
-		if (!argp->tag)
-			return false;
-		max_reply += xdr_align_size(argp->taglen);
-	}
+	READ_BUF(4);
+	argp->taglen = be32_to_cpup(p++);
+	READ_BUF(argp->taglen);
+	SAVEMEM(argp->tag, argp->taglen);
+	READ_BUF(8);
+	argp->minorversion = be32_to_cpup(p++);
+	argp->opcnt = be32_to_cpup(p++);
+	max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
 
-	if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
-		return false;
-	if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
-		return false;
-	argp->opcnt = min_t(u32, argp->client_opcnt,
-			    NFSD_MAX_OPS_PER_COMPOUND);
+	if (argp->taglen > NFSD4_MAX_TAGLEN)
+		goto xdr_error;
+	/*
+	 * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
+	 * here, so we return success at the xdr level so that
+	 * nfsd4_proc can handle this as an NFS-level error.
+	 */
+	if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+		return 0;
 
 	if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
-		argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
+		argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
 		if (!argp->ops) {
 			argp->ops = argp->iops;
-			return false;
+			dprintk("nfsd: couldn't allocate room for COMPOUND\n");
+			goto xdr_error;
 		}
 	}
 
@@ -2476,23 +2244,17 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	for (i = 0; i < argp->opcnt; i++) {
 		op = &argp->ops[i];
 		op->replay = NULL;
-		op->opdesc = NULL;
 
-		if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
-			return false;
-		if (nfsd4_opnum_in_range(argp, op)) {
-			op->opdesc = OPDESC(op);
+		READ_BUF(4);
+		op->opnum = be32_to_cpup(p++);
+
+		if (nfsd4_opnum_in_range(argp, op))
 			op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
-			if (op->status != nfs_ok)
-				trace_nfsd_compound_decode_err(argp->rqstp,
-							       argp->opcnt, i,
-							       op->opnum,
-							       op->status);
-		} else {
+		else {
 			op->opnum = OP_ILLEGAL;
 			op->status = nfserr_op_illegal;
 		}
-
+		op->opdesc = OPDESC(op);
 		/*
 		 * We'll try to cache the result in the DRC if any one
 		 * op in the compound wants to be cached:
@@ -2527,7 +2289,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
 		clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
 
-	return true;
+	DECODE_TAIL;
 }
 
 static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
@@ -2536,25 +2298,15 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
 	if (exp->ex_flags & NFSEXP_V4ROOT) {
 		*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
 		*p++ = 0;
-	} else
+	} else if (IS_I_VERSION(inode)) {
 		p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode));
+	} else {
+		*p++ = cpu_to_be32(stat->ctime.tv_sec);
+		*p++ = cpu_to_be32(stat->ctime.tv_nsec);
+	}
 	return p;
 }
 
-static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
-				    struct timespec64 *tv)
-{
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, XDR_UNIT * 3);
-	if (!p)
-		return nfserr_resource;
-
-	p = xdr_encode_hyper(p, (s64)tv->tv_sec);
-	*p = cpu_to_be32(tv->tv_nsec);
-	return nfs_ok;
-}
-
 /*
  * ctime (in NFSv4, time_metadata) is not writeable, and the client
  * doesn't really care what resolution could theoretically be stored by
@@ -2583,8 +2335,15 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
 static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
 {
 	*p++ = cpu_to_be32(c->atomic);
-	p = xdr_encode_hyper(p, c->before_change);
-	p = xdr_encode_hyper(p, c->after_change);
+	if (c->change_supported) {
+		p = xdr_encode_hyper(p, c->before_change);
+		p = xdr_encode_hyper(p, c->after_change);
+	} else {
+		*p++ = cpu_to_be32(c->before_ctime_sec);
+		*p++ = cpu_to_be32(c->before_ctime_nsec);
+		*p++ = cpu_to_be32(c->after_ctime_sec);
+		*p++ = cpu_to_be32(c->after_ctime_nsec);
+	}
 	return p;
 }
 
@@ -2799,7 +2558,7 @@ static u32 nfs4_file_type(umode_t mode)
 	case S_IFREG:	return NF4REG;
 	case S_IFSOCK:	return NF4SOCK;
 	default:	return NF4BAD;
-	}
+	};
 }
 
 static inline __be32
@@ -2883,10 +2642,9 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32
 }
 
 
-static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino)
+static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
 {
 	struct path path = exp->ex_path;
-	struct kstat stat;
 	int err;
 
 	path_get(&path);
@@ -2894,10 +2652,8 @@ static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino)
 		if (path.dentry != path.mnt->mnt_root)
 			break;
 	}
-	err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
+	err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 	path_put(&path);
-	if (!err)
-		*pino = stat.ino;
 	return err;
 }
 
@@ -2950,9 +2706,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 	struct kstat stat;
 	struct svc_fh *tempfh = NULL;
 	struct kstatfs statfs;
-	__be32 *p, *attrlen_p;
+	__be32 *p;
 	int starting_len = xdr->buf->len;
 	int attrlen_offset;
+	__be32 attrlen;
 	u32 dummy;
 	u64 dummy64;
 	u32 rdattr_err = 0;
@@ -2984,9 +2741,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 	err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 	if (err)
 		goto out_nfserr;
-	if (!(stat.result_mask & STATX_BTIME))
-		/* underlying FS does not offer btime so we can't share it */
-		bmval1 &= ~FATTR4_WORD1_TIME_CREATE;
 	if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
 			FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
@@ -3040,9 +2794,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 		goto out;
 
 	attrlen_offset = xdr->buf->len;
-	attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
-	if (!attrlen_p)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		goto out_resource;
+	p++;                /* to be backfilled later */
 
 	if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
 		u32 supp[3];
@@ -3228,7 +2983,7 @@ out_acl:
 		p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4);
 		if (!p)
 			goto out_resource;
-		p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw,
+		p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base,
 					fhp->fh_handle.fh_size);
 	}
 	if (bmval0 & FATTR4_WORD0_FILEID) {
@@ -3360,14 +3115,11 @@ out_acl:
 		p = xdr_encode_hyper(p, dummy64);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
-		status = nfsd4_encode_nfstime4(xdr, &stat.atime);
-		if (status)
-			goto out;
-	}
-	if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
-		status = nfsd4_encode_nfstime4(xdr, &stat.btime);
-		if (status)
-			goto out;
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
+			goto out_resource;
+		p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
+		*p++ = cpu_to_be32(stat.atime.tv_nsec);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
 		p = xdr_reserve_space(xdr, 12);
@@ -3376,31 +3128,36 @@ out_acl:
 		p = encode_time_delta(p, d_inode(dentry));
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
-		status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
-		if (status)
-			goto out;
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
+			goto out_resource;
+		p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
+		*p++ = cpu_to_be32(stat.ctime.tv_nsec);
 	}
 	if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
-		status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
-		if (status)
-			goto out;
+		p = xdr_reserve_space(xdr, 12);
+		if (!p)
+			goto out_resource;
+		p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+		*p++ = cpu_to_be32(stat.mtime.tv_nsec);
 	}
 	if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+		struct kstat parent_stat;
 		u64 ino = stat.ino;
 
 		p = xdr_reserve_space(xdr, 8);
 		if (!p)
                 	goto out_resource;
 		/*
-		 * Get ino of mountpoint in parent filesystem, if not ignoring
-		 * crossmount and this is the root of a cross-mounted
-		 * filesystem.
+		 * Get parent's attributes if not ignoring crossmount
+		 * and this is the root of a cross-mounted filesystem.
 		 */
 		if (ignore_crossmnt == 0 &&
 		    dentry == exp->ex_path.mnt->mnt_root) {
-			err = nfsd4_get_mounted_on_ino(exp, &ino);
+			err = get_parent_attributes(exp, &parent_stat);
 			if (err)
 				goto out_nfserr;
+			ino = parent_stat.ino;
 		}
 		p = xdr_encode_hyper(p, ino);
 	}
@@ -3437,6 +3194,16 @@ out_acl:
 			goto out;
 	}
 
+	if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			goto out_resource;
+		if (IS_I_VERSION(d_inode(dentry)))
+			*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
+		else
+			*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
+	}
+
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
 		status = nfsd4_encode_security_label(xdr, rqstp, context,
@@ -3455,7 +3222,8 @@ out_acl:
 		*p++ = cpu_to_be32(err == 0);
 	}
 
-	*attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+	attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
+	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
 	status = nfs_ok;
 
 out:
@@ -3624,7 +3392,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 	p = xdr_reserve_space(xdr, 3*4 + namlen);
 	if (!p)
 		goto fail;
-	p = xdr_encode_hyper(p, OFFSET_MAX);        /* offset of next entry */
+	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
 	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
@@ -3708,11 +3476,9 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
 }
 
 static __be32
-nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr,
-		    union nfsd4_op_u *u)
+nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
 {
-	struct nfsd4_access *access = &u->access;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 8);
@@ -3723,11 +3489,9 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr,
 	return 0;
 }
 
-static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr,
-						union nfsd4_op_u *u)
+static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
 {
-	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8);
@@ -3742,22 +3506,18 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
 }
 
 static __be32
-nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr,
-		   union nfsd4_op_u *u)
+nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
 {
-	struct nfsd4_close *close = &u->close;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_encode_stateid(xdr, &close->cl_stateid);
 }
 
 
 static __be32
-nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
-		    union nfsd4_op_u *u)
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
 {
-	struct nfsd4_commit *commit = &u->commit;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
@@ -3769,11 +3529,9 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
-		    union nfsd4_op_u *u)
+nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
 {
-	struct nfsd4_create *create = &u->create;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 20);
@@ -3785,23 +3543,19 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr,
-		     union nfsd4_op_u *u)
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
 {
-	struct nfsd4_getattr *getattr = &u->getattr;
 	struct svc_fh *fhp = getattr->ga_fhp;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry,
 				    getattr->ga_bmval, resp->rqstp, 0);
 }
 
 static __be32
-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr,
-		   union nfsd4_op_u *u)
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
 {
-	struct svc_fh **fhpp = &u->getfh;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct svc_fh *fhp = *fhpp;
 	unsigned int len;
 	__be32 *p;
@@ -3810,7 +3564,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr,
 	p = xdr_reserve_space(xdr, len + 4);
 	if (!p)
 		return nfserr_resource;
-	p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len);
+	p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
 	return 0;
 }
 
@@ -3854,11 +3608,9 @@ again:
 }
 
 static __be32
-nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *u)
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
 {
-	struct nfsd4_lock *lock = &u->lock;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	if (!nfserr)
 		nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
@@ -3869,11 +3621,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr,
-		   union nfsd4_op_u *u)
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
 {
-	struct nfsd4_lockt *lockt = &u->lockt;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	if (nfserr == nfserr_denied)
 		nfsd4_encode_lock_denied(xdr, &lockt->lt_denied);
@@ -3881,22 +3631,18 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr,
-		   union nfsd4_op_u *u)
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
 {
-	struct nfsd4_locku *locku = &u->locku;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_encode_stateid(xdr, &locku->lu_stateid);
 }
 
 
 static __be32
-nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *u)
+nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
 {
-	struct nfsd4_link *link = &u->link;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 20);
@@ -3908,11 +3654,9 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 
 static __be32
-nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *u)
+nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
-	struct nfsd4_open *open = &u->open;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
@@ -4004,21 +3748,17 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr,
-			  union nfsd4_op_u *u)
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
 {
-	struct nfsd4_open_confirm *oc = &u->open_confirm;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
 }
 
 static __be32
-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
-			    union nfsd4_op_u *u)
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
 {
-	struct nfsd4_open_downgrade *od = &u->open_downgrade;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_encode_stateid(xdr, &od->od_stateid);
 }
@@ -4028,28 +3768,33 @@ static __be32 nfsd4_encode_splice_read(
 				struct nfsd4_read *read,
 				struct file *file, unsigned long maxcount)
 {
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct xdr_buf *buf = xdr->buf;
-	int status, space_left;
+	u32 eof;
+	int space_left;
 	__be32 nfserr;
+	__be32 *p = xdr->p - 2;
 
 	/* Make sure there will be room for padding if needed */
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
 	nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
-				  file, read->rd_offset, &maxcount,
-				  &read->rd_eof);
+				  file, read->rd_offset, &maxcount, &eof);
 	read->rd_length = maxcount;
-	if (nfserr)
-		goto out_err;
-	status = svc_encode_result_payload(read->rd_rqstp,
-					   buf->head[0].iov_len, maxcount);
-	if (status) {
-		nfserr = nfserrno(status);
-		goto out_err;
+	if (nfserr) {
+		/*
+		 * nfsd_splice_actor may have already messed with the
+		 * page length; reset it so as not to confuse
+		 * xdr_truncate_encode:
+		 */
+		buf->page_len = 0;
+		return nfserr;
 	}
 
+	*(p++) = htonl(eof);
+	*(p++) = htonl(maxcount);
+
 	buf->page_len = maxcount;
 	buf->len += maxcount;
 	xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
@@ -4075,25 +3820,18 @@ static __be32 nfsd4_encode_splice_read(
 	xdr->end = (__be32 *)((void *)xdr->end + space_left);
 
 	return 0;
-
-out_err:
-	/*
-	 * nfsd_splice_actor may have already messed with the
-	 * page length; reset it so as not to confuse
-	 * xdr_truncate_encode in our caller.
-	 */
-	buf->page_len = 0;
-	return nfserr;
 }
 
 static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 				 struct nfsd4_read *read,
 				 struct file *file, unsigned long maxcount)
 {
-	struct xdr_stream *xdr = resp->xdr;
-	unsigned int starting_len = xdr->buf->len;
-	__be32 zero = xdr_zero;
+	struct xdr_stream *xdr = &resp->xdr;
+	u32 eof;
+	int starting_len = xdr->buf->len - 8;
 	__be32 nfserr;
+	__be32 tmp;
+	int pad;
 
 	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
 	if (read->rd_vlen < 0)
@@ -4101,27 +3839,33 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
 
 	nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
 			    resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
-			    &read->rd_eof);
+			    &eof);
 	read->rd_length = maxcount;
 	if (nfserr)
 		return nfserr;
-	if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount))
+	if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount))
 		return nfserr_io;
-	xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount));
+	xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount));
+
+	tmp = htonl(eof);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
+	tmp = htonl(maxcount);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+
+	tmp = xdr_zero;
+	pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
+								&tmp, pad);
+	return 0;
 
-	write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero,
-			       xdr_pad_size(maxcount));
-	return nfs_ok;
 }
 
 static __be32
 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *u)
+		  struct nfsd4_read *read)
 {
-	struct nfsd4_read *read = &u->read;
-	bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
 	unsigned long maxcount;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct file *file;
 	int starting_len = xdr->buf->len;
 	__be32 *p;
@@ -4132,44 +3876,45 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
 	if (!p) {
-		WARN_ON_ONCE(splice_ok);
+		WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
 		return nfserr_resource;
 	}
-	if (resp->xdr->buf->page_len && splice_ok) {
+	if (resp->xdr.buf->page_len &&
+	    test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
 		WARN_ON_ONCE(1);
 		return nfserr_serverfault;
 	}
 	xdr_commit_encode(xdr);
 
-	maxcount = min_t(unsigned long, read->rd_length,
+	maxcount = svc_max_payload(resp->rqstp);
+	maxcount = min_t(unsigned long, maxcount,
 			 (xdr->buf->buflen - xdr->buf->len));
+	maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
-	if (file->f_op->splice_read && splice_ok)
+	if (file->f_op->splice_read &&
+	    test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
 		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
 	else
 		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
-	if (nfserr) {
-		xdr_truncate_encode(xdr, starting_len);
-		return nfserr;
-	}
 
-	p = xdr_encode_bool(p, read->rd_eof);
-	*p = cpu_to_be32(read->rd_length);
-	return nfs_ok;
+	if (nfserr)
+		xdr_truncate_encode(xdr, starting_len);
+
+	return nfserr;
 }
 
 static __be32
-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
-		      union nfsd4_op_u *u)
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
 {
-	struct nfsd4_readlink *readlink = &u->readlink;
-	__be32 *p, *maxcount_p, zero = xdr_zero;
-	struct xdr_stream *xdr = resp->xdr;
+	int maxcount;
+	__be32 wire_count;
+	int zero = 0;
+	struct xdr_stream *xdr = &resp->xdr;
 	int length_offset = xdr->buf->len;
-	int maxcount, status;
+	__be32 *p;
 
-	maxcount_p = xdr_reserve_space(xdr, XDR_UNIT);
-	if (!maxcount_p)
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
 		return nfserr_resource;
 	maxcount = PAGE_SIZE;
 
@@ -4186,35 +3931,28 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
 						(char *)p, &maxcount);
 	if (nfserr == nfserr_isdir)
 		nfserr = nfserr_inval;
-	if (nfserr)
-		goto out_err;
-	status = svc_encode_result_payload(readlink->rl_rqstp, length_offset,
-					   maxcount);
-	if (status) {
-		nfserr = nfserrno(status);
-		goto out_err;
+	if (nfserr) {
+		xdr_truncate_encode(xdr, length_offset);
+		return nfserr;
 	}
-	*maxcount_p = cpu_to_be32(maxcount);
-	xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount));
-	write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero,
-			       xdr_pad_size(maxcount));
-	return nfs_ok;
 
-out_err:
-	xdr_truncate_encode(xdr, length_offset);
-	return nfserr;
+	wire_count = htonl(maxcount);
+	write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
+	xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
+	if (maxcount & 3)
+		write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
+						&zero, 4 - (maxcount&3));
+	return 0;
 }
 
 static __be32
-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
-		     union nfsd4_op_u *u)
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
 {
-	struct nfsd4_readdir *readdir = &u->readdir;
 	int maxcount;
 	int bytes_left;
 	loff_t offset;
 	__be64 wire_offset;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	int starting_len = xdr->buf->len;
 	__be32 *p;
 
@@ -4225,8 +3963,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	*p++ = cpu_to_be32(0);
 	*p++ = cpu_to_be32(0);
-	xdr->buf->head[0].iov_len = (char *)xdr->p -
-				    (char *)xdr->buf->head[0].iov_base;
+	resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
+				- (char *)resp->xdr.buf->head[0].iov_base;
 
 	/*
 	 * Number of bytes left for directory entries allowing for the
@@ -4299,11 +4037,9 @@ err_no_verf:
 }
 
 static __be32
-nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
-		    union nfsd4_op_u *u)
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
 {
-	struct nfsd4_remove *remove = &u->remove;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 20);
@@ -4314,11 +4050,9 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
-		    union nfsd4_op_u *u)
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
 {
-	struct nfsd4_rename *rename = &u->rename;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 40);
@@ -4399,20 +4133,18 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
 
 static __be32
 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
-		     union nfsd4_op_u *u)
+		     struct nfsd4_secinfo *secinfo)
 {
-	struct nfsd4_secinfo *secinfo = &u->secinfo;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
 }
 
 static __be32
 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
-		     union nfsd4_op_u *u)
+		     struct nfsd4_secinfo_no_name *secinfo)
 {
-	struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 
 	return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
 }
@@ -4422,11 +4154,9 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
  * regardless of the error status.
  */
 static __be32
-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr,
-		     union nfsd4_op_u *u)
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
 {
-	struct nfsd4_setattr *setattr = &u->setattr;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 16);
@@ -4448,11 +4178,9 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
-			 union nfsd4_op_u *u)
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
 {
-	struct nfsd4_setclientid *scd = &u->setclientid;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (!nfserr) {
@@ -4474,11 +4202,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
-		   union nfsd4_op_u *u)
+nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
 {
-	struct nfsd4_write *write = &u->write;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 16);
@@ -4493,10 +4219,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
-			 union nfsd4_op_u *u)
+			 struct nfsd4_exchange_id *exid)
 {
-	struct nfsd4_exchange_id *exid = &u->exchange_id;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 	char *major_id;
 	char *server_scope;
@@ -4572,10 +4297,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
-			    union nfsd4_op_u *u)
+			    struct nfsd4_create_session *sess)
 {
-	struct nfsd4_create_session *sess = &u->create_session;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 24);
@@ -4626,10 +4350,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
-		      union nfsd4_op_u *u)
+		      struct nfsd4_sequence *seq)
 {
-	struct nfsd4_sequence *seq = &u->sequence;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20);
@@ -4650,10 +4373,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
-			  union nfsd4_op_u *u)
+			  struct nfsd4_test_stateid *test_stateid)
 {
-	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_test_stateid_id *stateid, *next;
 	__be32 *p;
 
@@ -4672,10 +4394,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
 #ifdef CONFIG_NFSD_PNFS
 static __be32
 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
-		union nfsd4_op_u *u)
+		struct nfsd4_getdeviceinfo *gdev)
 {
-	struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	const struct nfsd4_layout_ops *ops;
 	u32 starting_len = xdr->buf->len, needed_len;
 	__be32 *p;
@@ -4726,10 +4447,9 @@ toosmall:
 
 static __be32
 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
-		union nfsd4_op_u *u)
+		struct nfsd4_layoutget *lgp)
 {
-	struct nfsd4_layoutget *lgp = &u->layoutget;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	const struct nfsd4_layout_ops *ops;
 	__be32 *p;
 
@@ -4754,10 +4474,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
-			  union nfsd4_op_u *u)
+			  struct nfsd4_layoutcommit *lcp)
 {
-	struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 4);
@@ -4776,10 +4495,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
-		union nfsd4_op_u *u)
+		struct nfsd4_layoutreturn *lrp)
 {
-	struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 4);
@@ -4797,7 +4515,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
 		struct nfsd42_write_res *write, bool sync)
 {
 	__be32 *p;
-	p = xdr_reserve_space(resp->xdr, 4);
+	p = xdr_reserve_space(&resp->xdr, 4);
 	if (!p)
 		return nfserr_resource;
 
@@ -4806,11 +4524,11 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
 	else {
 		__be32 nfserr;
 		*p++ = cpu_to_be32(1);
-		nfserr = nfsd4_encode_stateid(resp->xdr, &write->cb_stateid);
+		nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid);
 		if (nfserr)
 			return nfserr;
 	}
-	p = xdr_reserve_space(resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE);
+	p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE);
 	if (!p)
 		return nfserr_resource;
 
@@ -4824,7 +4542,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
 static __be32
 nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
 {
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfs42_netaddr *addr;
 	__be32 *p;
 
@@ -4863,28 +4581,26 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
 
 static __be32
 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *u)
+		  struct nfsd4_copy *copy)
 {
-	struct nfsd4_copy *copy = &u->copy;
 	__be32 *p;
 
 	nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
-					 nfsd4_copy_is_sync(copy));
+			copy->cp_synchronous);
 	if (nfserr)
 		return nfserr;
 
-	p = xdr_reserve_space(resp->xdr, 4 + 4);
+	p = xdr_reserve_space(&resp->xdr, 4 + 4);
 	*p++ = xdr_one; /* cr_consecutive */
-	*p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero;
+	*p++ = cpu_to_be32(copy->cp_synchronous);
 	return 0;
 }
 
 static __be32
 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
-			    union nfsd4_op_u *u)
+			    struct nfsd4_offload_status *os)
 {
-	struct nfsd4_offload_status *os = &u->offload_status;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 8 + 4);
@@ -4897,84 +4613,159 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
-			    struct nfsd4_read *read)
+			    struct nfsd4_read *read,
+			    unsigned long *maxcount, u32 *eof,
+			    loff_t *pos)
 {
-	bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
+	struct xdr_stream *xdr = &resp->xdr;
 	struct file *file = read->rd_nf->nf_file;
-	struct xdr_stream *xdr = resp->xdr;
-	unsigned long maxcount;
-	__be32 nfserr, *p;
+	int starting_len = xdr->buf->len;
+	loff_t hole_pos;
+	__be32 nfserr;
+	__be32 *p, tmp;
+	__be64 tmp64;
+
+	hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+	if (hole_pos > read->rd_offset)
+		*maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
+	*maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
 
 	/* Content type, offset, byte count */
 	p = xdr_reserve_space(xdr, 4 + 8 + 4);
 	if (!p)
-		return nfserr_io;
-	if (resp->xdr->buf->page_len && splice_ok) {
-		WARN_ON_ONCE(splice_ok);
-		return nfserr_serverfault;
-	}
+		return nfserr_resource;
 
-	maxcount = min_t(unsigned long, read->rd_length,
-			 (xdr->buf->buflen - xdr->buf->len));
+	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
+	if (read->rd_vlen < 0)
+		return nfserr_resource;
 
-	if (file->f_op->splice_read && splice_ok)
-		nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
-	else
-		nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+	nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+			    resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
 	if (nfserr)
 		return nfserr;
+	xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
 
-	*p++ = cpu_to_be32(NFS4_CONTENT_DATA);
-	p = xdr_encode_hyper(p, read->rd_offset);
-	*p = cpu_to_be32(read->rd_length);
+	tmp = htonl(NFS4_CONTENT_DATA);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
+	tmp64 = cpu_to_be64(read->rd_offset);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
+	tmp = htonl(*maxcount);
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
 
+	tmp = xdr_zero;
+	write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
+			       xdr_pad_size(*maxcount));
+	return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
+			    struct nfsd4_read *read,
+			    unsigned long *maxcount, u32 *eof)
+{
+	struct file *file = read->rd_nf->nf_file;
+	loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
+	loff_t f_size = i_size_read(file_inode(file));
+	unsigned long count;
+	__be32 *p;
+
+	if (data_pos == -ENXIO)
+		data_pos = f_size;
+	else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
+		return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
+	count = data_pos - read->rd_offset;
+
+	/* Content type, offset, byte count */
+	p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
+	if (!p)
+		return nfserr_resource;
+
+	*p++ = htonl(NFS4_CONTENT_HOLE);
+	 p   = xdr_encode_hyper(p, read->rd_offset);
+	 p   = xdr_encode_hyper(p, count);
+
+	*eof = (read->rd_offset + count) >= f_size;
+	*maxcount = min_t(unsigned long, count, *maxcount);
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
-		       union nfsd4_op_u *u)
+		       struct nfsd4_read *read)
 {
-	struct nfsd4_read *read = &u->read;
-	struct file *file = read->rd_nf->nf_file;
-	struct xdr_stream *xdr = resp->xdr;
+	unsigned long maxcount, count;
+	struct xdr_stream *xdr = &resp->xdr;
+	struct file *file;
 	int starting_len = xdr->buf->len;
-	u32 segments = 0;
-	__be32 *p;
+	int last_segment = xdr->buf->len;
+	int segments = 0;
+	__be32 *p, tmp;
+	bool is_data;
+	loff_t pos;
+	u32 eof;
 
 	if (nfserr)
 		return nfserr;
+	file = read->rd_nf->nf_file;
 
 	/* eof flag, segment count */
 	p = xdr_reserve_space(xdr, 4 + 4);
 	if (!p)
-		return nfserr_io;
+		return nfserr_resource;
 	xdr_commit_encode(xdr);
 
-	read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
-	if (read->rd_eof)
+	maxcount = svc_max_payload(resp->rqstp);
+	maxcount = min_t(unsigned long, maxcount,
+			 (xdr->buf->buflen - xdr->buf->len));
+	maxcount = min_t(unsigned long, maxcount, read->rd_length);
+	count    = maxcount;
+
+	eof = read->rd_offset >= i_size_read(file_inode(file));
+	if (eof)
 		goto out;
 
-	nfserr = nfsd4_encode_read_plus_data(resp, read);
-	if (nfserr) {
-		xdr_truncate_encode(xdr, starting_len);
-		return nfserr;
+	pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+	is_data = pos > read->rd_offset;
+
+	while (count > 0 && !eof) {
+		maxcount = count;
+		if (is_data)
+			nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
+						segments == 0 ? &pos : NULL);
+		else
+			nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
+		if (nfserr)
+			goto out;
+		count -= maxcount;
+		read->rd_offset += maxcount;
+		is_data = !is_data;
+		last_segment = xdr->buf->len;
+		segments++;
 	}
 
-	segments++;
-
 out:
-	p = xdr_encode_bool(p, read->rd_eof);
-	*p = cpu_to_be32(segments);
+	if (nfserr && segments == 0)
+		xdr_truncate_encode(xdr, starting_len);
+	else {
+		if (nfserr) {
+			xdr_truncate_encode(xdr, last_segment);
+			nfserr = nfs_ok;
+			eof = 0;
+		}
+		tmp = htonl(eof);
+		write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
+		tmp = htonl(segments);
+		write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+	}
+
 	return nfserr;
 }
 
 static __be32
 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
-			 union nfsd4_op_u *u)
+			 struct nfsd4_copy_notify *cn)
 {
-	struct nfsd4_copy_notify *cn = &u->copy_notify;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	if (nfserr)
@@ -5001,18 +4792,16 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	*p++ = cpu_to_be32(1);
 
-	nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src);
-	return nfserr;
+	return nfsd42_encode_nl4_server(resp, &cn->cpn_src);
 }
 
 static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *u)
+		  struct nfsd4_seek *seek)
 {
-	struct nfsd4_seek *seek = &u->seek;
 	__be32 *p;
 
-	p = xdr_reserve_space(resp->xdr, 4 + 8);
+	p = xdr_reserve_space(&resp->xdr, 4 + 8);
 	*p++ = cpu_to_be32(seek->seek_eof);
 	p = xdr_encode_hyper(p, seek->seek_pos);
 
@@ -5020,8 +4809,7 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 }
 
 static __be32
-nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr,
-		  union nfsd4_op_u *p)
+nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
 {
 	return nfserr;
 }
@@ -5072,10 +4860,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen)
 
 static __be32
 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
-		      union nfsd4_op_u *u)
+		      struct nfsd4_getxattr *getxattr)
 {
-	struct nfsd4_getxattr *getxattr = &u->getxattr;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p, err;
 
 	p = xdr_reserve_space(xdr, 4);
@@ -5097,10 +4884,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 static __be32
 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
-		      union nfsd4_op_u *u)
+		      struct nfsd4_setxattr *setxattr)
 {
-	struct nfsd4_setxattr *setxattr = &u->setxattr;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 20);
@@ -5139,10 +4925,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
 
 static __be32
 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
-			union nfsd4_op_u *u)
+			struct nfsd4_listxattrs *listxattrs)
 {
-	struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	u32 cookie_offset, count_offset, eof;
 	u32 left, xdrleft, slen, count;
 	u32 xdrlen, offset;
@@ -5251,10 +5036,9 @@ out:
 
 static __be32
 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
-			 union nfsd4_op_u *u)
+			 struct nfsd4_removexattr *removexattr)
 {
-	struct nfsd4_removexattr *removexattr = &u->removexattr;
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 20);
@@ -5265,7 +5049,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
 	return 0;
 }
 
-typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
+typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
 
 /*
  * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
@@ -5273,93 +5057,93 @@ typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u
  * done in the decoding phase.
  */
 static const nfsd4_enc nfsd4_enc_ops[] = {
-	[OP_ACCESS]		= nfsd4_encode_access,
-	[OP_CLOSE]		= nfsd4_encode_close,
-	[OP_COMMIT]		= nfsd4_encode_commit,
-	[OP_CREATE]		= nfsd4_encode_create,
-	[OP_DELEGPURGE]		= nfsd4_encode_noop,
-	[OP_DELEGRETURN]	= nfsd4_encode_noop,
-	[OP_GETATTR]		= nfsd4_encode_getattr,
-	[OP_GETFH]		= nfsd4_encode_getfh,
-	[OP_LINK]		= nfsd4_encode_link,
-	[OP_LOCK]		= nfsd4_encode_lock,
-	[OP_LOCKT]		= nfsd4_encode_lockt,
-	[OP_LOCKU]		= nfsd4_encode_locku,
-	[OP_LOOKUP]		= nfsd4_encode_noop,
-	[OP_LOOKUPP]		= nfsd4_encode_noop,
-	[OP_NVERIFY]		= nfsd4_encode_noop,
-	[OP_OPEN]		= nfsd4_encode_open,
-	[OP_OPENATTR]		= nfsd4_encode_noop,
-	[OP_OPEN_CONFIRM]	= nfsd4_encode_open_confirm,
-	[OP_OPEN_DOWNGRADE]	= nfsd4_encode_open_downgrade,
-	[OP_PUTFH]		= nfsd4_encode_noop,
-	[OP_PUTPUBFH]		= nfsd4_encode_noop,
-	[OP_PUTROOTFH]		= nfsd4_encode_noop,
-	[OP_READ]		= nfsd4_encode_read,
-	[OP_READDIR]		= nfsd4_encode_readdir,
-	[OP_READLINK]		= nfsd4_encode_readlink,
-	[OP_REMOVE]		= nfsd4_encode_remove,
-	[OP_RENAME]		= nfsd4_encode_rename,
-	[OP_RENEW]		= nfsd4_encode_noop,
-	[OP_RESTOREFH]		= nfsd4_encode_noop,
-	[OP_SAVEFH]		= nfsd4_encode_noop,
-	[OP_SECINFO]		= nfsd4_encode_secinfo,
-	[OP_SETATTR]		= nfsd4_encode_setattr,
-	[OP_SETCLIENTID]	= nfsd4_encode_setclientid,
-	[OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop,
-	[OP_VERIFY]		= nfsd4_encode_noop,
-	[OP_WRITE]		= nfsd4_encode_write,
-	[OP_RELEASE_LOCKOWNER]	= nfsd4_encode_noop,
+	[OP_ACCESS]		= (nfsd4_enc)nfsd4_encode_access,
+	[OP_CLOSE]		= (nfsd4_enc)nfsd4_encode_close,
+	[OP_COMMIT]		= (nfsd4_enc)nfsd4_encode_commit,
+	[OP_CREATE]		= (nfsd4_enc)nfsd4_encode_create,
+	[OP_DELEGPURGE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_DELEGRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_GETATTR]		= (nfsd4_enc)nfsd4_encode_getattr,
+	[OP_GETFH]		= (nfsd4_enc)nfsd4_encode_getfh,
+	[OP_LINK]		= (nfsd4_enc)nfsd4_encode_link,
+	[OP_LOCK]		= (nfsd4_enc)nfsd4_encode_lock,
+	[OP_LOCKT]		= (nfsd4_enc)nfsd4_encode_lockt,
+	[OP_LOCKU]		= (nfsd4_enc)nfsd4_encode_locku,
+	[OP_LOOKUP]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LOOKUPP]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_NVERIFY]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OPEN]		= (nfsd4_enc)nfsd4_encode_open,
+	[OP_OPENATTR]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OPEN_CONFIRM]	= (nfsd4_enc)nfsd4_encode_open_confirm,
+	[OP_OPEN_DOWNGRADE]	= (nfsd4_enc)nfsd4_encode_open_downgrade,
+	[OP_PUTFH]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_PUTPUBFH]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_PUTROOTFH]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_READ]		= (nfsd4_enc)nfsd4_encode_read,
+	[OP_READDIR]		= (nfsd4_enc)nfsd4_encode_readdir,
+	[OP_READLINK]		= (nfsd4_enc)nfsd4_encode_readlink,
+	[OP_REMOVE]		= (nfsd4_enc)nfsd4_encode_remove,
+	[OP_RENAME]		= (nfsd4_enc)nfsd4_encode_rename,
+	[OP_RENEW]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_RESTOREFH]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_SAVEFH]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_SECINFO]		= (nfsd4_enc)nfsd4_encode_secinfo,
+	[OP_SETATTR]		= (nfsd4_enc)nfsd4_encode_setattr,
+	[OP_SETCLIENTID]	= (nfsd4_enc)nfsd4_encode_setclientid,
+	[OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
+	[OP_VERIFY]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_WRITE]		= (nfsd4_enc)nfsd4_encode_write,
+	[OP_RELEASE_LOCKOWNER]	= (nfsd4_enc)nfsd4_encode_noop,
 
 	/* NFSv4.1 operations */
-	[OP_BACKCHANNEL_CTL]	= nfsd4_encode_noop,
-	[OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session,
-	[OP_EXCHANGE_ID]	= nfsd4_encode_exchange_id,
-	[OP_CREATE_SESSION]	= nfsd4_encode_create_session,
-	[OP_DESTROY_SESSION]	= nfsd4_encode_noop,
-	[OP_FREE_STATEID]	= nfsd4_encode_noop,
-	[OP_GET_DIR_DELEGATION]	= nfsd4_encode_noop,
+	[OP_BACKCHANNEL_CTL]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
+	[OP_EXCHANGE_ID]	= (nfsd4_enc)nfsd4_encode_exchange_id,
+	[OP_CREATE_SESSION]	= (nfsd4_enc)nfsd4_encode_create_session,
+	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
 #ifdef CONFIG_NFSD_PNFS
-	[OP_GETDEVICEINFO]	= nfsd4_encode_getdeviceinfo,
-	[OP_GETDEVICELIST]	= nfsd4_encode_noop,
-	[OP_LAYOUTCOMMIT]	= nfsd4_encode_layoutcommit,
-	[OP_LAYOUTGET]		= nfsd4_encode_layoutget,
-	[OP_LAYOUTRETURN]	= nfsd4_encode_layoutreturn,
+	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdeviceinfo,
+	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
+	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
+	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
 #else
-	[OP_GETDEVICEINFO]	= nfsd4_encode_noop,
-	[OP_GETDEVICELIST]	= nfsd4_encode_noop,
-	[OP_LAYOUTCOMMIT]	= nfsd4_encode_noop,
-	[OP_LAYOUTGET]		= nfsd4_encode_noop,
-	[OP_LAYOUTRETURN]	= nfsd4_encode_noop,
+	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
 #endif
-	[OP_SECINFO_NO_NAME]	= nfsd4_encode_secinfo_no_name,
-	[OP_SEQUENCE]		= nfsd4_encode_sequence,
-	[OP_SET_SSV]		= nfsd4_encode_noop,
-	[OP_TEST_STATEID]	= nfsd4_encode_test_stateid,
-	[OP_WANT_DELEGATION]	= nfsd4_encode_noop,
-	[OP_DESTROY_CLIENTID]	= nfsd4_encode_noop,
-	[OP_RECLAIM_COMPLETE]	= nfsd4_encode_noop,
+	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_secinfo_no_name,
+	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
+	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_TEST_STATEID]	= (nfsd4_enc)nfsd4_encode_test_stateid,
+	[OP_WANT_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_DESTROY_CLIENTID]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_RECLAIM_COMPLETE]	= (nfsd4_enc)nfsd4_encode_noop,
 
 	/* NFSv4.2 operations */
-	[OP_ALLOCATE]		= nfsd4_encode_noop,
-	[OP_COPY]		= nfsd4_encode_copy,
-	[OP_COPY_NOTIFY]	= nfsd4_encode_copy_notify,
-	[OP_DEALLOCATE]		= nfsd4_encode_noop,
-	[OP_IO_ADVISE]		= nfsd4_encode_noop,
-	[OP_LAYOUTERROR]	= nfsd4_encode_noop,
-	[OP_LAYOUTSTATS]	= nfsd4_encode_noop,
-	[OP_OFFLOAD_CANCEL]	= nfsd4_encode_noop,
-	[OP_OFFLOAD_STATUS]	= nfsd4_encode_offload_status,
-	[OP_READ_PLUS]		= nfsd4_encode_read_plus,
-	[OP_SEEK]		= nfsd4_encode_seek,
-	[OP_WRITE_SAME]		= nfsd4_encode_noop,
-	[OP_CLONE]		= nfsd4_encode_noop,
+	[OP_ALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_COPY]		= (nfsd4_enc)nfsd4_encode_copy,
+	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_copy_notify,
+	[OP_DEALLOCATE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTERROR]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTSTATS]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OFFLOAD_CANCEL]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_offload_status,
+	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_read_plus,
+	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_seek,
+	[OP_WRITE_SAME]		= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_CLONE]		= (nfsd4_enc)nfsd4_encode_noop,
 
 	/* RFC 8276 extended atributes operations */
-	[OP_GETXATTR]		= nfsd4_encode_getxattr,
-	[OP_SETXATTR]		= nfsd4_encode_setxattr,
-	[OP_LISTXATTRS]		= nfsd4_encode_listxattrs,
-	[OP_REMOVEXATTR]	= nfsd4_encode_removexattr,
+	[OP_GETXATTR]		= (nfsd4_enc)nfsd4_encode_getxattr,
+	[OP_SETXATTR]		= (nfsd4_enc)nfsd4_encode_setxattr,
+	[OP_LISTXATTRS]		= (nfsd4_enc)nfsd4_encode_listxattrs,
+	[OP_REMOVEXATTR]	= (nfsd4_enc)nfsd4_encode_removexattr,
 };
 
 /*
@@ -5394,7 +5178,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize)
 void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
-	struct xdr_stream *xdr = resp->xdr;
+	struct xdr_stream *xdr = &resp->xdr;
 	struct nfs4_stateowner *so = resp->cstate.replay_owner;
 	struct svc_rqst *rqstp = resp->rqstp;
 	const struct nfsd4_operation *opdesc = op->opdesc;
@@ -5403,8 +5187,10 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 8);
-	if (!p)
-		goto release;
+	if (!p) {
+		WARN_ON_ONCE(1);
+		return;
+	}
 	*p++ = cpu_to_be32(op->opnum);
 	post_err_offset = xdr->buf->len;
 
@@ -5413,12 +5199,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	if (op->status && opdesc &&
 			!(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE))
 		goto status;
-	BUG_ON(op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
+	BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
 	       !nfsd4_enc_ops[op->opnum]);
 	encoder = nfsd4_enc_ops[op->opnum];
 	op->status = encoder(resp, op->status, &op->u);
-	if (op->status)
-		trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
+	if (opdesc && opdesc->op_release)
+		opdesc->op_release(&op->u);
 	xdr_commit_encode(xdr);
 
 	/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -5458,10 +5244,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 						so->so_replay.rp_buf, len);
 	}
 status:
-	*p = op->status;
-release:
-	if (opdesc && opdesc->op_release)
-		opdesc->op_release(&op->u);
+	/* Note that op->status is already in network byte order: */
+	write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4);
 }
 
 /* 
@@ -5487,14 +5271,22 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 	p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
 }
 
+int
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+{
+	return xdr_ressize_check(rqstp, p);	/* nothing to encode */
+}
+
 void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 {
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	if (args->ops != args->iops) {
-		vfree(args->ops);
+		kfree(args->ops);
 		args->ops = args->iops;
 	}
+	kfree(args->tmpp);
+	args->tmpp = NULL;
 	while (args->to_free) {
 		struct svcxdr_tmpbuf *tb = args->to_free;
 		args->to_free = tb->next;
@@ -5502,44 +5294,57 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 	}
 }
 
-bool
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs4svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+{
+	return 1;	/* no arguments to decode; nonzero means success */
+}
+
+int
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
-	/* svcxdr_tmp_alloc */
+	if (rqstp->rq_arg.head[0].iov_len % 4) {
+		/* client is nuts */
+		dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
+			__func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
+		return 0;	/* 0 reports a decode failure to the caller */
+	}
+	args->p = p;
+	args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
+	args->pagelist = rqstp->rq_arg.pages;
+	args->pagelen = rqstp->rq_arg.page_len;
+	args->tail = false;
+	args->tmpp = NULL;
 	args->to_free = NULL;
-
-	args->xdr = xdr;
 	args->ops = args->iops;
 	args->rqstp = rqstp;
 
-	return nfsd4_decode_compound(args);
+	return !nfsd4_decode_compound(args);	/* invert: nonzero means success here */
 }
 
-bool
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
-	__be32 *p;
+	struct xdr_buf *buf = resp->xdr.buf;
 
-	/*
-	 * Send buffer space for the following items is reserved
-	 * at the top of nfsd4_proc_compound().
-	 */
-	p = resp->statusp;
+	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
+				 buf->tail[0].iov_len);	/* len must equal head + pages + tail */
 
-	*p++ = resp->cstate.status;
+	*p = resp->cstate.status;	/* p is the caller-supplied slot for the status */
 
-	rqstp->rq_next_page = xdr->page_ptr + 1;
+	rqstp->rq_next_page = resp->xdr.page_ptr + 1;
 
+	p = resp->tagp;	/* continue at the saved tag position */
 	*p++ = htonl(resp->taglen);
 	memcpy(p, resp->tag, resp->taglen);
 	p += XDR_QUADLEN(resp->taglen);
 	*p++ = htonl(resp->opcnt);
 
 	nfsd4_sequence_done(resp);
-	return true;
+	return 1;	/* always reports success */
 }
 
 /*
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 2b5417e06d80..80c90fc231a5 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -84,6 +84,12 @@ nfsd_hashsize(unsigned int limit)
 	return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
 }
 
+static u32
+nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
+{
+	return hash_32(be32_to_cpu(xid), nn->maskbits);	/* index into nn->drc_hashtbl */
+}
+
 static struct svc_cacherep *
 nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
 			struct nfsd_net *nn)
@@ -115,14 +121,14 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
 				struct nfsd_net *nn)
 {
 	if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
-		nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
+		nn->drc_mem_usage -= rp->c_replvec.iov_len;
 		kfree(rp->c_replvec.iov_base);
 	}
 	if (rp->c_state != RC_UNUSED) {
 		rb_erase(&rp->c_node, &b->rb_head);
 		list_del(&rp->c_lru);
 		atomic_dec(&nn->num_drc_entries);
-		nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp));
+		nn->drc_mem_usage -= sizeof(*rp);
 	}
 	kmem_cache_free(drc_slab, rp);
 }
@@ -148,16 +154,6 @@ void nfsd_drc_slab_free(void)
 	kmem_cache_destroy(drc_slab);
 }
 
-static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
-{
-	return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
-}
-
-static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
-{
-	nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
-}
-
 int nfsd_reply_cache_init(struct nfsd_net *nn)
 {
 	unsigned int hashsize;
@@ -169,16 +165,12 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
 	hashsize = nfsd_hashsize(nn->max_drc_entries);
 	nn->maskbits = ilog2(hashsize);
 
-	status = nfsd_reply_cache_stats_init(nn);
-	if (status)
-		goto out_nomem;
-
 	nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
 	nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
 	nn->nfsd_reply_cache_shrinker.seeks = 1;
 	status = register_shrinker(&nn->nfsd_reply_cache_shrinker);
 	if (status)
-		goto out_stats_destroy;
+		goto out_nomem;
 
 	nn->drc_hashtbl = kvzalloc(array_size(hashsize,
 				sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@@ -194,8 +186,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
 	return 0;
 out_shrinker:
 	unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
-out_stats_destroy:
-	nfsd_reply_cache_stats_destroy(nn);
 out_nomem:
 	printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
 	return -ENOMEM;
@@ -216,7 +206,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
 									rp, nn);
 		}
 	}
-	nfsd_reply_cache_stats_destroy(nn);
 
 	kvfree(nn->drc_hashtbl);
 	nn->drc_hashtbl = NULL;
@@ -235,16 +224,8 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
 	list_move_tail(&rp->c_lru, &b->lru_head);
 }
 
-static noinline struct nfsd_drc_bucket *
-nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
-{
-	unsigned int hash = hash_32((__force u32)xid, nn->maskbits);
-
-	return &nn->drc_hashtbl[hash];
-}
-
-static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
-			 unsigned int max)
+static long
+prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
 {
 	struct svc_cacherep *rp, *tmp;
 	long freed = 0;
@@ -260,17 +241,11 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
 		    time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
 			break;
 		nfsd_reply_cache_free_locked(b, rp, nn);
-		if (max && freed++ > max)
-			break;
+		freed++;
 	}
 	return freed;
 }
 
-static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
-{
-	return prune_bucket(b, nn, 3);
-}
-
 /*
  * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
  * Also prune the oldest ones when the total exceeds the max number of entries.
@@ -287,7 +262,7 @@ prune_cache_entries(struct nfsd_net *nn)
 		if (list_empty(&b->lru_head))
 			continue;
 		spin_lock(&b->cache_lock);
-		freed += prune_bucket(b, nn, 0);
+		freed += prune_bucket(b, nn);
 		spin_unlock(&b->cache_lock);
 	}
 	return freed;
@@ -349,7 +324,7 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
 {
 	if (key->c_key.k_xid == rp->c_key.k_xid &&
 	    key->c_key.k_csum != rp->c_key.k_csum) {
-		nfsd_stats_payload_misses_inc(nn);
+		++nn->payload_misses;
 		trace_nfsd_drc_mismatch(nn, key, rp);
 	}
 
@@ -421,16 +396,18 @@ out:
  */
 int nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
-	struct nfsd_net		*nn;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct svc_cacherep	*rp, *found;
+	__be32			xid = rqstp->rq_xid;
 	__wsum			csum;
-	struct nfsd_drc_bucket	*b;
+	u32 hash = nfsd_cache_hash(xid, nn);
+	struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
 	int type = rqstp->rq_cachetype;
 	int rtn = RC_DOIT;
 
 	rqstp->rq_cacherep = NULL;
 	if (type == RC_NOCACHE) {
-		nfsd_stats_rc_nocache_inc();
+		nfsdstats.rcnocache++;
 		goto out;
 	}
 
@@ -440,25 +417,27 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
 	 * Since the common case is a cache miss followed by an insert,
 	 * preallocate an entry.
 	 */
-	nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
 	if (!rp)
 		goto out;
 
-	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
 	spin_lock(&b->cache_lock);
 	found = nfsd_cache_insert(b, rp, nn);
-	if (found != rp)
+	if (found != rp) {
+		nfsd_reply_cache_free_locked(NULL, rp, nn);
+		rp = found;
 		goto found_entry;
+	}
 
-	nfsd_stats_rc_misses_inc();
+	nfsdstats.rcmisses++;
 	rqstp->rq_cacherep = rp;
 	rp->c_state = RC_INPROG;
 
 	atomic_inc(&nn->num_drc_entries);
-	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
+	nn->drc_mem_usage += sizeof(*rp);
 
-	nfsd_prune_bucket(b, nn);
+	/* go ahead and prune the cache */
+	prune_bucket(b, nn);
 
 out_unlock:
 	spin_unlock(&b->cache_lock);
@@ -467,10 +446,8 @@ out:
 
 found_entry:
 	/* We found a matching entry which is either in progress or done. */
-	nfsd_reply_cache_free_locked(NULL, rp, nn);
-	nfsd_stats_rc_hits_inc();
+	nfsdstats.rchits++;
 	rtn = RC_DROPIT;
-	rp = found;
 
 	/* Request being processed */
 	if (rp->c_state == RC_INPROG)
@@ -529,6 +506,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct svc_cacherep *rp = rqstp->rq_cacherep;
 	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
+	u32		hash;
 	struct nfsd_drc_bucket *b;
 	int		len;
 	size_t		bufsize = 0;
@@ -536,7 +514,8 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 	if (!rp)
 		return;
 
-	b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn);
+	hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
+	b = &nn->drc_hashtbl[hash];
 
 	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
 	len >>= 2;
@@ -569,7 +548,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 		return;
 	}
 	spin_lock(&b->cache_lock);
-	nfsd_stats_drc_mem_usage_add(nn, bufsize);
+	nn->drc_mem_usage += bufsize;
 	lru_put_end(b, rp);
 	rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
 	rp->c_type = cachetype;
@@ -603,26 +582,28 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
  * scraping this file for info should test the labels to ensure they're
  * getting the correct field.
  */
-int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
 {
-	struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info,
-					  nfsd_net_id);
+	struct nfsd_net *nn = m->private;	/* set via single_open() in the open routine */
 
 	seq_printf(m, "max entries:           %u\n", nn->max_drc_entries);
 	seq_printf(m, "num entries:           %u\n",
-		   atomic_read(&nn->num_drc_entries));
+			atomic_read(&nn->num_drc_entries));
 	seq_printf(m, "hash buckets:          %u\n", 1 << nn->maskbits);
-	seq_printf(m, "mem usage:             %lld\n",
-		   percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
-	seq_printf(m, "cache hits:            %lld\n",
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
-	seq_printf(m, "cache misses:          %lld\n",
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
-	seq_printf(m, "not cached:            %lld\n",
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
-	seq_printf(m, "payload misses:        %lld\n",
-		   percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
+	seq_printf(m, "mem usage:             %u\n", nn->drc_mem_usage);
+	seq_printf(m, "cache hits:            %u\n", nfsdstats.rchits);
+	seq_printf(m, "cache misses:          %u\n", nfsdstats.rcmisses);
+	seq_printf(m, "not cached:            %u\n", nfsdstats.rcnocache);
+	seq_printf(m, "payload misses:        %u\n", nn->payload_misses);
 	seq_printf(m, "longest chain len:     %u\n", nn->longest_chain);
 	seq_printf(m, "cachesize at longest:  %u\n", nn->longest_chain_cachesize);
 	return 0;
 }
+
+int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
+{
+	struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
+								nfsd_net_id);
+
+	return single_open(file, nfsd_reply_cache_stats_show, nn);	/* nn becomes m->private */
+}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 682f5226e79a..7c36634598d3 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -25,7 +25,6 @@
 #include "state.h"
 #include "netns.h"
 #include "pnfs.h"
-#include "filecache.h"
 
 /*
  *	We have a single directory with several nodes in it.
@@ -33,7 +32,6 @@
 enum {
 	NFSD_Root = 1,
 	NFSD_List,
-	NFSD_Export_Stats,
 	NFSD_Export_features,
 	NFSD_Fh,
 	NFSD_FO_UnlockIP,
@@ -46,7 +44,6 @@ enum {
 	NFSD_Ports,
 	NFSD_MaxBlkSize,
 	NFSD_MaxConnections,
-	NFSD_Filecache,
 	NFSD_SupportedEnctypes,
 	/*
 	 * The below MUST come last.  Otherwise we leave a hole in nfsd_files[]
@@ -185,7 +182,17 @@ static int export_features_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-DEFINE_SHOW_ATTRIBUTE(export_features);
+static int export_features_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, export_features_show, NULL);	/* no private data */
+}
+
+static const struct file_operations export_features_operations = {
+	.open		= export_features_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* pairs with single_open() in .open */
+};
 
 #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
 static int supported_enctypes_show(struct seq_file *m, void *v)
@@ -194,7 +201,17 @@ static int supported_enctypes_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-DEFINE_SHOW_ATTRIBUTE(supported_enctypes);
+static int supported_enctypes_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, supported_enctypes_show, NULL);	/* no private data */
+}
+
+static const struct file_operations supported_enctypes_ops = {
+	.open		= supported_enctypes_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* pairs with single_open() in .open */
+};
 #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
 
 static const struct file_operations pool_stats_operations = {
@@ -204,9 +221,12 @@ static const struct file_operations pool_stats_operations = {
 	.release	= nfsd_pool_stats_release,
 };
 
-DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats);
-
-DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats);
+static const struct file_operations reply_cache_stats_operations = {
+	.open		= nfsd_reply_cache_stats_open,	/* defined in nfscache.c */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
 /*----------------------------------------------------------------------------*/
 /*
@@ -374,12 +394,12 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
 	auth_domain_put(dom);
 	if (len)
 		return len;
 
 	mesg = buf;
 	len = SIMPLE_TRANSACTION_LIMIT;
-	qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size);
+	qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size);	/* raw handle bytes */
 	mesg[-1] = '\n';
 	return mesg - buf;
 }
 
 /*
@@ -581,9 +601,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 
 			cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
 			switch(num) {
-#ifdef CONFIG_NFSD_V2
 			case 2:
-#endif
 			case 3:
 				nfsd_vers(nn, num, cmd);
 				break;
@@ -603,9 +621,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 				}
 				break;
 			default:
-				/* Ignore requests to disable non-existent versions */
-				if (cmd == NFSD_SET)
-					return -EINVAL;
+				return -EINVAL;
 			}
 			vers += len + 1;
 		} while ((len = qword_get(&mesg, vers, size)) > 0);
@@ -616,6 +632,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 	}
 
 	/* Now write current state into reply buffer */
+	len = 0;
 	sep = "";
 	remaining = SIMPLE_TRANSACTION_LIMIT;
 	for (num=2 ; num <= 4 ; num++) {
@@ -709,25 +726,28 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
 	char *mesg = buf;
 	int fd, err;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct svc_serv *serv;
 
 	err = get_int(&mesg, &fd);
 	if (err != 0 || fd < 0)
 		return -EINVAL;
 
+	if (svc_alien_sock(net, fd)) {
+		printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
+		return -EINVAL;
+	}
+
 	err = nfsd_create_serv(net);
 	if (err != 0)
 		return err;
 
-	serv = nn->nfsd_serv;
-	err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+	err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+	if (err < 0) {
+		nfsd_destroy(net);
+		return err;
+	}
 
-	if (err < 0 && !serv->sv_nrthreads && !nn->keep_active)
-		nfsd_last_thread(net);
-	else if (err >= 0 && !serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
-		svc_get(serv);
-
-	svc_put(serv);
+	/* Decrease the count, but don't shut down the service */
+	nn->nfsd_serv->sv_nrthreads--;
 	return err;
 }
 
@@ -741,7 +761,6 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
 	struct svc_xprt *xprt;
 	int port, err;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct svc_serv *serv;
 
 	if (sscanf(buf, "%15s %5u", transport, &port) != 2)
 		return -EINVAL;
@@ -753,33 +772,30 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
 	if (err != 0)
 		return err;
 
-	serv = nn->nfsd_serv;
-	err = svc_xprt_create(serv, transport, net,
-			      PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
+	err = svc_create_xprt(nn->nfsd_serv, transport, net,
+				PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
 	if (err < 0)
 		goto out_err;
 
-	err = svc_xprt_create(serv, transport, net,
-			      PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
+	err = svc_create_xprt(nn->nfsd_serv, transport, net,
+				PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
 	if (err < 0 && err != -EAFNOSUPPORT)
 		goto out_close;
 
-	if (!serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
-		svc_get(serv);
-
-	svc_put(serv);
+	/* Decrease the count, but don't shut down the service */
+	nn->nfsd_serv->sv_nrthreads--;
 	return 0;
 out_close:
-	xprt = svc_find_xprt(serv, transport, net, PF_INET, port);
+	xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
 	if (xprt != NULL) {
-		svc_xprt_close(xprt);
+		svc_close_xprt(xprt);
 		svc_xprt_put(xprt);
 	}
 out_err:
-	if (!serv->sv_nrthreads && !nn->keep_active)
-		nfsd_last_thread(net);
-
-	svc_put(serv);
+	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+		nn->nfsd_serv->sv_nrthreads--;	/* sockets remain: drop the count only */
+	else
+		nfsd_destroy(net);	/* sv_permsocks is empty */
 	return err;
 }
 }
 
@@ -1152,7 +1168,6 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
 		inode->i_fop = &simple_dir_operations;
 		inode->i_op = &simple_dir_inode_operations;
 		inc_nlink(inode);
-		break;
 	default:
 		break;
 	}
@@ -1254,8 +1269,7 @@ static void nfsdfs_remove_files(struct dentry *root)
 /* XXX: cut'n'paste from simple_fill_super; figure out if we could share
  * code instead. */
 static  int nfsdfs_create_files(struct dentry *root,
-				const struct tree_descr *files,
-				struct dentry **fdentries)
+					const struct tree_descr *files)
 {
 	struct inode *dir = d_inode(root);
 	struct inode *inode;
@@ -1264,6 +1278,8 @@ static  int nfsdfs_create_files(struct dentry *root,
 
 	inode_lock(dir);
 	for (i = 0; files->name && files->name[0]; i++, files++) {
+		if (!files->name)
+			continue;
 		dentry = d_alloc_name(root, files->name);
 		if (!dentry)
 			goto out;
@@ -1277,8 +1293,6 @@ static  int nfsdfs_create_files(struct dentry *root,
 		inode->i_private = __get_nfsdfs_client(dir);
 		d_add(dentry, inode);
 		fsnotify_create(dir, dentry);
-		if (fdentries)
-			fdentries[i] = dentry;
 	}
 	inode_unlock(dir);
 	return 0;
@@ -1290,9 +1304,8 @@ out:
 
 /* on success, returns positive number unique to that client. */
 struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
-				 struct nfsdfs_client *ncl, u32 id,
-				 const struct tree_descr *files,
-				 struct dentry **fdentries)
+		struct nfsdfs_client *ncl, u32 id,
+		const struct tree_descr *files)
 {
 	struct dentry *dentry;
 	char name[11];
@@ -1303,7 +1316,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
 	dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
 	if (IS_ERR(dentry)) /* XXX: tossing errors? */
 		return NULL;
-	ret = nfsdfs_create_files(dentry, files, fdentries);
+	ret = nfsdfs_create_files(dentry, files);
 	if (ret) {
 		nfsd_client_rmdir(dentry);
 		return NULL;
@@ -1339,10 +1352,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
 
 	static const struct tree_descr nfsd_files[] = {
 		[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
-		/* Per-export io stats use same ops as exports file */
-		[NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO},
 		[NFSD_Export_features] = {"export_features",
-					&export_features_fops, S_IRUGO},
+					&export_features_operations, S_IRUGO},
 		[NFSD_FO_UnlockIP] = {"unlock_ip",
 					&transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_FO_UnlockFS] = {"unlock_filesystem",
@@ -1351,16 +1362,13 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
-		[NFSD_Reply_Cache_Stats] = {"reply_cache_stats",
-					&nfsd_reply_cache_stats_fops, S_IRUGO},
+		[NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
 		[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
 		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
 		[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
-		[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
 #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
-		[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes",
-					&supported_enctypes_fops, S_IRUGO},
+		[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
 #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
 #ifdef CONFIG_NFSD_V4
 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
@@ -1460,16 +1468,25 @@ static __net_init int nfsd_init_net(struct net *net)
 		goto out_idmap_error;
 	nn->nfsd_versions = NULL;
 	nn->nfsd4_minorversions = NULL;
-	nfsd4_init_leases_net(nn);
 	retval = nfsd_reply_cache_init(nn);
 	if (retval)
-		goto out_cache_error;
-	get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
-	seqlock_init(&nn->writeverf_lock);
+		goto out_drc_error;
+	nn->nfsd4_lease = 90;	/* default lease time */
+	nn->nfsd4_grace = 90;
+	nn->somebody_reclaimed = false;
+	nn->track_reclaim_completes = false;
+	nn->clverifier_counter = prandom_u32();
+	nn->clientid_base = prandom_u32();
+	nn->clientid_counter = nn->clientid_base + 1;
+	nn->s2s_cp_cl_id = nn->clientid_counter++;
+
+	atomic_set(&nn->ntf_refcnt, 0);
+	init_waitqueue_head(&nn->ntf_wq);
+	seqlock_init(&nn->boot_lock);
 
 	return 0;
 
-out_cache_error:
+out_drc_error:
 	nfsd_idmap_shutdown(net);
 out_idmap_error:
 	nfsd_export_shutdown(net);
@@ -1497,6 +1514,7 @@ static struct pernet_operations nfsd_net_ops = {
 static int __init init_nfsd(void)
 {
 	int retval;
+	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
 
 	retval = nfsd4_init_slabs();
 	if (retval)
@@ -1504,9 +1522,7 @@ static int __init init_nfsd(void)
 	retval = nfsd4_init_pnfs();
 	if (retval)
 		goto out_free_slabs;
-	retval = nfsd_stat_init();	/* Statistics */
-	if (retval)
-		goto out_free_pnfs;
+	nfsd_stat_init();	/* Statistics */
 	retval = nfsd_drc_slab_create();
 	if (retval)
 		goto out_free_stat;
@@ -1514,25 +1530,20 @@ static int __init init_nfsd(void)
 	retval = create_proc_exports_entry();
 	if (retval)
 		goto out_free_lockd;
+	retval = register_filesystem(&nfsd_fs_type);
+	if (retval)
+		goto out_free_exports;
 	retval = register_pernet_subsys(&nfsd_net_ops);
 	if (retval < 0)
-		goto out_free_exports;
+		goto out_free_filesystem;
 	retval = register_cld_notifier();
-	if (retval)
-		goto out_free_subsys;
-	retval = nfsd4_create_laundry_wq();
-	if (retval)
-		goto out_free_cld;
-	retval = register_filesystem(&nfsd_fs_type);
 	if (retval)
 		goto out_free_all;
 	return 0;
 out_free_all:
-	nfsd4_destroy_laundry_wq();
-out_free_cld:
-	unregister_cld_notifier();
-out_free_subsys:
 	unregister_pernet_subsys(&nfsd_net_ops);
+out_free_filesystem:
+	unregister_filesystem(&nfsd_fs_type);
 out_free_exports:
 	remove_proc_entry("fs/nfs/exports", NULL);
 	remove_proc_entry("fs/nfs", NULL);
@@ -1541,7 +1552,6 @@ out_free_lockd:
 	nfsd_drc_slab_free();
 out_free_stat:
 	nfsd_stat_shutdown();
-out_free_pnfs:
 	nfsd4_exit_pnfs();
 out_free_slabs:
 	nfsd4_free_slabs();
@@ -1550,8 +1560,6 @@ out_free_slabs:
 
 static void __exit exit_nfsd(void)
 {
-	unregister_filesystem(&nfsd_fs_type);
-	nfsd4_destroy_laundry_wq();
 	unregister_cld_notifier();
 	unregister_pernet_subsys(&nfsd_net_ops);
 	nfsd_drc_slab_free();
@@ -1561,6 +1569,7 @@ static void __exit exit_nfsd(void)
 	nfsd_lockd_shutdown();
 	nfsd4_free_slabs();
 	nfsd4_exit_pnfs();
+	unregister_filesystem(&nfsd_fs_type);
 }
 
 MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 013bfa24ced2..4362d295ed34 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -24,8 +24,8 @@
 #include <uapi/linux/nfsd/debug.h>
 
 #include "netns.h"
-#include "export.h"
 #include "stats.h"
+#include "export.h"
 
 #undef ifdebug
 #ifdef CONFIG_SUNRPC_DEBUG
@@ -64,7 +64,8 @@ struct readdir_cd {
 
 
 extern struct svc_program	nfsd_program;
-extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
+extern const struct svc_version	nfsd_version2, nfsd_version3,
+				nfsd_version4;
 extern struct mutex		nfsd_mutex;
 extern spinlock_t		nfsd_drc_lock;
 extern unsigned long		nfsd_drc_max_mem;
@@ -72,16 +73,6 @@ extern unsigned long		nfsd_drc_mem_used;
 
 extern const struct seq_operations nfs_exports_op;
 
-/*
- * Common void argument and result helpers
- */
-struct nfsd_voidargs { };
-struct nfsd_voidres { };
-bool		nfssvc_decode_voidarg(struct svc_rqst *rqstp,
-				      struct xdr_stream *xdr);
-bool		nfssvc_encode_voidres(struct svc_rqst *rqstp,
-				      struct xdr_stream *xdr);
-
 /*
  * Function prototypes.
  */
@@ -96,6 +87,8 @@ int		nfsd_pool_stats_open(struct inode *, struct file *);
 int		nfsd_pool_stats_release(struct inode *, struct file *);
 void		nfsd_shutdown_threads(struct net *net);
 
+void		nfsd_destroy(struct net *net);
+
 bool		i_am_nfsd(void);
 
 struct nfsdfs_client {
@@ -105,9 +98,7 @@ struct nfsdfs_client {
 
 struct nfsdfs_client *get_nfsdfs_client(struct inode *);
 struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
-				 struct nfsdfs_client *ncl, u32 id,
-				 const struct tree_descr *,
-				 struct dentry **fdentries);
+		struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
 void nfsd_client_rmdir(struct dentry *dentry);
 
 
@@ -131,7 +122,6 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
 int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
 void nfsd_reset_versions(struct nfsd_net *nn);
 int nfsd_create_serv(struct net *net);
-void nfsd_last_thread(struct net *net);
 
 extern int nfsd_max_blksize;
 
@@ -160,9 +150,6 @@ void nfs4_state_shutdown_net(struct net *net);
 int nfs4_reset_recoverydir(char *recdir);
 char * nfs4_recoverydir(void);
 bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
-int nfsd4_create_laundry_wq(void);
-void nfsd4_destroy_laundry_wq(void);
-bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode);
 #else
 static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
@@ -176,13 +163,6 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 {
 	return false;
 }
-static inline int nfsd4_create_laundry_wq(void) { return 0; };
-static inline void nfsd4_destroy_laundry_wq(void) {};
-static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp,
-					      struct inode *inode)
-{
-	return false;
-}
 #endif
 
 /*
@@ -344,10 +324,6 @@ void		nfsd_lockd_shutdown(void);
 #define COMPOUND_ERR_SLACK_SPACE	16     /* OP_SETATTR */
 
 #define NFSD_LAUNDROMAT_MINTIMEOUT      1   /* seconds */
-#define	NFSD_COURTESY_CLIENT_TIMEOUT	(24 * 60 * 60)	/* seconds */
-#define	NFSD_CLIENT_MAX_TRIM_PER_RUN	128
-#define	NFS4_CLIENTS_PER_GB		1024
-#define NFSD_DELEGRETURN_TIMEOUT	(HZ / 34)	/* 30ms */
 
 /*
  * The following attributes are currently not supported by the NFSv4 server:
@@ -376,7 +352,7 @@ void		nfsd_lockd_shutdown(void);
  | FATTR4_WORD1_OWNER	        | FATTR4_WORD1_OWNER_GROUP  | FATTR4_WORD1_RAWDEV           \
  | FATTR4_WORD1_SPACE_AVAIL     | FATTR4_WORD1_SPACE_FREE   | FATTR4_WORD1_SPACE_TOTAL      \
  | FATTR4_WORD1_SPACE_USED      | FATTR4_WORD1_TIME_ACCESS  | FATTR4_WORD1_TIME_ACCESS_SET  \
- | FATTR4_WORD1_TIME_DELTA      | FATTR4_WORD1_TIME_METADATA   | FATTR4_WORD1_TIME_CREATE      \
+ | FATTR4_WORD1_TIME_DELTA   | FATTR4_WORD1_TIME_METADATA    \
  | FATTR4_WORD1_TIME_MODIFY     | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
 
 #define NFSD4_SUPPORTED_ATTRS_WORD2 0
@@ -410,6 +386,7 @@ void		nfsd_lockd_shutdown(void);
 
 #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
 	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+	FATTR4_WORD2_CHANGE_ATTR_TYPE | \
 	FATTR4_WORD2_MODE_UMASK | \
 	NFSD4_2_SECURITY_ATTRS | \
 	FATTR4_WORD2_XATTR_SUPPORT)
@@ -472,8 +449,7 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 	(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
 #define NFSD_WRITEABLE_ATTRS_WORD1 \
 	(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
-	| FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \
-	| FATTR4_WORD1_TIME_MODIFY_SET)
+	| FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
 	FATTR4_WORD2_SECURITY_LABEL
@@ -499,20 +475,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 extern int nfsd4_is_junction(struct dentry *dentry);
 extern int register_cld_notifier(void);
 extern void unregister_cld_notifier(void);
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
-extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
-#endif
-
-extern void nfsd4_init_leases_net(struct nfsd_net *nn);
-
 #else /* CONFIG_NFSD_V4 */
 static inline int nfsd4_is_junction(struct dentry *dentry)
 {
 	return 0;
 }
 
-static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
-
 #define register_cld_notifier() 0
 #define unregister_cld_notifier() do { } while(0)
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index db8d62632a5b..c81dbbad8792 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -153,12 +153,11 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
 static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 {
 	struct knfsd_fh	*fh = &fhp->fh_handle;
-	struct fid *fid = NULL;
+	struct fid *fid = NULL, sfid;
 	struct svc_export *exp;
 	struct dentry *dentry;
 	int fileid_type;
 	int data_left = fh->fh_size/4;
-	int len;
 	__be32 error;
 
 	error = nfserr_stale;
@@ -167,35 +166,48 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 	if (rqstp->rq_vers == 4 && fh->fh_size == 0)
 		return nfserr_nofilehandle;
 
-	if (fh->fh_version != 1)
-		return error;
+	if (fh->fh_version == 1) {
+		int len;
 
-	if (--data_left < 0)
-		return error;
-	if (fh->fh_auth_type != 0)
-		return error;
-	len = key_len(fh->fh_fsid_type) / 4;
-	if (len == 0)
-		return error;
-	if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
-		/* deprecated, convert to type 3 */
-		len = key_len(FSID_ENCODE_DEV)/4;
-		fh->fh_fsid_type = FSID_ENCODE_DEV;
-		/*
-		 * struct knfsd_fh uses host-endian fields, which are
-		 * sometimes used to hold net-endian values. This
-		 * confuses sparse, so we must use __force here to
-		 * keep it from complaining.
-		 */
-		fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
-						      ntohl((__force __be32)fh->fh_fsid[1])));
-		fh->fh_fsid[1] = fh->fh_fsid[2];
+		if (--data_left < 0)
+			return error;
+		if (fh->fh_auth_type != 0)
+			return error;
+		len = key_len(fh->fh_fsid_type) / 4;
+		if (len == 0)
+			return error;
+		if  (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+			/* deprecated, convert to type 3 */
+			len = key_len(FSID_ENCODE_DEV)/4;
+			fh->fh_fsid_type = FSID_ENCODE_DEV;
+			/*
+			 * struct knfsd_fh uses host-endian fields, which are
+			 * sometimes used to hold net-endian values. This
+			 * confuses sparse, so we must use __force here to
+			 * keep it from complaining.
+			 */
+			fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
+							ntohl((__force __be32)fh->fh_fsid[1])));
+			fh->fh_fsid[1] = fh->fh_fsid[2];
+		}
+		data_left -= len;
+		if (data_left < 0)
+			return error;
+		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
+		fid = (struct fid *)(fh->fh_fsid + len);
+	} else {
+		__u32 tfh[2];
+		dev_t xdev;
+		ino_t xino;
+
+		if (fh->fh_size != NFS_FHSIZE)
+			return error;
+		/* assume old filehandle format */
+		xdev = old_decode_dev(fh->ofh_xdev);
+		xino = u32_to_ino_t(fh->ofh_xino);
+		mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
+		exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
 	}
-	data_left -= len;
-	if (data_left < 0)
-		return error;
-	exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
-	fid = (struct fid *)(fh->fh_fsid + len);
 
 	error = nfserr_stale;
 	if (IS_ERR(exp)) {
@@ -240,25 +252,28 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 	if (rqstp->rq_vers > 2)
 		error = nfserr_badhandle;
 
-	fileid_type = fh->fh_fileid_type;
+	if (fh->fh_version != 1) {
+		sfid.i32.ino = fh->ofh_ino;
+		sfid.i32.gen = fh->ofh_generation;
+		sfid.i32.parent_ino = fh->ofh_dirino;
+		fid = &sfid;
+		data_left = 3;
+		if (fh->ofh_dirino == 0)
+			fileid_type = FILEID_INO32_GEN;
+		else
+			fileid_type = FILEID_INO32_GEN_PARENT;
+	} else
+		fileid_type = fh->fh_fileid_type;
 
 	if (fileid_type == FILEID_ROOT)
 		dentry = dget(exp->ex_path.dentry);
 	else {
-		dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
-						data_left, fileid_type,
-						nfsd_acceptable, exp);
-		if (IS_ERR_OR_NULL(dentry)) {
+		dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
+				data_left, fileid_type,
+				nfsd_acceptable, exp);
+		if (IS_ERR_OR_NULL(dentry))
 			trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
 					dentry ?  PTR_ERR(dentry) : -ESTALE);
-			switch (PTR_ERR(dentry)) {
-			case -ENOMEM:
-			case -ETIMEDOUT:
-				break;
-			default:
-				dentry = ERR_PTR(-ESTALE);
-			}
-		}
 	}
 	if (dentry == NULL)
 		goto out;
@@ -276,20 +291,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 
 	fhp->fh_dentry = dentry;
 	fhp->fh_export = exp;
-
-	switch (rqstp->rq_vers) {
-	case 4:
-		if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
-			fhp->fh_no_atomic_attr = true;
-		break;
-	case 3:
-		if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC)
-			fhp->fh_no_wcc = true;
-		break;
-	case 2:
-		fhp->fh_no_wcc = true;
-	}
-
 	return 0;
 out:
 	exp_put(exp);
@@ -326,7 +327,7 @@ out:
 __be32
 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
 {
-	struct svc_export *exp = NULL;
+	struct svc_export *exp;
 	struct dentry	*dentry;
 	__be32		error;
 
@@ -399,7 +400,7 @@ skip_pseudoflavor_check:
 	}
 out:
 	if (error == nfserr_stale)
-		nfsd_stats_fh_stale_inc(exp);
+		nfsdstats.fh_stale++;
 	return error;
 }
 
@@ -428,6 +429,20 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
 	}
 }
 
+/* For composing old style file handles: store the inode number and
+ * generation; zero ofh_dirino for a directory or a NOSUBTREECHECK export.
+ */
+static inline void _fh_update_old(struct dentry *dentry,
+				  struct svc_export *exp,
+				  struct knfsd_fh *fh)
+{
+	fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
+	fh->ofh_generation = d_inode(dentry)->i_generation;
+	if (d_is_dir(dentry) ||
+	    (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+		fh->ofh_dirino = 0;
+}
+
 static bool is_root_export(struct svc_export *exp)
 {
 	return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
@@ -524,6 +539,9 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	/* ref_fh is a reference file handle.
 	 * if it is non-null and for the same filesystem, then we should compose
 	 * a filehandle which is of the same version, where possible.
+	 * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca,
+	 * then we create a 32-byte filehandle using nfs_fhbase_old.
+	 *
 	 */
 
 	struct inode * inode = d_inode(dentry);
@@ -541,13 +559,10 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	 */
 	set_version_and_fsid_type(fhp, exp, ref_fh);
 
-	/* If we have a ref_fh, then copy the fh_no_wcc setting from it. */
-	fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false;
-
 	if (ref_fh == fhp)
 		fh_put(ref_fh);
 
-	if (fhp->fh_dentry) {
+	if (fhp->fh_locked || fhp->fh_dentry) {
 		printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
 		       dentry);
 	}
@@ -559,21 +574,35 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	fhp->fh_dentry = dget(dentry); /* our internal copy */
 	fhp->fh_export = exp_get(exp);
 
-	fhp->fh_handle.fh_size =
-		key_len(fhp->fh_handle.fh_fsid_type) + 4;
-	fhp->fh_handle.fh_auth_type = 0;
+	if (fhp->fh_handle.fh_version == 0xca) {
+		/* old style filehandle please */
+		memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
+		fhp->fh_handle.fh_size = NFS_FHSIZE;
+		fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
+		fhp->fh_handle.ofh_dev =  old_encode_dev(ex_dev);
+		fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
+		fhp->fh_handle.ofh_xino =
+			ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
+		fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
+		if (inode)
+			_fh_update_old(dentry, exp, &fhp->fh_handle);
+	} else {
+		fhp->fh_handle.fh_size =
+			key_len(fhp->fh_handle.fh_fsid_type) + 4;
+		fhp->fh_handle.fh_auth_type = 0;
 
-	mk_fsid(fhp->fh_handle.fh_fsid_type,
-		fhp->fh_handle.fh_fsid,
-		ex_dev,
-		d_inode(exp->ex_path.dentry)->i_ino,
-		exp->ex_fsid, exp->ex_uuid);
+		mk_fsid(fhp->fh_handle.fh_fsid_type,
+			fhp->fh_handle.fh_fsid,
+			ex_dev,
+			d_inode(exp->ex_path.dentry)->i_ino,
+			exp->ex_fsid, exp->ex_uuid);
 
-	if (inode)
-		_fh_update(fhp, exp, dentry);
-	if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
-		fh_put(fhp);
-		return nfserr_opnotsupp;
+		if (inode)
+			_fh_update(fhp, exp, dentry);
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
+			fh_put(fhp);
+			return nfserr_opnotsupp;
+		}
 	}
 
 	return 0;
@@ -594,12 +623,16 @@ fh_update(struct svc_fh *fhp)
 	dentry = fhp->fh_dentry;
 	if (d_really_is_negative(dentry))
 		goto out_negative;
-	if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
-		return 0;
+	if (fhp->fh_handle.fh_version != 1) {
+		_fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
+	} else {
+		if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
+			return 0;
 
-	_fh_update(fhp, fhp->fh_export, dentry);
-	if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
-		return nfserr_opnotsupp;
+		_fh_update(fhp, fhp->fh_export, dentry);
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
+			return nfserr_opnotsupp;
+	}
 	return 0;
 out_bad:
 	printk(KERN_ERR "fh_update: fh not verified!\n");
@@ -610,85 +643,6 @@ out_negative:
 	return nfserr_serverfault;
 }
 
-/**
- * fh_fill_pre_attrs - Fill in pre-op attributes
- * @fhp: file handle to be updated
- *
- */
-void fh_fill_pre_attrs(struct svc_fh *fhp)
-{
-	bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
-	struct inode *inode;
-	struct kstat stat;
-	__be32 err;
-
-	if (fhp->fh_no_wcc || fhp->fh_pre_saved)
-		return;
-
-	inode = d_inode(fhp->fh_dentry);
-	err = fh_getattr(fhp, &stat);
-	if (err) {
-		/* Grab the times from inode anyway */
-		stat.mtime = inode->i_mtime;
-		stat.ctime = inode->i_ctime;
-		stat.size  = inode->i_size;
-	}
-	if (v4)
-		fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
-
-	fhp->fh_pre_mtime = stat.mtime;
-	fhp->fh_pre_ctime = stat.ctime;
-	fhp->fh_pre_size  = stat.size;
-	fhp->fh_pre_saved = true;
-}
-
-/**
- * fh_fill_post_attrs - Fill in post-op attributes
- * @fhp: file handle to be updated
- *
- */
-void fh_fill_post_attrs(struct svc_fh *fhp)
-{
-	bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
-	struct inode *inode = d_inode(fhp->fh_dentry);
-	__be32 err;
-
-	if (fhp->fh_no_wcc)
-		return;
-
-	if (fhp->fh_post_saved)
-		printk("nfsd: inode locked twice during operation.\n");
-
-	err = fh_getattr(fhp, &fhp->fh_post_attr);
-	if (err) {
-		fhp->fh_post_saved = false;
-		fhp->fh_post_attr.ctime = inode->i_ctime;
-	} else
-		fhp->fh_post_saved = true;
-	if (v4)
-		fhp->fh_post_change =
-			nfsd4_change_attribute(&fhp->fh_post_attr, inode);
-}
-
-/**
- * fh_fill_both_attrs - Fill pre-op and post-op attributes
- * @fhp: file handle to be updated
- *
- * This is used when the directory wasn't changed, but wcc attributes
- * are needed anyway.
- */
-void fh_fill_both_attrs(struct svc_fh *fhp)
-{
-	fh_fill_post_attrs(fhp);
-	if (!fhp->fh_post_saved)
-		return;
-	fhp->fh_pre_change = fhp->fh_post_change;
-	fhp->fh_pre_mtime = fhp->fh_post_attr.mtime;
-	fhp->fh_pre_ctime = fhp->fh_post_attr.ctime;
-	fhp->fh_pre_size = fhp->fh_post_attr.size;
-	fhp->fh_pre_saved = true;
-}
-
 /*
  * Release a file handle.
  */
@@ -698,16 +652,16 @@ fh_put(struct svc_fh *fhp)
 	struct dentry * dentry = fhp->fh_dentry;
 	struct svc_export * exp = fhp->fh_export;
 	if (dentry) {
+		fh_unlock(fhp);
 		fhp->fh_dentry = NULL;
 		dput(dentry);
-		fh_clear_pre_post_attrs(fhp);
+		fh_clear_wcc(fhp);
 	}
 	fh_drop_write(fhp);
 	if (exp) {
 		exp_put(exp);
 		fhp->fh_export = NULL;
 	}
-	fhp->fh_no_wcc = false;
 	return;
 }
 
@@ -717,15 +671,20 @@ fh_put(struct svc_fh *fhp)
 char * SVCFH_fmt(struct svc_fh *fhp)
 {
 	struct knfsd_fh *fh = &fhp->fh_handle;
-	static char buf[2+1+1+64*3+1];
 
-	if (fh->fh_size < 0 || fh->fh_size> 64)
-		return "bad-fh";
-	sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw);
+	static char buf[80];
+	sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
+		fh->fh_size,
+		fh->fh_base.fh_pad[0],
+		fh->fh_base.fh_pad[1],
+		fh->fh_base.fh_pad[2],
+		fh->fh_base.fh_pad[3],
+		fh->fh_base.fh_pad[4],
+		fh->fh_base.fh_pad[5]);
 	return buf;
 }
 
-enum fsid_source fsid_source(const struct svc_fh *fhp)
+enum fsid_source fsid_source(struct svc_fh *fhp)
 {
 	if (fhp->fh_handle.fh_version != 1)
 		return FSIDSOURCE_DEV;
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 513e028b0bbe..56cfbc361561 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -10,56 +10,8 @@
 
 #include <linux/crc32.h>
 #include <linux/sunrpc/svc.h>
+#include <uapi/linux/nfsd/nfsfh.h>
 #include <linux/iversion.h>
-#include <linux/exportfs.h>
-#include <linux/nfs4.h>
-
-/*
- * The file handle starts with a sequence of four-byte words.
- * The first word contains a version number (1) and three descriptor bytes
- * that tell how the remaining 3 variable length fields should be handled.
- * These three bytes are auth_type, fsid_type and fileid_type.
- *
- * All four-byte values are in host-byte-order.
- *
- * The auth_type field is deprecated and must be set to 0.
- *
- * The fsid_type identifies how the filesystem (or export point) is
- *    encoded.
- *  Current values:
- *     0  - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number
- *        NOTE: we cannot use the kdev_t device id value, because kdev_t.h
- *              says we mustn't.  We must break it up and reassemble.
- *     1  - 4 byte user specified identifier
- *     2  - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
- *     3  - 4 byte device id, encoded for user-space, 4 byte inode number
- *     4  - 4 byte inode number and 4 byte uuid
- *     5  - 8 byte uuid
- *     6  - 16 byte uuid
- *     7  - 8 byte inode number and 16 byte uuid
- *
- * The fileid_type identifies how the file within the filesystem is encoded.
- *   The values for this field are filesystem specific, exccept that
- *   filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
- *   in include/linux/exportfs.h for currently registered values.
- */
-
-struct knfsd_fh {
-	unsigned int	fh_size;	/*
-					 * Points to the current size while
-					 * building a new file handle.
-					 */
-	union {
-		char			fh_raw[NFS4_FHSIZE];
-		struct {
-			u8		fh_version;	/* == 1 */
-			u8		fh_auth_type;	/* deprecated */
-			u8		fh_fsid_type;
-			u8		fh_fileid_type;
-			u32		fh_fsid[]; /* flexible-array member */
-		};
-	};
-};
 
 static inline __u32 ino_t_to_u32(ino_t ino)
 {
@@ -81,18 +33,14 @@ typedef struct svc_fh {
 	struct dentry *		fh_dentry;	/* validated dentry */
 	struct svc_export *	fh_export;	/* export pointer */
 
+	bool			fh_locked;	/* inode locked by us */
 	bool			fh_want_write;	/* remount protection taken */
-	bool			fh_no_wcc;	/* no wcc data needed */
-	bool			fh_no_atomic_attr;
-						/*
-						 * wcc data is not atomic with
-						 * operation
-						 */
 	int			fh_flags;	/* FH flags */
+#ifdef CONFIG_NFSD_V3
 	bool			fh_post_saved;	/* post-op attrs saved */
 	bool			fh_pre_saved;	/* pre-op attrs saved */
 
-	/* Pre-op attributes saved when inode is locked */
+	/* Pre-op attributes saved during fh_lock */
 	__u64			fh_pre_size;	/* size before operation */
 	struct timespec64	fh_pre_mtime;	/* mtime before oper */
 	struct timespec64	fh_pre_ctime;	/* ctime before oper */
@@ -102,9 +50,11 @@ typedef struct svc_fh {
 	 */
 	u64			fh_pre_change;
 
-	/* Post-op attributes saved in fh_fill_post_attrs() */
+	/* Post-op attributes saved in fh_unlock */
 	struct kstat		fh_post_attr;	/* full attrs after operation */
 	u64			fh_post_change; /* nfsv4 change; see above */
+#endif /* CONFIG_NFSD_V3 */
+
 } svc_fh;
 #define NFSD4_FH_FOREIGN (1<<0)
 #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
@@ -126,7 +76,7 @@ enum fsid_source {
 	FSIDSOURCE_FSID,
 	FSIDSOURCE_UUID,
 };
-extern enum fsid_source fsid_source(const struct svc_fh *fhp);
+extern enum fsid_source fsid_source(struct svc_fh *fhp);
 
 
 /*
@@ -220,19 +170,19 @@ __be32	fh_update(struct svc_fh *);
 void	fh_put(struct svc_fh *);
 
 static __inline__ struct svc_fh *
-fh_copy(struct svc_fh *dst, const struct svc_fh *src)
+fh_copy(struct svc_fh *dst, struct svc_fh *src)
 {
-	WARN_ON(src->fh_dentry);
-
+	WARN_ON(src->fh_dentry || src->fh_locked);
+			
 	*dst = *src;
 	return dst;
 }
 
 static inline void
-fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src)
+fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
 {
 	dst->fh_size = src->fh_size;
-	memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size);
+	memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
 }
 
 static __inline__ struct svc_fh *
@@ -243,18 +193,16 @@ fh_init(struct svc_fh *fhp, int maxsize)
 	return fhp;
 }
 
-static inline bool fh_match(const struct knfsd_fh *fh1,
-			    const struct knfsd_fh *fh2)
+static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
 {
 	if (fh1->fh_size != fh2->fh_size)
 		return false;
-	if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0)
+	if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0)
 		return false;
 	return true;
 }
 
-static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
-				 const struct knfsd_fh *fh2)
+static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
 {
 	if (fh1->fh_fsid_type != fh2->fh_fsid_type)
 		return false;
@@ -271,23 +219,27 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
  * returns a crc32 hash for the filehandle that is compatible with
  * the one displayed by "wireshark".
  */
-static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
+
+static inline u32
+knfsd_fh_hash(struct knfsd_fh *fh)
 {
-	return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
+	return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size);
 }
 #else
-static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
+static inline u32
+knfsd_fh_hash(struct knfsd_fh *fh)
 {
 	return 0;
 }
 #endif
 
-/**
- * fh_clear_pre_post_attrs - Reset pre/post attributes
- * @fhp: file handle to be updated
- *
+#ifdef CONFIG_NFSD_V3
+/*
+ * The wcc data stored in current_fh should be cleared
+ * between compound ops.
  */
-static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
+static inline void
+fh_clear_wcc(struct svc_fh *fhp)
 {
 	fhp->fh_post_saved = false;
 	fhp->fh_pre_saved = false;
@@ -307,21 +259,68 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
 static inline u64 nfsd4_change_attribute(struct kstat *stat,
 					 struct inode *inode)
 {
-	if (inode->i_sb->s_export_op->fetch_iversion)
-		return inode->i_sb->s_export_op->fetch_iversion(inode);
-	else if (IS_I_VERSION(inode)) {
-		u64 chattr;
+	u64 chattr;
 
-		chattr =  stat->ctime.tv_sec;
-		chattr <<= 30;
-		chattr += stat->ctime.tv_nsec;
-		chattr += inode_query_iversion(inode);
-		return chattr;
-	} else
-		return time_to_chattr(&stat->ctime);
+	chattr =  stat->ctime.tv_sec;
+	chattr <<= 30;
+	chattr += stat->ctime.tv_nsec;
+	chattr += inode_query_iversion(inode);
+	return chattr;
+}
+
+extern void fill_pre_wcc(struct svc_fh *fhp);
+extern void fill_post_wcc(struct svc_fh *fhp);
+#else
+#define fh_clear_wcc(ignored)
+#define fill_pre_wcc(ignored)
+#define fill_post_wcc(notused)
+#endif /* CONFIG_NFSD_V3 */
+
+
+/*
+ * Lock a file handle/inode: lock the inode, run fill_pre_wcc, set fh_locked.
+ * An already-locked handle only logs a warning; fh_lock uses I_MUTEX_NORMAL.
+ * NOTE: both fh_lock and fh_unlock are done "by hand" in vfs.c:nfsd_rename
+ * as it needs to grab 2 i_mutex's at once, so mirror any changes there.
+ */
+
+static inline void
+fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
+	struct inode	*inode;
+
+	BUG_ON(!dentry);	/* the handle must already carry a dentry */
+
+	if (fhp->fh_locked) {
+		printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
+			dentry);
+		return;
+	}
+
+	inode = d_inode(dentry);
+	inode_lock_nested(inode, subclass);
+	fill_pre_wcc(fhp);	/* expands to nothing without CONFIG_NFSD_V3 */
+	fhp->fh_locked = true;
+}
+
+static inline void
+fh_lock(struct svc_fh *fhp)
+{
+	fh_lock_nested(fhp, I_MUTEX_NORMAL);
+}
+
+/*
+ * Unlock a file handle/inode; does nothing unless fh_locked is set.
+ */
+static inline void
+fh_unlock(struct svc_fh *fhp)
+{
+	if (fhp->fh_locked) {
+		fill_post_wcc(fhp);
+		inode_unlock(d_inode(fhp->fh_dentry));
+		fhp->fh_locked = false;
+	}
 }
 
-extern void fh_fill_pre_attrs(struct svc_fh *fhp);
-extern void fh_fill_post_attrs(struct svc_fh *fhp);
-extern void fh_fill_both_attrs(struct svc_fh *fhp);
 #endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 96426dea7d41..bbd01e8397f6 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -51,9 +51,6 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
 	struct nfsd_sattrargs *argp = rqstp->rq_argp;
 	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct iattr *iap = &argp->attrs;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= iap,
-	};
 	struct svc_fh *fhp;
 
 	dprintk("nfsd: SETATTR  %s, valid=%x, size=%ld\n",
@@ -103,7 +100,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
 		}
 	}
 
-	resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
+	resp->status = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0);
 	if (resp->status != nfs_ok)
 		goto out;
 
@@ -152,16 +149,14 @@ out:
 static __be32
 nfsd_proc_readlink(struct svc_rqst *rqstp)
 {
-	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_readlinkargs *argp = rqstp->rq_argp;
 	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
 
 	/* Read the symlink. */
 	resp->len = NFS_MAXPATHLEN;
-	resp->page = *(rqstp->rq_next_page++);
-	resp->status = nfsd_readlink(rqstp, &argp->fh,
-				     page_address(resp->page), &resp->len);
+	resp->status = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len);
 
 	fh_put(&argp->fh);
 	return rpc_success;
@@ -176,42 +171,36 @@ nfsd_proc_read(struct svc_rqst *rqstp)
 {
 	struct nfsd_readargs *argp = rqstp->rq_argp;
 	struct nfsd_readres *resp = rqstp->rq_resp;
-	unsigned int len;
 	u32 eof;
-	int v;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
 		SVCFH_fmt(&argp->fh),
 		argp->count, argp->offset);
 
-	argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
-	argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
-
-	v = 0;
-	len = argp->count;
-	resp->pages = rqstp->rq_next_page;
-	while (len > 0) {
-		struct page *page = *(rqstp->rq_next_page++);
-
-		rqstp->rq_vec[v].iov_base = page_address(page);
-		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
-		len -= rqstp->rq_vec[v].iov_len;
-		v++;
-	}
-
 	/* Obtain buffer pointer for payload. 19 is 1 word for
 	 * status, 17 words for fattr, and 1 word for the byte count.
 	 */
+
+	if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
+		char buf[RPC_MAX_ADDRBUFLEN];
+		printk(KERN_NOTICE
+			"oversized read request from %s (%d bytes)\n",
+				svc_print_addr(rqstp, buf, sizeof(buf)),
+				argp->count);
+		argp->count = NFSSVC_MAXBLKSIZE_V2;
+	}
 	svc_reserve_auth(rqstp, (19<<2) + argp->count + 4);
 
 	resp->count = argp->count;
-	fh_copy(&resp->fh, &argp->fh);
-	resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
-				 rqstp->rq_vec, v, &resp->count, &eof);
+	resp->status = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
+				 argp->offset,
+				 rqstp->rq_vec, argp->vlen,
+				 &resp->count,
+				 &eof);
 	if (resp->status == nfs_ok)
 		resp->status = fh_getattr(&resp->fh, &resp->stat);
 	else if (resp->status == nfserr_jukebox)
-		set_bit(RQ_DROPME, &rqstp->rq_flags);
+		return rpc_drop_reply;
 	return rpc_success;
 }
 
@@ -238,7 +227,12 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 		SVCFH_fmt(&argp->fh),
 		argp->len, argp->offset);
 
-	nvecs = svc_fill_write_vector(rqstp, &argp->payload);
+	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+				      &argp->first, cnt);
+	if (!nvecs) {
+		resp->status = nfserr_io;
+		goto out;
+	}
 
 	resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
 				  argp->offset, rqstp->rq_vec, nvecs,
@@ -246,7 +240,8 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 	if (resp->status == nfs_ok)
 		resp->status = fh_getattr(&resp->fh, &resp->stat);
 	else if (resp->status == nfserr_jukebox)
-		set_bit(RQ_DROPME, &rqstp->rq_flags);
+		return rpc_drop_reply;
+out:
 	return rpc_success;
 }
 
@@ -264,9 +259,6 @@ nfsd_proc_create(struct svc_rqst *rqstp)
 	svc_fh		*dirfhp = &argp->fh;
 	svc_fh		*newfhp = &resp->fh;
 	struct iattr	*attr = &argp->attrs;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= attr,
-	};
 	struct inode	*inode;
 	struct dentry	*dchild;
 	int		type, mode;
@@ -292,7 +284,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
 		goto done;
 	}
 
-	inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT);
+	fh_lock_nested(dirfhp, I_MUTEX_PARENT);
 	dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
 	if (IS_ERR(dchild)) {
 		resp->status = nfserrno(PTR_ERR(dchild));
@@ -391,8 +383,9 @@ nfsd_proc_create(struct svc_rqst *rqstp)
 	resp->status = nfs_ok;
 	if (!inode) {
 		/* File doesn't exist. Create it and set attrs */
-		resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type,
-						  rdev, newfhp);
+		resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name,
+						  argp->len, attr, type, rdev,
+						  newfhp);
 	} else if (type == S_IFREG) {
 		dprintk("nfsd:   existing %s, valid=%x, size=%ld\n",
 			argp->name, attr->ia_valid, (long) attr->ia_size);
@@ -402,12 +395,13 @@ nfsd_proc_create(struct svc_rqst *rqstp)
 		 */
 		attr->ia_valid &= ATTR_SIZE;
 		if (attr->ia_valid)
-			resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
+			resp->status = nfsd_setattr(rqstp, newfhp, attr, 0,
 						    (time64_t)0);
 	}
 
 out_unlock:
-	inode_unlock(dirfhp->fh_dentry->d_inode);
+	/* We don't really need to unlock, as fh_put does it. */
+	fh_unlock(dirfhp);
 	fh_drop_write(dirfhp);
 done:
 	fh_put(dirfhp);
@@ -477,9 +471,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
 {
 	struct nfsd_symlinkargs *argp = rqstp->rq_argp;
 	struct nfsd_stat *resp = rqstp->rq_resp;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &argp->attrs,
-	};
 	struct svc_fh	newfh;
 
 	if (argp->tlen > NFS_MAXPATHLEN) {
@@ -501,7 +492,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
 
 	fh_init(&newfh, NFS_FHSIZE);
 	resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
-				    argp->tname, &attrs, &newfh);
+				    argp->tname, &newfh);
 
 	kfree(argp->tname);
 	fh_put(&argp->ffh);
@@ -519,9 +510,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
 {
 	struct nfsd_createargs *argp = rqstp->rq_argp;
 	struct nfsd_diropres *resp = rqstp->rq_resp;
-	struct nfsd_attrs attrs = {
-		.na_iattr	= &argp->attrs,
-	};
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
 
@@ -533,7 +521,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
 	argp->attrs.ia_valid &= ~ATTR_SIZE;
 	fh_init(&resp->fh, NFS_FHSIZE);
 	resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
-				   &attrs, S_IFDIR, 0, &resp->fh);
+				   &argp->attrs, S_IFDIR, 0, &resp->fh);
 	fh_put(&argp->fh);
 	if (resp->status != nfs_ok)
 		goto out;
@@ -560,24 +548,6 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp)
 	return rpc_success;
 }
 
-static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
-				    struct nfsd_readdirres *resp,
-				    u32 count)
-{
-	struct xdr_buf *buf = &resp->dirlist;
-	struct xdr_stream *xdr = &resp->xdr;
-
-	memset(buf, 0, sizeof(*buf));
-
-	/* Reserve room for the NULL ptr & eof flag (-2 words) */
-	buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE);
-	buf->buflen -= XDR_UNIT * 2;
-	buf->pages = rqstp->rq_next_page;
-	rqstp->rq_next_page++;
-
-	xdr_init_encode_pages(xdr, buf, buf->pages,  NULL);
-}
-
 /*
  * Read a portion of a directory.
  */
@@ -586,20 +556,33 @@ nfsd_proc_readdir(struct svc_rqst *rqstp)
 {
 	struct nfsd_readdirargs *argp = rqstp->rq_argp;
 	struct nfsd_readdirres *resp = rqstp->rq_resp;
+	int		count;
 	loff_t		offset;
 
 	dprintk("nfsd: READDIR  %s %d bytes at %d\n",
 		SVCFH_fmt(&argp->fh),		
 		argp->count, argp->cookie);
 
-	nfsd_init_dirlist_pages(rqstp, resp, argp->count);
+	/* Shrink to the client read size */
+	count = (argp->count >> 2) - 2;
 
+	/* Make sure we've room for the NULL ptr & eof flag */
+	count -= 2;
+	if (count < 0)
+		count = 0;
+
+	resp->buffer = argp->buffer;
+	resp->offset = NULL;
+	resp->buflen = count;
 	resp->common.err = nfs_ok;
-	resp->cookie_offset = 0;
+	/* Read directory and encode entries on the fly */
 	offset = argp->cookie;
 	resp->status = nfsd_readdir(rqstp, &argp->fh, &offset,
 				    &resp->common, nfssvc_encode_entry);
-	nfssvc_encode_nfscookie(resp, offset);
+
+	resp->count = resp->buffer - argp->buffer;
+	if (resp->offset)
+		*resp->offset = htonl(offset);
 
 	fh_put(&argp->fh);
 	return rpc_success;
@@ -626,6 +609,7 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
  * NFSv2 Server procedures.
  * Only the results of non-idempotent operations are cached.
  */
+struct nfsd_void { int dummy; };	/* arg/result type of the void procedures */
 
 #define ST 1		/* status */
 #define FH 8		/* filehandle */
@@ -634,49 +618,41 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
 static const struct svc_procedure nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
 		.pc_func = nfsd_proc_null,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfssvc_decode_void,
+		.pc_encode = nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_void),
+		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = 0,
-		.pc_name = "NULL",
 	},
 	[NFSPROC_GETATTR] = {
 		.pc_func = nfsd_proc_getattr,
-		.pc_decode = nfssvc_decode_fhandleargs,
-		.pc_encode = nfssvc_encode_attrstatres,
+		.pc_decode = nfssvc_decode_fhandle,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_attrstat,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
-		.pc_argzero = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT,
-		.pc_name = "GETATTR",
 	},
 	[NFSPROC_SETATTR] = {
 		.pc_func = nfsd_proc_setattr,
 		.pc_decode = nfssvc_decode_sattrargs,
-		.pc_encode = nfssvc_encode_attrstatres,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_attrstat,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
-		.pc_argzero = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+AT,
-		.pc_name = "SETATTR",
 	},
 	[NFSPROC_ROOT] = {
 		.pc_func = nfsd_proc_root,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfssvc_decode_void,
+		.pc_encode = nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_void),
+		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = 0,
-		.pc_name = "ROOT",
 	},
 	[NFSPROC_LOOKUP] = {
 		.pc_func = nfsd_proc_lookup,
@@ -684,22 +660,18 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_diropres,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
-		.pc_argzero = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+FH+AT,
-		.pc_name = "LOOKUP",
 	},
 	[NFSPROC_READLINK] = {
 		.pc_func = nfsd_proc_readlink,
-		.pc_decode = nfssvc_decode_fhandleargs,
+		.pc_decode = nfssvc_decode_readlinkargs,
 		.pc_encode = nfssvc_encode_readlinkres,
-		.pc_argsize = sizeof(struct nfsd_fhandle),
-		.pc_argzero = sizeof(struct nfsd_fhandle),
+		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
-		.pc_name = "READLINK",
 	},
 	[NFSPROC_READ] = {
 		.pc_func = nfsd_proc_read,
@@ -707,34 +679,28 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 		.pc_encode = nfssvc_encode_readres,
 		.pc_release = nfssvc_release_readres,
 		.pc_argsize = sizeof(struct nfsd_readargs),
-		.pc_argzero = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
-		.pc_name = "READ",
 	},
 	[NFSPROC_WRITECACHE] = {
 		.pc_func = nfsd_proc_writecache,
-		.pc_decode = nfssvc_decode_voidarg,
-		.pc_encode = nfssvc_encode_voidres,
-		.pc_argsize = sizeof(struct nfsd_voidargs),
-		.pc_argzero = sizeof(struct nfsd_voidargs),
-		.pc_ressize = sizeof(struct nfsd_voidres),
+		.pc_decode = nfssvc_decode_void,
+		.pc_encode = nfssvc_encode_void,
+		.pc_argsize = sizeof(struct nfsd_void),
+		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = 0,
-		.pc_name = "WRITECACHE",
 	},
 	[NFSPROC_WRITE] = {
 		.pc_func = nfsd_proc_write,
 		.pc_decode = nfssvc_decode_writeargs,
-		.pc_encode = nfssvc_encode_attrstatres,
+		.pc_encode = nfssvc_encode_attrstat,
 		.pc_release = nfssvc_release_attrstat,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
-		.pc_argzero = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+AT,
-		.pc_name = "WRITE",
 	},
 	[NFSPROC_CREATE] = {
 		.pc_func = nfsd_proc_create,
@@ -742,55 +708,45 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_diropres,
 		.pc_argsize = sizeof(struct nfsd_createargs),
-		.pc_argzero = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+FH+AT,
-		.pc_name = "CREATE",
 	},
 	[NFSPROC_REMOVE] = {
 		.pc_func = nfsd_proc_remove,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = nfssvc_encode_statres,
+		.pc_encode = nfssvc_encode_stat,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
-		.pc_argzero = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_stat),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
-		.pc_name = "REMOVE",
 	},
 	[NFSPROC_RENAME] = {
 		.pc_func = nfsd_proc_rename,
 		.pc_decode = nfssvc_decode_renameargs,
-		.pc_encode = nfssvc_encode_statres,
+		.pc_encode = nfssvc_encode_stat,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
-		.pc_argzero = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_stat),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
-		.pc_name = "RENAME",
 	},
 	[NFSPROC_LINK] = {
 		.pc_func = nfsd_proc_link,
 		.pc_decode = nfssvc_decode_linkargs,
-		.pc_encode = nfssvc_encode_statres,
+		.pc_encode = nfssvc_encode_stat,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
-		.pc_argzero = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_stat),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
-		.pc_name = "LINK",
 	},
 	[NFSPROC_SYMLINK] = {
 		.pc_func = nfsd_proc_symlink,
 		.pc_decode = nfssvc_decode_symlinkargs,
-		.pc_encode = nfssvc_encode_statres,
+		.pc_encode = nfssvc_encode_stat,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
-		.pc_argzero = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_stat),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
-		.pc_name = "SYMLINK",
 	},
 	[NFSPROC_MKDIR] = {
 		.pc_func = nfsd_proc_mkdir,
@@ -798,43 +754,35 @@ static const struct svc_procedure nfsd_procedures2[18] = {
 		.pc_encode = nfssvc_encode_diropres,
 		.pc_release = nfssvc_release_diropres,
 		.pc_argsize = sizeof(struct nfsd_createargs),
-		.pc_argzero = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+FH+AT,
-		.pc_name = "MKDIR",
 	},
 	[NFSPROC_RMDIR] = {
 		.pc_func = nfsd_proc_rmdir,
 		.pc_decode = nfssvc_decode_diropargs,
-		.pc_encode = nfssvc_encode_statres,
+		.pc_encode = nfssvc_encode_stat,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
-		.pc_argzero = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_stat),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
-		.pc_name = "RMDIR",
 	},
 	[NFSPROC_READDIR] = {
 		.pc_func = nfsd_proc_readdir,
 		.pc_decode = nfssvc_decode_readdirargs,
 		.pc_encode = nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
-		.pc_argzero = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
 		.pc_cachetype = RC_NOCACHE,
-		.pc_name = "READDIR",
 	},
 	[NFSPROC_STATFS] = {
 		.pc_func = nfsd_proc_statfs,
-		.pc_decode = nfssvc_decode_fhandleargs,
+		.pc_decode = nfssvc_decode_fhandle,
 		.pc_encode = nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
-		.pc_argzero = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+5,
-		.pc_name = "STATFS",
 	},
 };
 
@@ -848,3 +796,61 @@ const struct svc_version nfsd_version2 = {
 	.vs_dispatch	= nfsd_dispatch,
 	.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
+
+/*
+ * Map errnos to NFS errnos.
+ */
+__be32
+nfserrno (int errno)
+{
+	static struct {
+		__be32	nfserr;
+		int	syserr;
+	} nfs_errtbl[] = {
+		{ nfs_ok, 0 },
+		{ nfserr_perm, -EPERM },
+		{ nfserr_noent, -ENOENT },
+		{ nfserr_io, -EIO },
+		{ nfserr_nxio, -ENXIO },
+		{ nfserr_fbig, -E2BIG },
+		{ nfserr_acces, -EACCES },
+		{ nfserr_exist, -EEXIST },
+		{ nfserr_xdev, -EXDEV },
+		{ nfserr_mlink, -EMLINK },
+		{ nfserr_nodev, -ENODEV },
+		{ nfserr_notdir, -ENOTDIR },
+		{ nfserr_isdir, -EISDIR },
+		{ nfserr_inval, -EINVAL },
+		{ nfserr_fbig, -EFBIG },
+		{ nfserr_nospc, -ENOSPC },
+		{ nfserr_rofs, -EROFS },
+		{ nfserr_mlink, -EMLINK },
+		{ nfserr_nametoolong, -ENAMETOOLONG },
+		{ nfserr_notempty, -ENOTEMPTY },
+#ifdef EDQUOT
+		{ nfserr_dquot, -EDQUOT },
+#endif
+		{ nfserr_stale, -ESTALE },
+		{ nfserr_jukebox, -ETIMEDOUT },
+		{ nfserr_jukebox, -ERESTARTSYS },
+		{ nfserr_jukebox, -EAGAIN },
+		{ nfserr_jukebox, -EWOULDBLOCK },
+		{ nfserr_jukebox, -ENOMEM },
+		{ nfserr_io, -ETXTBSY },
+		{ nfserr_notsupp, -EOPNOTSUPP },
+		{ nfserr_toosmall, -ETOOSMALL },
+		{ nfserr_serverfault, -ESERVERFAULT },
+		{ nfserr_serverfault, -ENFILE },
+		{ nfserr_io, -EUCLEAN },
+		{ nfserr_perm, -ENOKEY },
+	};
+	int	i;
+
+	for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
+		if (nfs_errtbl[i].syserr == errno)
+			return nfs_errtbl[i].nfserr;
+	}
+	WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
+	return nfserr_io;
+}
+
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3d4fd40c987b..2e61a565cdbd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -12,7 +12,6 @@
 #include <linux/module.h>
 #include <linux/fs_struct.h>
 #include <linux/swap.h>
-#include <linux/siphash.h>
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
@@ -30,10 +29,14 @@
 #include "netns.h"
 #include "filecache.h"
 
-#include "trace.h"
-
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
+bool inter_copy_offload_enable;
+EXPORT_SYMBOL_GPL(inter_copy_offload_enable);
+module_param(inter_copy_offload_enable, bool, 0644);
+MODULE_PARM_DESC(inter_copy_offload_enable,
+		 "Enable inter server to server copy offload. Default: false");
+
 extern struct svc_program	nfsd_program;
 static int			nfsd(void *vrqstp);
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -56,17 +59,18 @@ static __be32			nfsd_init_request(struct svc_rqst *,
 						struct svc_process_info *);
 
 /*
- * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members
- * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks.
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
+ * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+ * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
  *
  * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
- * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
- * nn->keep_active is set).  That number of nfsd threads must
- * exist and each must be listed in ->sp_all_threads in some entry of
- * ->sv_pools[].
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
+ * of nfsd threads must exist and each must be listed in ->sp_all_threads in
+ * some entry of ->sv_pools[].
  *
- * Each active thread holds a counted reference on nn->nfsd_serv, as does
- * the nn->keep_active flag and various transient calls to svc_get().
+ * Transitions of the thread count between zero and non-zero are of particular
+ * interest since the svc_serv needs to be created and initialized at that
+ * point, or freed.
  *
  * Finally, the nfsd_mutex also protects some of the global variables that are
  * accessed when nfsd starts and that are settable via the write_* routines in
@@ -84,19 +88,15 @@ DEFINE_MUTEX(nfsd_mutex);
  * version 4.1 DRC caches.
  * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
  */
-DEFINE_SPINLOCK(nfsd_drc_lock);
+spinlock_t	nfsd_drc_lock;
 unsigned long	nfsd_drc_max_mem;
 unsigned long	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
 static const struct svc_version *nfsd_acl_version[] = {
-# if defined(CONFIG_NFSD_V2_ACL)
 	[2] = &nfsd_acl_version2,
-# endif
-# if defined(CONFIG_NFSD_V3_ACL)
 	[3] = &nfsd_acl_version3,
-# endif
 };
 
 #define NFSD_ACL_MINVERS            2
@@ -120,10 +120,10 @@ static struct svc_stat	nfsd_acl_svcstats = {
 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
 
 static const struct svc_version *nfsd_version[] = {
-#if defined(CONFIG_NFSD_V2)
 	[2] = &nfsd_version2,
-#endif
+#if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
+#endif
 #if defined(CONFIG_NFSD_V4)
 	[4] = &nfsd_version4,
 #endif
@@ -297,13 +297,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred)
 	if (!list_empty(&nn->nfsd_serv->sv_permsocks))
 		return 0;
 
-	error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
-				SVC_SOCK_DEFAULTS, cred);
+	error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
+					SVC_SOCK_DEFAULTS, cred);
 	if (error < 0)
 		return error;
 
-	error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
-				SVC_SOCK_DEFAULTS, cred);
+	error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
+					SVC_SOCK_DEFAULTS, cred);
 	if (error < 0)
 		return error;
 
@@ -312,7 +312,7 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred)
 
 static int nfsd_users = 0;
 
-static int nfsd_startup_generic(void)
+static int nfsd_startup_generic(int nrservs)
 {
 	int ret;
 
@@ -349,60 +349,36 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
 	return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
 }
 
-/**
- * nfsd_copy_write_verifier - Atomically copy a write verifier
- * @verf: buffer in which to receive the verifier cookie
- * @nn: NFS net namespace
- *
- * This function provides a wait-free mechanism for copying the
- * namespace's write verifier without tearing it.
- */
-void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
+void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
 {
 	int seq = 0;
 
 	do {
-		read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
-		memcpy(verf, nn->writeverf, sizeof(nn->writeverf));
-	} while (need_seqretry(&nn->writeverf_lock, seq));
-	done_seqretry(&nn->writeverf_lock, seq);
+		read_seqbegin_or_lock(&nn->boot_lock, &seq);
+		/*
+		 * This is opaque to the client, so no need to byte-swap. Use
+		 * __force to keep sparse happy. y2038 time_t overflow is
+		 * irrelevant in this usage.
+		 */
+		verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+		verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+	} while (need_seqretry(&nn->boot_lock, seq));
+	done_seqretry(&nn->boot_lock, seq);
 }
 
-static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
+static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
 {
-	struct timespec64 now;
-	u64 verf;
-
-	/*
-	 * Because the time value is hashed, y2038 time_t overflow
-	 * is irrelevant in this usage.
-	 */
-	ktime_get_raw_ts64(&now);
-	verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
-	memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
+	ktime_get_real_ts64(&nn->nfssvc_boot);
 }
 
-/**
- * nfsd_reset_write_verifier - Generate a new write verifier
- * @nn: NFS net namespace
- *
- * This function updates the ->writeverf field of @nn. This field
- * contains an opaque cookie that, according to Section 18.32.3 of
- * RFC 8881, "the client can use to determine whether a server has
- * changed instance state (e.g., server restart) between a call to
- * WRITE and a subsequent call to either WRITE or COMMIT.  This
- * cookie MUST be unchanged during a single instance of the NFSv4.1
- * server and MUST be unique between instances of the NFSv4.1
- * server."
- */
-void nfsd_reset_write_verifier(struct nfsd_net *nn)
+void nfsd_reset_boot_verifier(struct nfsd_net *nn)
 {
-	write_seqlock(&nn->writeverf_lock);
-	nfsd_reset_write_verifier_locked(nn);
-	write_sequnlock(&nn->writeverf_lock);
+	write_seqlock(&nn->boot_lock);
+	nfsd_reset_boot_verifier_locked(nn);
+	write_sequnlock(&nn->boot_lock);
 }
 
-static int nfsd_startup_net(struct net *net, const struct cred *cred)
+static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
@@ -410,7 +386,7 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred)
 	if (nn->nfsd_net_up)
 		return 0;
 
-	ret = nfsd_startup_generic();
+	ret = nfsd_startup_generic(nrservs);
 	if (ret)
 		return ret;
 	ret = nfsd_init_socks(net, cred);
@@ -431,9 +407,6 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred)
 	if (ret)
 		goto out_filecache;
 
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
-	nfsd4_ssc_init_umount_work(nn);
-#endif
 	nn->nfsd_net_up = true;
 	return 0;
 
@@ -463,7 +436,6 @@ static void nfsd_shutdown_net(struct net *net)
 	nfsd_shutdown_generic();
 }
 
-static DEFINE_SPINLOCK(nfsd_notifier_lock);
 static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
 	void *ptr)
 {
@@ -473,17 +445,18 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	struct sockaddr_in sin;
 
-	if (event != NETDEV_DOWN || !nn->nfsd_serv)
+	if ((event != NETDEV_DOWN) ||
+	    !atomic_inc_not_zero(&nn->ntf_refcnt))
 		goto out;
 
-	spin_lock(&nfsd_notifier_lock);
 	if (nn->nfsd_serv) {
 		dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = ifa->ifa_local;
 		svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
 	}
-	spin_unlock(&nfsd_notifier_lock);
+	atomic_dec(&nn->ntf_refcnt);
+	wake_up(&nn->ntf_wq);
 
 out:
 	return NOTIFY_DONE;
@@ -503,10 +476,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	struct sockaddr_in6 sin6;
 
-	if (event != NETDEV_DOWN || !nn->nfsd_serv)
+	if ((event != NETDEV_DOWN) ||
+	    !atomic_inc_not_zero(&nn->ntf_refcnt))
 		goto out;
 
-	spin_lock(&nfsd_notifier_lock);
 	if (nn->nfsd_serv) {
 		dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
 		sin6.sin6_family = AF_INET6;
@@ -515,8 +488,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
 			sin6.sin6_scope_id = ifa->idev->dev->ifindex;
 		svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
 	}
-	spin_unlock(&nfsd_notifier_lock);
-
+	atomic_dec(&nn->ntf_refcnt);
+	wake_up(&nn->ntf_wq);
 out:
 	return NOTIFY_DONE;
 }
@@ -529,15 +502,11 @@ static struct notifier_block nfsd_inet6addr_notifier = {
 /* Only used under nfsd_mutex, so this atomic may be overkill: */
 static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
 
-void nfsd_last_thread(struct net *net)
+static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct svc_serv *serv = nn->nfsd_serv;
-
-	spin_lock(&nfsd_notifier_lock);
-	nn->nfsd_serv = NULL;
-	spin_unlock(&nfsd_notifier_lock);
 
+	atomic_dec(&nn->ntf_refcnt);
 	/* check if the notifier still has clients */
 	if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
 		unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -545,8 +514,7 @@ void nfsd_last_thread(struct net *net)
 		unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
 #endif
 	}
-
-	svc_xprt_destroy_all(serv, net);
+	wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
 
 	/*
 	 * write_ports can create the server without actually starting
@@ -599,6 +567,7 @@ static void set_max_drc(void)
 	nfsd_drc_max_mem = (nr_free_buffer_pages()
 					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
 	nfsd_drc_mem_used = 0;
+	spin_lock_init(&nfsd_drc_lock);
 	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
 }
 
@@ -623,6 +592,24 @@ static int nfsd_get_default_max_blksize(void)
 	return ret;
 }
 
+static const struct svc_serv_ops nfsd_thread_sv_ops = {
+	.svo_shutdown		= nfsd_last_thread,
+	.svo_function		= nfsd,
+	.svo_enqueue_xprt	= svc_xprt_do_enqueue,
+	.svo_setup		= svc_set_num_threads,
+	.svo_module		= THIS_MODULE,
+};
+
+static void nfsd_complete_shutdown(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+	WARN_ON(!mutex_is_locked(&nfsd_mutex));
+
+	nn->nfsd_serv = NULL;
+	complete(&nn->nfsd_shutdown_complete);
+}
+
 void nfsd_shutdown_threads(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -637,10 +624,11 @@ void nfsd_shutdown_threads(struct net *net)
 
 	svc_get(serv);
 	/* Kill outstanding nfsd threads */
-	svc_set_num_threads(serv, NULL, 0);
-	nfsd_last_thread(net);
-	svc_put(serv);
+	serv->sv_ops->svo_setup(serv, NULL, 0);
+	nfsd_destroy(net);
 	mutex_unlock(&nfsd_mutex);
+	/* Wait for shutdown of nfsd_serv to complete */
+	wait_for_completion(&nn->nfsd_shutdown_complete);
 }
 
 bool i_am_nfsd(void)
@@ -652,7 +640,6 @@ int nfsd_create_serv(struct net *net)
 {
 	int error;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct svc_serv *serv;
 
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nn->nfsd_serv) {
@@ -662,19 +649,19 @@ int nfsd_create_serv(struct net *net)
 	if (nfsd_max_blksize == 0)
 		nfsd_max_blksize = nfsd_get_default_max_blksize();
 	nfsd_reset_versions(nn);
-	serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
-	if (serv == NULL)
+	nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+						&nfsd_thread_sv_ops);
+	if (nn->nfsd_serv == NULL)
 		return -ENOMEM;
+	init_completion(&nn->nfsd_shutdown_complete);
 
-	serv->sv_maxconn = nn->max_connections;
-	error = svc_bind(serv, net);
+	nn->nfsd_serv->sv_maxconn = nn->max_connections;
+	error = svc_bind(nn->nfsd_serv, net);
 	if (error < 0) {
-		svc_put(serv);
+		svc_destroy(nn->nfsd_serv);
+		nfsd_complete_shutdown(net);
 		return error;
 	}
-	spin_lock(&nfsd_notifier_lock);
-	nn->nfsd_serv = serv;
-	spin_unlock(&nfsd_notifier_lock);
 
 	set_max_drc();
 	/* check if the notifier is already set */
@@ -684,7 +671,8 @@ int nfsd_create_serv(struct net *net)
 		register_inet6addr_notifier(&nfsd_inet6addr_notifier);
 #endif
 	}
-	nfsd_reset_write_verifier(nn);
+	atomic_inc(&nn->ntf_refcnt);
+	nfsd_reset_boot_verifier(nn);
 	return 0;
 }
 
@@ -711,6 +699,18 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
 	return 0;
 }
 
+void nfsd_destroy(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
+
+	if (destroy)
+		svc_shutdown_net(nn->nfsd_serv, net);
+	svc_destroy(nn->nfsd_serv);
+	if (destroy)
+		nfsd_complete_shutdown(net);
+}
+
 int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 {
 	int i = 0;
@@ -735,7 +735,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 	if (tot > NFSD_MAXSERVS) {
 		/* total too large: scale down requested numbers */
 		for (i = 0; i < n && tot > 0; i++) {
-			int new = nthreads[i] * NFSD_MAXSERVS / tot;
+		    	int new = nthreads[i] * NFSD_MAXSERVS / tot;
 			tot -= (nthreads[i] - new);
 			nthreads[i] = new;
 		}
@@ -755,13 +755,12 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
 	/* apply the new numbers */
 	svc_get(nn->nfsd_serv);
 	for (i = 0; i < n; i++) {
-		err = svc_set_num_threads(nn->nfsd_serv,
-					  &nn->nfsd_serv->sv_pools[i],
-					  nthreads[i]);
+		err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+				&nn->nfsd_serv->sv_pools[i], nthreads[i]);
 		if (err)
 			break;
 	}
-	svc_put(nn->nfsd_serv);
+	nfsd_destroy(net);
 	return err;
 }
 
@@ -776,7 +775,6 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
 	int	error;
 	bool	nfsd_up_before;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-	struct svc_serv *serv;
 
 	mutex_lock(&nfsd_mutex);
 	dprintk("nfsd: creating service\n");
@@ -788,7 +786,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
 	if (nrservs == 0 && nn->nfsd_serv == NULL)
 		goto out;
 
-	strscpy(nn->nfsd_name, utsname()->nodename,
+	strlcpy(nn->nfsd_name, utsname()->nodename,
 		sizeof(nn->nfsd_name));
 
 	error = nfsd_create_serv(net);
@@ -796,25 +794,24 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
 		goto out;
 
 	nfsd_up_before = nn->nfsd_net_up;
-	serv = nn->nfsd_serv;
 
-	error = nfsd_startup_net(net, cred);
+	error = nfsd_startup_net(nrservs, net, cred);
 	if (error)
-		goto out_put;
-	error = svc_set_num_threads(serv, NULL, nrservs);
+		goto out_destroy;
+	error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+			NULL, nrservs);
 	if (error)
 		goto out_shutdown;
-	error = serv->sv_nrthreads;
-	if (error == 0)
-		nfsd_last_thread(net);
+	/* We are holding a reference to nn->nfsd_serv which
+	 * we don't want to count in the return value,
+	 * so subtract 1
+	 */
+	error = nn->nfsd_serv->sv_nrthreads - 1;
 out_shutdown:
 	if (error < 0 && !nfsd_up_before)
 		nfsd_shutdown_net(net);
-out_put:
-	/* Threads now hold service active */
-	if (xchg(&nn->keep_active, 0))
-		svc_put(serv);
-	svc_put(serv);
+out_destroy:
+	nfsd_destroy(net);		/* Release server */
 out:
 	mutex_unlock(&nfsd_mutex);
 	return error;
@@ -928,6 +925,9 @@ nfsd(void *vrqstp)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int err;
 
+	/* Lock module and set up kernel thread */
+	mutex_lock(&nfsd_mutex);
+
 	/* At this point, the thread shares current->fs
 	 * with the init process. We need to create files with the
 	 * umask as defined by the client instead of init's umask. */
@@ -938,7 +938,17 @@ nfsd(void *vrqstp)
 
 	current->fs->umask = 0;
 
-	atomic_inc(&nfsdstats.th_cnt);
+	/*
+	 * thread is spawned with all signals set to SIG_IGN, re-enable
+	 * the ones that will bring down the thread
+	 */
+	allow_signal(SIGKILL);
+	allow_signal(SIGHUP);
+	allow_signal(SIGINT);
+	allow_signal(SIGQUIT);
+
+	nfsdstats.th_cnt++;
+	mutex_unlock(&nfsd_mutex);
 
 	set_freezable();
 
@@ -962,14 +972,57 @@ nfsd(void *vrqstp)
 		validate_process_creds();
 	}
 
-	atomic_dec(&nfsdstats.th_cnt);
+	/* Clear signals before calling svc_exit_thread() */
+	flush_signals(current);
+
+	mutex_lock(&nfsd_mutex);
+	nfsdstats.th_cnt --;
 
 out:
+	rqstp->rq_server = NULL;
+
 	/* Release the thread */
 	svc_exit_thread(rqstp);
+
+	nfsd_destroy(net);
+
+	/* Release module */
+	mutex_unlock(&nfsd_mutex);
+	module_put_and_exit(0);
 	return 0;
 }
 
+/*
+ * A write procedure can have a large argument, and a read procedure can
+ * have a large reply, but no NFSv2 or NFSv3 procedure has argument and
+ * reply that can both be larger than a page.  The xdr code has taken
+ * advantage of this assumption to be a bit sloppy about bounds checking in
+ * some cases.  Pending a rewrite of the NFSv2/v3 xdr code to fix that
+ * problem, we enforce these assumptions here:
+ */
+static bool nfs_request_too_big(struct svc_rqst *rqstp,
+				const struct svc_procedure *proc)
+{
+	/*
+	 * The ACL code has more careful bounds-checking and is not
+	 * susceptible to this problem:
+	 */
+	if (rqstp->rq_prog != NFS_PROGRAM)
+		return false;
+	/*
+	 * Ditto NFSv4 (which can in theory have argument and reply both
+	 * more than a page):
+	 */
+	if (rqstp->rq_vers >= 4)
+		return false;
+	/* The reply will be small, we're OK: */
+	if (proc->pc_xdrressize > 0 &&
+	    proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
+		return false;
+
+	return rqstp->rq_arg.len > PAGE_SIZE;
+}
+
 /**
  * nfsd_dispatch - Process an NFS or NFSACL Request
  * @rqstp: incoming request
@@ -984,15 +1037,22 @@ out:
 int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	const struct svc_procedure *proc = rqstp->rq_procinfo;
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	__be32 *p;
+
+	dprintk("nfsd_dispatch: vers %d proc %d\n",
+				rqstp->rq_vers, rqstp->rq_proc);
+
+	if (nfs_request_too_big(rqstp, proc))
+		goto out_too_large;
 
 	/*
 	 * Give the xdr decoder a chance to change this if it wants
 	 * (necessary in the NFSv4.0 compound case)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
-
-	svcxdr_init_decode(rqstp);
-	if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
+	if (!proc->pc_decode(rqstp, argv->iov_base))
 		goto out_decode_err;
 
 	switch (nfsd_cache_lookup(rqstp)) {
@@ -1008,64 +1068,43 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * Need to grab the location to store the status, as
 	 * NFSv4 does some encoding while processing
 	 */
-	svcxdr_init_encode(rqstp);
+	p = resv->iov_base + resv->iov_len;
+	resv->iov_len += sizeof(__be32);
 
 	*statp = proc->pc_func(rqstp);
-	if (test_bit(RQ_DROPME, &rqstp->rq_flags))
+	if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
 		goto out_update_drop;
 
-	if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
+	if (!proc->pc_encode(rqstp, p))
 		goto out_encode_err;
 
 	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
 out_cached_reply:
 	return 1;
 
+out_too_large:
+	dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
+	*statp = rpc_garbage_args;
+	return 1;
+
 out_decode_err:
-	trace_nfsd_garbage_args_err(rqstp);
+	dprintk("nfsd: failed to decode arguments!\n");
 	*statp = rpc_garbage_args;
 	return 1;
 
 out_update_drop:
+	dprintk("nfsd: Dropping request; may be revisited later\n");
 	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
 out_dropit:
 	return 0;
 
 out_encode_err:
-	trace_nfsd_cant_encode_err(rqstp);
+	dprintk("nfsd: failed to encode result!\n");
 	nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
 	*statp = rpc_system_err;
 	return 1;
 }
 
-/**
- * nfssvc_decode_voidarg - Decode void arguments
- * @rqstp: Server RPC transaction context
- * @xdr: XDR stream positioned at arguments to decode
- *
- * Return values:
- *   %false: Arguments were not valid
- *   %true: Decoding was successful
- */
-bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	return true;
-}
-
-/**
- * nfssvc_encode_voidres - Encode void results
- * @rqstp: Server RPC transaction context
- * @xdr: XDR stream into which to encode results
- *
- * Return values:
- *   %false: Local error while encoding
- *   %true: Encoding was successful
- */
-bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
-	return true;
-}
-
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 {
 	int ret;
@@ -1076,6 +1115,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 		mutex_unlock(&nfsd_mutex);
 		return -ENODEV;
 	}
+	/* bump up the pseudo refcount while traversing */
 	svc_get(nn->nfsd_serv);
 	ret = svc_pool_stats_open(nn->nfsd_serv, file);
 	mutex_unlock(&nfsd_mutex);
@@ -1084,12 +1124,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 
 int nfsd_pool_stats_release(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq = file->private_data;
-	struct svc_serv *serv = seq->private;
 	int ret = seq_release(inode, file);
+	struct net *net = inode->i_sb->s_fs_info;
 
 	mutex_lock(&nfsd_mutex);
-	svc_put(serv);
+	/* this function really, really should have been called svc_put() */
+	nfsd_destroy(net);
 	mutex_unlock(&nfsd_mutex);
 	return ret;
 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index caf6355b18fa..8a288c8fcd57 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -9,10 +9,12 @@
 #include "xdr.h"
 #include "auth.h"
 
+#define NFSDDBG_FACILITY		NFSDDBG_XDR
+
 /*
  * Mapping of S_IF* types to NFS file types
  */
-static const u32 nfs_ftypes[] = {
+static u32	nfs_ftypes[] = {
 	NFNON,  NFCHR,  NFCHR, NFBAD,
 	NFDIR,  NFBAD,  NFBLK, NFBAD,
 	NFREG,  NFBAD,  NFLNK, NFBAD,
@@ -21,168 +23,93 @@ static const u32 nfs_ftypes[] = {
 
 
 /*
- * Basic NFSv2 data types (RFC 1094 Section 2.3)
+ * XDR functions for basic NFS types
  */
-
-/**
- * svcxdr_encode_stat - Encode an NFSv2 status code
- * @xdr: XDR stream
- * @status: status value to encode
- *
- * Return values:
- *   %false: Send buffer space was exhausted
- *   %true: Success
- */
-bool
-svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status)
+static __be32 *
+decode_fh(__be32 *p, struct svc_fh *fhp)
 {
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, sizeof(status));
-	if (!p)
-		return false;
-	*p = status;
-
-	return true;
-}
-
-/**
- * svcxdr_decode_fhandle - Decode an NFSv2 file handle
- * @xdr: XDR stream positioned at an encoded NFSv2 FH
- * @fhp: OUT: filled-in server file handle
- *
- * Return values:
- *  %false: The encoded file handle was not valid
- *  %true: @fhp has been initialized
- */
-bool
-svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp)
-{
-	__be32 *p;
-
-	p = xdr_inline_decode(xdr, NFS_FHSIZE);
-	if (!p)
-		return false;
 	fh_init(fhp, NFS_FHSIZE);
-	memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE);
+	memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
 	fhp->fh_handle.fh_size = NFS_FHSIZE;
 
-	return true;
+	/* FIXME: Look up export pointer here and verify
+	 * Sun Secure RPC if requested */
+	return p + (NFS_FHSIZE >> 2);
 }
 
-static bool
-svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp)
+/* Helper function for NFSv2 ACL code */
+__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 {
-	__be32 *p;
-
-	p = xdr_reserve_space(xdr, NFS_FHSIZE);
-	if (!p)
-		return false;
-	memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE);
-
-	return true;
+	return decode_fh(p, fhp);
 }
 
 static __be32 *
-encode_timeval(__be32 *p, const struct timespec64 *time)
+encode_fh(__be32 *p, struct svc_fh *fhp)
 {
-	*p++ = cpu_to_be32((u32)time->tv_sec);
-	if (time->tv_nsec)
-		*p++ = cpu_to_be32(time->tv_nsec / NSEC_PER_USEC);
-	else
-		*p++ = xdr_zero;
+	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
+	return p + (NFS_FHSIZE>> 2);
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ */
+static __be32 *
+decode_filename(__be32 *p, char **namp, unsigned int *lenp)
+{
+	char		*name;
+	unsigned int	i;
+
+	if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) {
+		for (i = 0, name = *namp; i < *lenp; i++, name++) {
+			if (*name == '\0' || *name == '/')
+				return NULL;
+		}
+	}
+
 	return p;
 }
 
-static bool
-svcxdr_decode_filename(struct xdr_stream *xdr, char **name, unsigned int *len)
+static __be32 *
+decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns)
 {
-	u32 size, i;
-	__be32 *p;
-	char *c;
-
-	if (xdr_stream_decode_u32(xdr, &size) < 0)
-		return false;
-	if (size == 0 || size > NFS_MAXNAMLEN)
-		return false;
-	p = xdr_inline_decode(xdr, size);
-	if (!p)
-		return false;
-
-	*len = size;
-	*name = (char *)p;
-	for (i = 0, c = *name; i < size; i++, c++)
-		if (*c == '\0' || *c == '/')
-			return false;
-
-	return true;
-}
-
-static bool
-svcxdr_decode_diropargs(struct xdr_stream *xdr, struct svc_fh *fhp,
-			char **name, unsigned int *len)
-{
-	return svcxdr_decode_fhandle(xdr, fhp) &&
-		svcxdr_decode_filename(xdr, name, len);
-}
-
-static bool
-svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct iattr *iap)
-{
-	u32 tmp1, tmp2;
-	__be32 *p;
-
-	p = xdr_inline_decode(xdr, XDR_UNIT * 8);
-	if (!p)
-		return false;
+	u32	tmp, tmp1;
 
 	iap->ia_valid = 0;
 
-	/*
-	 * Some Sun clients put 0xffff in the mode field when they
-	 * mean 0xffffffff.
+	/* Sun client bug compatibility check: some sun clients seem to
+	 * put 0xffff in the mode field when they mean 0xffffffff.
+	 * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah.
 	 */
-	tmp1 = be32_to_cpup(p++);
-	if (tmp1 != (u32)-1 && tmp1 != 0xffff) {
+	if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) {
 		iap->ia_valid |= ATTR_MODE;
-		iap->ia_mode = tmp1;
+		iap->ia_mode = tmp;
 	}
-
-	tmp1 = be32_to_cpup(p++);
-	if (tmp1 != (u32)-1) {
-		iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), tmp1);
+	if ((tmp = ntohl(*p++)) != (u32)-1) {
+		iap->ia_uid = make_kuid(userns, tmp);
 		if (uid_valid(iap->ia_uid))
 			iap->ia_valid |= ATTR_UID;
 	}
-
-	tmp1 = be32_to_cpup(p++);
-	if (tmp1 != (u32)-1) {
-		iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), tmp1);
+	if ((tmp = ntohl(*p++)) != (u32)-1) {
+		iap->ia_gid = make_kgid(userns, tmp);
 		if (gid_valid(iap->ia_gid))
 			iap->ia_valid |= ATTR_GID;
 	}
-
-	tmp1 = be32_to_cpup(p++);
-	if (tmp1 != (u32)-1) {
+	if ((tmp = ntohl(*p++)) != (u32)-1) {
 		iap->ia_valid |= ATTR_SIZE;
-		iap->ia_size = tmp1;
+		iap->ia_size = tmp;
 	}
-
-	tmp1 = be32_to_cpup(p++);
-	tmp2 = be32_to_cpup(p++);
-	if (tmp1 != (u32)-1 && tmp2 != (u32)-1) {
+	tmp  = ntohl(*p++); tmp1 = ntohl(*p++);
+	if (tmp != (u32)-1 && tmp1 != (u32)-1) {
 		iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
-		iap->ia_atime.tv_sec = tmp1;
-		iap->ia_atime.tv_nsec = tmp2 * NSEC_PER_USEC;
+		iap->ia_atime.tv_sec = tmp;
+		iap->ia_atime.tv_nsec = tmp1 * 1000; 
 	}
-
-	tmp1 = be32_to_cpup(p++);
-	tmp2 = be32_to_cpup(p++);
-	if (tmp1 != (u32)-1 && tmp2 != (u32)-1) {
+	tmp  = ntohl(*p++); tmp1 = ntohl(*p++);
+	if (tmp != (u32)-1 && tmp1 != (u32)-1) {
 		iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
-		iap->ia_mtime.tv_sec = tmp1;
-		iap->ia_mtime.tv_nsec = tmp2 * NSEC_PER_USEC;
+		iap->ia_mtime.tv_sec = tmp;
+		iap->ia_mtime.tv_nsec = tmp1 * 1000; 
 		/*
 		 * Passing the invalid value useconds=1000000 for mtime
 		 * is a Sun convention for "set both mtime and atime to
@@ -192,447 +119,476 @@ svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		 * sattr in section 6.1 of "NFS Illustrated" by
 		 * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
 		 */
-		if (tmp2 == 1000000)
+		if (tmp1 == 1000000)
 			iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET);
 	}
-
-	return true;
+	return p;
 }
 
-/**
- * svcxdr_encode_fattr - Encode NFSv2 file attributes
- * @rqstp: Context of a completed RPC transaction
- * @xdr: XDR stream
- * @fhp: File handle to encode
- * @stat: Attributes to encode
- *
- * Return values:
- *   %false: Send buffer space was exhausted
- *   %true: Success
- */
-bool
-svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-		    const struct svc_fh *fhp, const struct kstat *stat)
+static __be32 *
+encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
+	     struct kstat *stat)
 {
 	struct user_namespace *userns = nfsd_user_namespace(rqstp);
-	struct dentry *dentry = fhp->fh_dentry;
-	int type = stat->mode & S_IFMT;
+	struct dentry	*dentry = fhp->fh_dentry;
+	int type;
 	struct timespec64 time;
-	__be32 *p;
-	u32 fsid;
+	u32 f;
 
-	p = xdr_reserve_space(xdr, XDR_UNIT * 17);
-	if (!p)
-		return false;
+	type = (stat->mode & S_IFMT);
 
-	*p++ = cpu_to_be32(nfs_ftypes[type >> 12]);
-	*p++ = cpu_to_be32((u32)stat->mode);
-	*p++ = cpu_to_be32((u32)stat->nlink);
-	*p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid));
-	*p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid));
+	*p++ = htonl(nfs_ftypes[type >> 12]);
+	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) stat->nlink);
+	*p++ = htonl((u32) from_kuid_munged(userns, stat->uid));
+	*p++ = htonl((u32) from_kgid_munged(userns, stat->gid));
 
-	if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN)
-		*p++ = cpu_to_be32(NFS_MAXPATHLEN);
-	else
-		*p++ = cpu_to_be32((u32) stat->size);
-	*p++ = cpu_to_be32((u32) stat->blksize);
+	if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
+		*p++ = htonl(NFS_MAXPATHLEN);
+	} else {
+		*p++ = htonl((u32) stat->size);
+	}
+	*p++ = htonl((u32) stat->blksize);
 	if (S_ISCHR(type) || S_ISBLK(type))
-		*p++ = cpu_to_be32(new_encode_dev(stat->rdev));
+		*p++ = htonl(new_encode_dev(stat->rdev));
 	else
-		*p++ = cpu_to_be32(0xffffffff);
-	*p++ = cpu_to_be32((u32)stat->blocks);
-
+		*p++ = htonl(0xffffffff);
+	*p++ = htonl((u32) stat->blocks);
 	switch (fsid_source(fhp)) {
+	default:
+	case FSIDSOURCE_DEV:
+		*p++ = htonl(new_encode_dev(stat->dev));
+		break;
 	case FSIDSOURCE_FSID:
-		fsid = (u32)fhp->fh_export->ex_fsid;
+		*p++ = htonl((u32) fhp->fh_export->ex_fsid);
 		break;
 	case FSIDSOURCE_UUID:
-		fsid = ((u32 *)fhp->fh_export->ex_uuid)[0];
-		fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[1];
-		fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[2];
-		fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[3];
-		break;
-	default:
-		fsid = new_encode_dev(stat->dev);
+		f = ((u32*)fhp->fh_export->ex_uuid)[0];
+		f ^= ((u32*)fhp->fh_export->ex_uuid)[1];
+		f ^= ((u32*)fhp->fh_export->ex_uuid)[2];
+		f ^= ((u32*)fhp->fh_export->ex_uuid)[3];
+		*p++ = htonl(f);
 		break;
 	}
-	*p++ = cpu_to_be32(fsid);
-
-	*p++ = cpu_to_be32((u32)stat->ino);
-	p = encode_timeval(p, &stat->atime);
+	*p++ = htonl((u32) stat->ino);
+	*p++ = htonl((u32) stat->atime.tv_sec);
+	*p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
 	time = stat->mtime;
-	lease_get_mtime(d_inode(dentry), &time);
-	p = encode_timeval(p, &time);
-	encode_timeval(p, &stat->ctime);
+	lease_get_mtime(d_inode(dentry), &time); 
+	*p++ = htonl((u32) time.tv_sec);
+	*p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
+	*p++ = htonl((u32) stat->ctime.tv_sec);
+	*p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0);
 
-	return true;
+	return p;
+}
+
+/* Helper function for NFSv2 ACL code */
+__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat)
+{
+	return encode_fattr(rqstp, p, fhp, stat);
 }
 
 /*
  * XDR decode functions
  */
+int
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
+{
+	return xdr_argsize_check(rqstp, p);
+}
 
-bool
-nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_fhandle *args = rqstp->rq_argp;
 
-	return svcxdr_decode_fhandle(xdr, &args->fh);
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_sattrargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_fhandle(xdr, &args->fh) &&
-		svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_diropargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len);
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_readargs *args = rqstp->rq_argp;
-	u32 totalcount;
+	unsigned int len;
+	int v;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
 
-	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
-	/* totalcount is ignored */
-	if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
-		return false;
+	args->offset    = ntohl(*p++);
+	len = args->count     = ntohl(*p++);
+	p++; /* totalcount - unused */
 
-	return true;
+	len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
+
+	/* set up somewhere to store response.
+	 * We take pages, put them on reslist and include in iovec
+	 */
+	v=0;
+	while (len > 0) {
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
+		rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
+		len -= rqstp->rq_vec[v].iov_len;
+		v++;
+	}
+	args->vlen = v;
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_writeargs *args = rqstp->rq_argp;
-	u32 beginoffset, totalcount;
+	unsigned int len, hdr, dlen;
+	struct kvec *head = rqstp->rq_arg.head;
 
-	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return false;
-	/* beginoffset is ignored */
-	if (xdr_stream_decode_u32(xdr, &beginoffset) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
-		return false;
-	/* totalcount is ignored */
-	if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
-		return false;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
 
-	/* opaque data */
-	if (xdr_stream_decode_u32(xdr, &args->len) < 0)
-		return false;
-	if (args->len > NFSSVC_MAXBLKSIZE_V2)
-		return false;
+	p++;				/* beginoffset */
+	args->offset = ntohl(*p++);	/* offset */
+	p++;				/* totalcount */
+	len = args->len = ntohl(*p++);
+	/*
+	 * The protocol specifies a maximum of 8192 bytes.
+	 */
+	if (len > NFSSVC_MAXBLKSIZE_V2)
+		return 0;
 
-	return xdr_stream_subsegment(xdr, &args->payload, args->len);
+	/*
+	 * Check to make sure that we got the right number of
+	 * bytes.
+	 */
+	hdr = (void*)p - head->iov_base;
+	if (hdr > head->iov_len)
+		return 0;
+	dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
+
+	/*
+	 * Round the length of the data which was specified up to
+	 * the next multiple of XDR units and then compare that
+	 * against the length which was actually received.
+	 * Note that when RPCSEC/GSS (for example) is used, the
+	 * data buffer can be padded so dlen might be larger
+	 * than required.  It must never be smaller.
+	 */
+	if (dlen < XDR_QUADLEN(len)*4)
+		return 0;
+
+	args->first.iov_base = (void *)p;
+	args->first.iov_len = head->iov_len - hdr;
+	return 1;
 }
 
-bool
-nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_createargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_diropargs(xdr, &args->fh,
-				       &args->name, &args->len) &&
-		svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
+	if (   !(p = decode_fh(p, &args->fh))
+	    || !(p = decode_filename(p, &args->name, &args->len)))
+		return 0;
+	p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_renameargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_diropargs(xdr, &args->ffh,
-				       &args->fname, &args->flen) &&
-		svcxdr_decode_diropargs(xdr, &args->tfh,
-					&args->tname, &args->tlen);
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_filename(p, &args->fname, &args->flen))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
+{
+	struct nfsd_readlinkargs *args = rqstp->rq_argp;
+
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	args->buffer = page_address(*(rqstp->rq_next_page++));
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+int
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_linkargs *args = rqstp->rq_argp;
 
-	return svcxdr_decode_fhandle(xdr, &args->ffh) &&
-		svcxdr_decode_diropargs(xdr, &args->tfh,
-					&args->tname, &args->tlen);
+	if (!(p = decode_fh(p, &args->ffh))
+	 || !(p = decode_fh(p, &args->tfh))
+	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
 }
 
-bool
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_symlinkargs *args = rqstp->rq_argp;
-	struct kvec *head = rqstp->rq_arg.head;
+	char *base = (char *)p;
+	size_t xdrlen;
 
-	if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
-		return false;
+	if (   !(p = decode_fh(p, &args->ffh))
+	    || !(p = decode_filename(p, &args->fname, &args->flen)))
+		return 0;
+
+	args->tlen = ntohl(*p++);
 	if (args->tlen == 0)
-		return false;
+		return 0;
 
-	args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
-	args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
-	if (!args->first.iov_base)
-		return false;
-	return svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
+	args->first.iov_base = p;
+	args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+	args->first.iov_len -= (char *)p - base;
+
+	/* This request is never larger than a page. Therefore,
+	 * transport will deliver either:
+	 * 1. pathname in the pagelist -> sattr is in the tail.
+	 * 2. everything in the head buffer -> sattr is in the head.
+	 */
+	if (rqstp->rq_arg.page_len) {
+		if (args->tlen != rqstp->rq_arg.page_len)
+			return 0;
+		p = rqstp->rq_arg.tail[0].iov_base;
+	} else {
+		xdrlen = XDR_QUADLEN(args->tlen);
+		if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
+			return 0;
+		p += xdrlen;
+	}
+	decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
+
+	return 1;
 }
 
-bool
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_readdirargs *args = rqstp->rq_argp;
 
-	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->cookie) < 0)
-		return false;
-	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return false;
+	p = decode_fh(p, &args->fh);
+	if (!p)
+		return 0;
+	args->cookie = ntohl(*p++);
+	args->count  = ntohl(*p++);
+	args->count  = min_t(u32, args->count, PAGE_SIZE);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
-	return true;
+	return xdr_argsize_check(rqstp, p);
 }
 
 /*
  * XDR encode functions
  */
+int
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
+{
+	return xdr_ressize_check(rqstp, p);
+}
 
-bool
-nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_stat *resp = rqstp->rq_resp;
 
-	return svcxdr_encode_stat(xdr, resp->status);
+	*p++ = resp->status;
+	return xdr_ressize_check(rqstp, p);
 }
 
-bool
-nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_attrstat *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return false;
-		break;
-	}
-
-	return true;
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		goto out;
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
+out:
+	return xdr_ressize_check(rqstp, p);
 }
 
-bool
-nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_diropres *resp = rqstp->rq_resp;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_fhandle(xdr, &resp->fh))
-			return false;
-		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return false;
-		break;
-	}
-
-	return true;
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		goto out;
+	p = encode_fh(p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
+out:
+	return xdr_ressize_check(rqstp, p);
 }
 
-bool
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_readlinkres *resp = rqstp->rq_resp;
-	struct kvec *head = rqstp->rq_res.head;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (xdr_stream_encode_u32(xdr, resp->len) < 0)
-			return false;
-		xdr_write_pages(xdr, &resp->page, 0, resp->len);
-		if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
-			return false;
-		break;
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(resp->len);
+	xdr_ressize_check(rqstp, p);
+	rqstp->rq_res.page_len = resp->len;
+	if (resp->len & 3) {
+		/* need to pad the tail */
+		rqstp->rq_res.tail[0].iov_base = p;
+		*p = 0;
+		rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
 	}
-
-	return true;
+	return 1;
 }
 
-bool
-nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_readres *resp = rqstp->rq_resp;
-	struct kvec *head = rqstp->rq_res.head;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return false;
-		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return false;
-		xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
-				resp->count);
-		if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
-			return false;
-		break;
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		return xdr_ressize_check(rqstp, p);
+
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
+	*p++ = htonl(resp->count);
+	xdr_ressize_check(rqstp, p);
+
+	/* now update rqstp->rq_res to reflect data as well */
+	rqstp->rq_res.page_len = resp->count;
+	if (resp->count & 3) {
+		/* need to pad the tail */
+		rqstp->rq_res.tail[0].iov_base = p;
+		*p = 0;
+		rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
 	}
-
-	return true;
+	return 1;
 }
 
-bool
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_readdirres *resp = rqstp->rq_resp;
-	struct xdr_buf *dirlist = &resp->dirlist;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
-		/* no more entries */
-		if (xdr_stream_encode_item_absent(xdr) < 0)
-			return false;
-		if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
-			return false;
-		break;
-	}
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		return xdr_ressize_check(rqstp, p);
 
-	return true;
+	xdr_ressize_check(rqstp, p);
+	p = resp->buffer;
+	*p++ = 0;			/* no more entries */
+	*p++ = htonl((resp->common.err == nfserr_eof));
+	rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1;
+
+	return 1;
 }
 
-bool
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+int
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
-	__be32 *p;
 
-	if (!svcxdr_encode_stat(xdr, resp->status))
-		return false;
-	switch (resp->status) {
-	case nfs_ok:
-		p = xdr_reserve_space(xdr, XDR_UNIT * 5);
-		if (!p)
-			return false;
-		*p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2);
-		*p++ = cpu_to_be32(stat->f_bsize);
-		*p++ = cpu_to_be32(stat->f_blocks);
-		*p++ = cpu_to_be32(stat->f_bfree);
-		*p = cpu_to_be32(stat->f_bavail);
-		break;
+	*p++ = resp->status;
+	if (resp->status != nfs_ok)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
+	*p++ = htonl(stat->f_bsize);
+	*p++ = htonl(stat->f_blocks);
+	*p++ = htonl(stat->f_bfree);
+	*p++ = htonl(stat->f_bavail);
+	return xdr_ressize_check(rqstp, p);
+}
+
+int
+nfssvc_encode_entry(void *ccdv, const char *name,
+		    int namlen, loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct readdir_cd *ccd = ccdv;
+	struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
+	__be32	*p = cd->buffer;
+	int	buflen, slen;
+
+	/*
+	dprintk("nfsd: entry(%.*s off %ld ino %ld)\n",
+			namlen, name, offset, ino);
+	 */
+
+	if (offset > ~((u32) 0)) {
+		cd->common.err = nfserr_fbig;
+		return -EINVAL;
 	}
+	if (cd->offset)
+		*cd->offset = htonl(offset);
 
-	return true;
-}
+	/* truncate filename */
+	namlen = min(namlen, NFS2_MAXNAMLEN);
+	slen = XDR_QUADLEN(namlen);
 
-/**
- * nfssvc_encode_nfscookie - Encode a directory offset cookie
- * @resp: readdir result context
- * @offset: offset cookie to encode
- *
- * The buffer space for the offset cookie has already been reserved
- * by svcxdr_encode_entry_common().
- */
-void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset)
-{
-	__be32 cookie = cpu_to_be32(offset);
+	if ((buflen = cd->buflen - slen - 4) < 0) {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+	if (ino > ~((u32) 0)) {
+		cd->common.err = nfserr_fbig;
+		return -EINVAL;
+	}
+	*p++ = xdr_one;				/* mark entry present */
+	*p++ = htonl((u32) ino);		/* file id */
+	p    = xdr_encode_array(p, name, namlen);/* name length & name */
+	cd->offset = p;			/* remember pointer */
+	*p++ = htonl(~0U);		/* offset of next entry */
 
-	if (!resp->cookie_offset)
-		return;
-
-	write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie,
-			       sizeof(cookie));
-	resp->cookie_offset = 0;
-}
-
-static bool
-svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name,
-			   int namlen, loff_t offset, u64 ino)
-{
-	struct xdr_buf *dirlist = &resp->dirlist;
-	struct xdr_stream *xdr = &resp->xdr;
-
-	if (xdr_stream_encode_item_present(xdr) < 0)
-		return false;
-	/* fileid */
-	if (xdr_stream_encode_u32(xdr, (u32)ino) < 0)
-		return false;
-	/* name */
-	if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS2_MAXNAMLEN)) < 0)
-		return false;
-	/* cookie */
-	resp->cookie_offset = dirlist->len;
-	if (xdr_stream_encode_u32(xdr, ~0U) < 0)
-		return false;
-
-	return true;
-}
-
-/**
- * nfssvc_encode_entry - encode one NFSv2 READDIR entry
- * @data: directory context
- * @name: name of the object to be encoded
- * @namlen: length of that name, in bytes
- * @offset: the offset of the previous entry
- * @ino: the fileid of this entry
- * @d_type: unused
- *
- * Return values:
- *   %0: Entry was successfully encoded.
- *   %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
- *
- * On exit, the following fields are updated:
- *   - resp->xdr
- *   - resp->common.err
- *   - resp->cookie_offset
- */
-int nfssvc_encode_entry(void *data, const char *name, int namlen,
-			loff_t offset, u64 ino, unsigned int d_type)
-{
-	struct readdir_cd *ccd = data;
-	struct nfsd_readdirres *resp = container_of(ccd,
-						    struct nfsd_readdirres,
-						    common);
-	unsigned int starting_length = resp->dirlist.len;
-
-	/* The offset cookie for the previous entry */
-	nfssvc_encode_nfscookie(resp, offset);
-
-	if (!svcxdr_encode_entry_common(resp, name, namlen, offset, ino))
-		goto out_toosmall;
-
-	xdr_commit_encode(&resp->xdr);
-	resp->common.err = nfs_ok;
+	cd->buflen = buflen;
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
 	return 0;
-
-out_toosmall:
-	resp->cookie_offset = 0;
-	resp->common.err = nfserr_toosmall;
-	resp->dirlist.len = starting_length;
-	return -EINVAL;
 }
 
 /*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e94634d30591..9eae11a9d21c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -57,11 +57,11 @@ typedef struct {
 } stateid_t;
 
 typedef struct {
-	stateid_t		cs_stid;
+	stateid_t		stid;
 #define NFS4_COPY_STID 1
 #define NFS4_COPYNOTIFY_STID 2
-	unsigned char		cs_type;
-	refcount_t		cs_count;
+	unsigned char		sc_type;
+	refcount_t		sc_count;
 } copy_stateid_t;
 
 struct nfsd4_callback {
@@ -149,7 +149,6 @@ struct nfs4_delegation {
 /* For recall: */
 	int			dl_retries;
 	struct nfsd4_callback	dl_recall;
-	bool			dl_recalled;
 };
 
 #define cb_to_delegation(cb) \
@@ -175,7 +174,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 /* Maximum number of slots per session. 160 is useful for long haul TCP */
 #define NFSD_MAX_SLOTS_PER_SESSION     160
 /* Maximum number of operations per session compound */
-#define NFSD_MAX_OPS_PER_COMPOUND	50
+#define NFSD_MAX_OPS_PER_COMPOUND	16
 /* Maximum  session per slot cache size */
 #define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -283,28 +282,6 @@ struct nfsd4_sessionid {
 
 #define HEXDIR_LEN     33 /* hex version of 16 byte md5 of cl_name plus '\0' */
 
-/*
- *       State                Meaning                  Where set
- * --------------------------------------------------------------------------
- * | NFSD4_ACTIVE      | Confirmed, active    | Default                     |
- * |------------------- ----------------------------------------------------|
- * | NFSD4_COURTESY    | Courtesy state.      | nfs4_get_client_reaplist    |
- * |                   | Lease/lock/share     |                             |
- * |                   | reservation conflict |                             |
- * |                   | can cause Courtesy   |                             |
- * |                   | client to be expired |                             |
- * |------------------------------------------------------------------------|
- * | NFSD4_EXPIRABLE   | Courtesy client to be| nfs4_laundromat             |
- * |                   | expired by Laundromat| try_to_expire_client        |
- * |                   | due to conflict      |                             |
- * |------------------------------------------------------------------------|
- */
-enum {
-	NFSD4_ACTIVE = 0,
-	NFSD4_COURTESY,
-	NFSD4_EXPIRABLE,
-};
-
 /*
  * struct nfs4_client - one per client.  Clientids live here.
  *
@@ -368,7 +345,6 @@ struct nfs4_client {
 #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
 #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
 					 1 << NFSD4_CLIENT_CB_KILL)
-#define NFSD4_CLIENT_CB_RECALL_ANY	(6)
 	unsigned long		cl_flags;
 	const struct cred	*cl_cb_cred;
 	struct rpc_clnt		*cl_cb_client;
@@ -395,10 +371,6 @@ struct nfs4_client {
 
 	/* debugging info directory under nfsd/clients/ : */
 	struct dentry		*cl_nfsd_dentry;
-	/* 'info' file within that directory. Ref is not counted,
-	 * but will remain valid iff cl_nfsd_dentry != NULL
-	 */
-	struct dentry		*cl_nfsd_info_dentry;
 
 	/* for nfs41 callbacks */
 	/* We currently support a single back channel with a single slot */
@@ -409,13 +381,6 @@ struct nfs4_client {
 	struct list_head	async_copies;	/* list of async copies */
 	spinlock_t		async_lock;	/* lock for async copies */
 	atomic_t		cl_cb_inflight;	/* Outstanding callbacks */
-
-	unsigned int		cl_state;
-	atomic_t		cl_delegs_in_recall;
-
-	struct nfsd4_cb_recall_any	*cl_ra;
-	time64_t		cl_ra_time;
-	struct list_head	cl_ra_cblist;
 };
 
 /* struct nfs4_client_reset
@@ -541,13 +506,14 @@ struct nfs4_clnt_odstate {
  * inode can have multiple filehandles associated with it, so there is
  * (potentially) a many to one relationship between this struct and struct
  * inode.
+ *
+ * These are hashed by filehandle in the file_hashtbl, which is protected by
+ * the global state_lock spinlock.
  */
 struct nfs4_file {
 	refcount_t		fi_ref;
-	struct inode *		fi_inode;
-	bool			fi_aliased;
 	spinlock_t		fi_lock;
-	struct rhlist_head	fi_rlist;
+	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
 	struct list_head        fi_stateids;
 	union {
 		struct list_head	fi_delegations;
@@ -596,10 +562,6 @@ struct nfs4_ol_stateid {
 	struct list_head		st_locks;
 	struct nfs4_stateowner		*st_stateowner;
 	struct nfs4_clnt_odstate	*st_clnt_odstate;
-/*
- * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the
- * comment above bmap_to_share_mode() for explanation:
- */
 	unsigned char			st_access_bmap;
 	unsigned char			st_deny_bmap;
 	struct nfs4_ol_stateid		*st_openstp;
@@ -641,7 +603,6 @@ enum nfsd4_cb_op {
 	NFSPROC4_CLNT_CB_OFFLOAD,
 	NFSPROC4_CLNT_CB_SEQUENCE,
 	NFSPROC4_CLNT_CB_NOTIFY_LOCK,
-	NFSPROC4_CLNT_CB_RECALL_ANY,
 };
 
 /* Returns true iff a is later than b: */
@@ -662,7 +623,6 @@ struct nfsd4_blocked_lock {
 	struct file_lock	nbl_lock;
 	struct knfsd_fh		nbl_fh;
 	struct nfsd4_callback	nbl_cb;
-	struct kref		nbl_kref;
 };
 
 struct nfsd4_compound_state;
@@ -689,22 +649,26 @@ void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *)
 extern void nfs4_release_reclaim(struct nfsd_net *);
 extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name,
 							struct nfsd_net *nn);
-extern __be32 nfs4_check_open_reclaim(struct nfs4_client *);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
+		struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
 extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
 		const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
-extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
+extern void nfsd4_run_cb(struct nfsd4_callback *cb);
 extern int nfsd4_create_callback_queue(void);
 extern void nfsd4_destroy_callback_queue(void);
 extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
+extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
 				struct xdr_netobj princhash, struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
 
+struct nfs4_file *find_file(struct knfsd_fh *fh);
 void put_nfs4_file(struct nfs4_file *fi);
+extern void nfs4_put_copy(struct nfsd4_copy *copy);
 extern struct nfsd4_copy *
 find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
 extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
@@ -729,9 +693,4 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
 extern int nfsd4_client_record_check(struct nfs4_client *clp);
 extern void nfsd4_record_grace_done(struct nfsd_net *nn);
 
-static inline bool try_to_expire_client(struct nfs4_client *clp)
-{
-	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
-	return clp->cl_state == NFSD4_EXPIRABLE;
-}
 #endif   /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 777e24e5da33..b1bc582b0493 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -7,14 +7,16 @@
  * Format:
  *	rc <hits> <misses> <nocache>
  *			Statistsics for the reply cache
- *	fh <stale> <deprecated filehandle cache stats>
+ *	fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
  *			statistics for filehandle lookup
  *	io <bytes-read> <bytes-written>
  *			statistics for IO throughput
- *	th <threads> <deprecated thread usage histogram stats>
- *			number of threads
- *	ra <deprecated ra-cache stats>
- *
+ *	th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> 
+ *			time (seconds) when nfsd thread usage above thresholds
+ *			and number of times that all threads were in use
+ *	ra cache-size  <10%  <20%  <30% ... <100% not-found
+ *			number of times that read-ahead entry was found that deep in
+ *			the cache.
  *	plus generic RPC stats (see net/sunrpc/stats.c)
  *
  * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -32,28 +34,35 @@ struct svc_stat		nfsd_svcstats = {
 	.program	= &nfsd_program,
 };
 
-static int nfsd_show(struct seq_file *seq, void *v)
+static int nfsd_proc_show(struct seq_file *seq, void *v)
 {
 	int i;
 
-	seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
-		   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
-
+	seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
+		      nfsdstats.rchits,
+		      nfsdstats.rcmisses,
+		      nfsdstats.rcnocache,
+		      nfsdstats.fh_stale,
+		      nfsdstats.fh_lookup,
+		      nfsdstats.fh_anon,
+		      nfsdstats.fh_nocache_dir,
+		      nfsdstats.fh_nocache_nondir,
+		      nfsdstats.io_read,
+		      nfsdstats.io_write);
 	/* thread usage: */
-	seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
-
-	/* deprecated thread usage histogram stats */
-	for (i = 0; i < 10; i++)
-		seq_puts(seq, " 0.000");
-
-	/* deprecated ra-cache stats */
-	seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
+	seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
+	for (i=0; i<10; i++) {
+		unsigned int jifs = nfsdstats.th_usage[i];
+		unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ;
+		seq_printf(seq, " %u.%03u", sec, msec);
+	}
 
+	/* newline and ra-cache */
+	seq_printf(seq, "\nra %u", nfsdstats.ra_size);
+	for (i=0; i<11; i++)
+		seq_printf(seq, " %u", nfsdstats.ra_depth[i]);
+	seq_putc(seq, '\n');
+	
 	/* show my rpc info */
 	svc_seq_show(seq, &nfsd_svcstats);
 
@@ -61,10 +70,8 @@ static int nfsd_show(struct seq_file *seq, void *v)
 	/* Show count for individual nfsv4 operations */
 	/* Writing operation numbers 0 1 2 also for maintaining uniformity */
 	seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
-	for (i = 0; i <= LAST_NFS4_OP; i++) {
-		seq_printf(seq, " %lld",
-			   percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
-	}
+	for (i = 0; i <= LAST_NFS4_OP; i++)
+		seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]);
 
 	seq_putc(seq, '\n');
 #endif
@@ -72,65 +79,26 @@ static int nfsd_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
-
-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
+static int nfsd_proc_open(struct inode *inode, struct file *file)
 {
-	int i, err = 0;
-
-	for (i = 0; !err && i < num; i++)
-		err = percpu_counter_init(&counters[i], 0, GFP_KERNEL);
-
-	if (!err)
-		return 0;
-
-	for (; i > 0; i--)
-		percpu_counter_destroy(&counters[i-1]);
-
-	return err;
+	return single_open(file, nfsd_proc_show, NULL);
 }
 
-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num)
+static const struct proc_ops nfsd_proc_ops = {
+	.proc_open	= nfsd_proc_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+void
+nfsd_stat_init(void)
 {
-	int i;
-
-	for (i = 0; i < num; i++)
-		percpu_counter_set(&counters[i], 0);
-}
-
-void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
-{
-	int i;
-
-	for (i = 0; i < num; i++)
-		percpu_counter_destroy(&counters[i]);
-}
-
-static int nfsd_stat_counters_init(void)
-{
-	return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
-}
-
-static void nfsd_stat_counters_destroy(void)
-{
-	nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
-}
-
-int nfsd_stat_init(void)
-{
-	int err;
-
-	err = nfsd_stat_counters_init();
-	if (err)
-		return err;
-
 	svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
-
-	return 0;
 }
 
-void nfsd_stat_shutdown(void)
+void
+nfsd_stat_shutdown(void)
 {
-	nfsd_stat_counters_destroy();
 	svc_proc_unregister(&init_net, "nfsd");
 }
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 9b43dc3d9991..b23fdac69820 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -8,89 +8,37 @@
 #define _NFSD_STATS_H
 
 #include <uapi/linux/nfsd/stats.h>
-#include <linux/percpu_counter.h>
 
 
-enum {
-	NFSD_STATS_RC_HITS,		/* repcache hits */
-	NFSD_STATS_RC_MISSES,		/* repcache misses */
-	NFSD_STATS_RC_NOCACHE,		/* uncached reqs */
-	NFSD_STATS_FH_STALE,		/* FH stale error */
-	NFSD_STATS_IO_READ,		/* bytes returned to read requests */
-	NFSD_STATS_IO_WRITE,		/* bytes passed in write requests */
-#ifdef CONFIG_NFSD_V4
-	NFSD_STATS_FIRST_NFS4_OP,	/* count of individual nfsv4 operations */
-	NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
-#define NFSD_STATS_NFS4_OP(op)	(NFSD_STATS_FIRST_NFS4_OP + (op))
-#endif
-	NFSD_STATS_COUNTERS_NUM
-};
-
 struct nfsd_stats {
-	struct percpu_counter	counter[NFSD_STATS_COUNTERS_NUM];
+	unsigned int	rchits;		/* repcache hits */
+	unsigned int	rcmisses;	/* repcache misses */
+	unsigned int	rcnocache;	/* uncached reqs */
+	unsigned int	fh_stale;	/* FH stale error */
+	unsigned int	fh_lookup;	/* dentry cached */
+	unsigned int	fh_anon;	/* anon file dentry returned */
+	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
+	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
+	unsigned int	io_read;	/* bytes returned to read requests */
+	unsigned int	io_write;	/* bytes passed in write requests */
+	unsigned int	th_cnt;		/* number of available threads */
+	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
+					 * of available threads were in use */
+	unsigned int	th_fullcnt;	/* number of times last free thread was used */
+	unsigned int	ra_size;	/* size of ra cache */
+	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
+					 * in the cache (10percentiles). [10] = not found */
+#ifdef CONFIG_NFSD_V4
+	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
+#endif
 
-	atomic_t	th_cnt;		/* number of available threads */
 };
 
-extern struct nfsd_stats	nfsdstats;
 
+extern struct nfsd_stats	nfsdstats;
 extern struct svc_stat		nfsd_svcstats;
 
-int nfsd_percpu_counters_init(struct percpu_counter counters[], int num);
-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num);
-void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num);
-int nfsd_stat_init(void);
-void nfsd_stat_shutdown(void);
-
-static inline void nfsd_stats_rc_hits_inc(void)
-{
-	percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
-}
-
-static inline void nfsd_stats_rc_misses_inc(void)
-{
-	percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
-}
-
-static inline void nfsd_stats_rc_nocache_inc(void)
-{
-	percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
-}
-
-static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp)
-{
-	percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
-	if (exp)
-		percpu_counter_inc(&exp->ex_stats.counter[EXP_STATS_FH_STALE]);
-}
-
-static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount)
-{
-	percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
-	if (exp)
-		percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_READ], amount);
-}
-
-static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount)
-{
-	percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
-	if (exp)
-		percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_WRITE], amount);
-}
-
-static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
-{
-	percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
-}
-
-static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
-{
-	percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
-}
-
-static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
-{
-	percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
-}
+void	nfsd_stat_init(void);
+void	nfsd_stat_shutdown(void);
 
 #endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
index f008b95ceec2..90967466a1e5 100644
--- a/fs/nfsd/trace.c
+++ b/fs/nfsd/trace.c
@@ -1,4 +1,3 @@
-// SPDX-License-Identifier: GPL-2.0
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 445d00f00eab..a952f4a9b2a6 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -12,86 +12,6 @@
 #include "export.h"
 #include "nfsfh.h"
 
-#define NFSD_TRACE_PROC_ARG_FIELDS \
-		__field(unsigned int, netns_ino) \
-		__field(u32, xid) \
-		__array(unsigned char, server, sizeof(struct sockaddr_in6)) \
-		__array(unsigned char, client, sizeof(struct sockaddr_in6))
-
-#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \
-		do { \
-			__entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
-			__entry->xid = be32_to_cpu(rqstp->rq_xid); \
-			memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
-			       rqstp->rq_xprt->xpt_locallen); \
-			memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
-			       rqstp->rq_xprt->xpt_remotelen); \
-		} while (0);
-
-#define NFSD_TRACE_PROC_RES_FIELDS \
-		__field(unsigned int, netns_ino) \
-		__field(u32, xid) \
-		__field(unsigned long, status) \
-		__array(unsigned char, server, sizeof(struct sockaddr_in6)) \
-		__array(unsigned char, client, sizeof(struct sockaddr_in6))
-
-#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \
-		do { \
-			__entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
-			__entry->xid = be32_to_cpu(rqstp->rq_xid); \
-			__entry->status = be32_to_cpu(error); \
-			memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
-			       rqstp->rq_xprt->xpt_locallen); \
-			memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
-			       rqstp->rq_xprt->xpt_remotelen); \
-		} while (0);
-
-DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
-	TP_PROTO(
-		const struct svc_rqst *rqstp
-	),
-	TP_ARGS(rqstp),
-	TP_STRUCT__entry(
-		NFSD_TRACE_PROC_ARG_FIELDS
-
-		__field(u32, vers)
-		__field(u32, proc)
-	),
-	TP_fast_assign(
-		NFSD_TRACE_PROC_ARG_ASSIGNMENTS
-
-		__entry->vers = rqstp->rq_vers;
-		__entry->proc = rqstp->rq_proc;
-	),
-	TP_printk("xid=0x%08x vers=%u proc=%u",
-		__entry->xid, __entry->vers, __entry->proc
-	)
-);
-
-#define DEFINE_NFSD_XDR_ERR_EVENT(name) \
-DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \
-	TP_PROTO(const struct svc_rqst *rqstp), \
-	TP_ARGS(rqstp))
-
-DEFINE_NFSD_XDR_ERR_EVENT(garbage_args);
-DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
-
-#define show_nfsd_may_flags(x)						\
-	__print_flags(x, "|",						\
-		{ NFSD_MAY_EXEC,		"EXEC" },		\
-		{ NFSD_MAY_WRITE,		"WRITE" },		\
-		{ NFSD_MAY_READ,		"READ" },		\
-		{ NFSD_MAY_SATTR,		"SATTR" },		\
-		{ NFSD_MAY_TRUNC,		"TRUNC" },		\
-		{ NFSD_MAY_LOCK,		"LOCK" },		\
-		{ NFSD_MAY_OWNER_OVERRIDE,	"OWNER_OVERRIDE" },	\
-		{ NFSD_MAY_LOCAL_ACCESS,	"LOCAL_ACCESS" },	\
-		{ NFSD_MAY_BYPASS_GSS_ON_ROOT,	"BYPASS_GSS_ON_ROOT" },	\
-		{ NFSD_MAY_NOT_BREAK_LEASE,	"NOT_BREAK_LEASE" },	\
-		{ NFSD_MAY_BYPASS_GSS,		"BYPASS_GSS" },		\
-		{ NFSD_MAY_READ_IF_EXEC,	"READ_IF_EXEC" },	\
-		{ NFSD_MAY_64BIT_COOKIE,	"64BIT_COOKIE" })
-
 TRACE_EVENT(nfsd_compound,
 	TP_PROTO(const struct svc_rqst *rqst,
 		 u32 args_opcnt),
@@ -131,56 +51,6 @@ TRACE_EVENT(nfsd_compound_status,
 		__get_str(name), __entry->status)
 )
 
-TRACE_EVENT(nfsd_compound_decode_err,
-	TP_PROTO(
-		const struct svc_rqst *rqstp,
-		u32 args_opcnt,
-		u32 resp_opcnt,
-		u32 opnum,
-		__be32 status
-	),
-	TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status),
-	TP_STRUCT__entry(
-		NFSD_TRACE_PROC_RES_FIELDS
-
-		__field(u32, args_opcnt)
-		__field(u32, resp_opcnt)
-		__field(u32, opnum)
-	),
-	TP_fast_assign(
-		NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
-
-		__entry->args_opcnt = args_opcnt;
-		__entry->resp_opcnt = resp_opcnt;
-		__entry->opnum = opnum;
-	),
-	TP_printk("op=%u/%u opnum=%u status=%lu",
-		__entry->resp_opcnt, __entry->args_opcnt,
-		__entry->opnum, __entry->status)
-);
-
-TRACE_EVENT(nfsd_compound_encode_err,
-	TP_PROTO(
-		const struct svc_rqst *rqstp,
-		u32 opnum,
-		__be32 status
-	),
-	TP_ARGS(rqstp, opnum, status),
-	TP_STRUCT__entry(
-		NFSD_TRACE_PROC_RES_FIELDS
-
-		__field(u32, opnum)
-	),
-	TP_fast_assign(
-		NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
-
-		__entry->opnum = opnum;
-	),
-	TP_printk("opnum=%u status=%lu",
-		__entry->opnum, __entry->status)
-);
-
-
 DECLARE_EVENT_CLASS(nfsd_fh_err_class,
 	TP_PROTO(struct svc_rqst *rqstp,
 		 struct svc_fh	*fhp,
@@ -377,106 +247,10 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name,	\
 DEFINE_NFSD_ERR_EVENT(read_err);
 DEFINE_NFSD_ERR_EVENT(write_err);
 
-TRACE_EVENT(nfsd_dirent,
-	TP_PROTO(struct svc_fh *fhp,
-		 u64 ino,
-		 const char *name,
-		 int namlen),
-	TP_ARGS(fhp, ino, name, namlen),
-	TP_STRUCT__entry(
-		__field(u32, fh_hash)
-		__field(u64, ino)
-		__field(int, len)
-		__dynamic_array(unsigned char, name, namlen)
-	),
-	TP_fast_assign(
-		__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
-		__entry->ino = ino;
-		__entry->len = namlen;
-		memcpy(__get_str(name), name, namlen);
-	),
-	TP_printk("fh_hash=0x%08x ino=%llu name=%.*s",
-		__entry->fh_hash, __entry->ino,
-		__entry->len, __get_str(name))
-)
-
-DECLARE_EVENT_CLASS(nfsd_copy_err_class,
-	TP_PROTO(struct svc_rqst *rqstp,
-		 struct svc_fh	*src_fhp,
-		 loff_t		src_offset,
-		 struct svc_fh	*dst_fhp,
-		 loff_t		dst_offset,
-		 u64		count,
-		 int		status),
-	TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
-	TP_STRUCT__entry(
-		__field(u32, xid)
-		__field(u32, src_fh_hash)
-		__field(loff_t, src_offset)
-		__field(u32, dst_fh_hash)
-		__field(loff_t, dst_offset)
-		__field(u64, count)
-		__field(int, status)
-	),
-	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqstp->rq_xid);
-		__entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
-		__entry->src_offset = src_offset;
-		__entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
-		__entry->dst_offset = dst_offset;
-		__entry->count = count;
-		__entry->status = status;
-	),
-	TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
-			"dst_fh_hash=0x%08x dst_offset=%lld "
-			"count=%llu status=%d",
-		  __entry->xid, __entry->src_fh_hash, __entry->src_offset,
-		  __entry->dst_fh_hash, __entry->dst_offset,
-		  (unsigned long long)__entry->count,
-		  __entry->status)
-)
-
-#define DEFINE_NFSD_COPY_ERR_EVENT(name)		\
-DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name,		\
-	TP_PROTO(struct svc_rqst	*rqstp,		\
-		 struct svc_fh		*src_fhp,	\
-		 loff_t			src_offset,	\
-		 struct svc_fh		*dst_fhp,	\
-		 loff_t			dst_offset,	\
-		 u64			count,		\
-		 int			status),	\
-	TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
-		count, status))
-
-DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
-
 #include "state.h"
 #include "filecache.h"
 #include "vfs.h"
 
-TRACE_EVENT(nfsd_delegret_wakeup,
-	TP_PROTO(
-		const struct svc_rqst *rqstp,
-		const struct inode *inode,
-		long timeo
-	),
-	TP_ARGS(rqstp, inode, timeo),
-	TP_STRUCT__entry(
-		__field(u32, xid)
-		__field(const void *, inode)
-		__field(long, timeo)
-	),
-	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqstp->rq_xid);
-		__entry->inode = inode;
-		__entry->timeo = timeo;
-	),
-	TP_printk("xid=0x%08x inode=%p%s",
-		  __entry->xid, __entry->inode,
-		  __entry->timeo == 0 ? " (timed out)" : ""
-	)
-);
-
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
 	TP_PROTO(stateid_t *stp),
 	TP_ARGS(stp),
@@ -517,7 +291,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
 
 DEFINE_STATEID_EVENT(open);
 DEFINE_STATEID_EVENT(deleg_read);
-DEFINE_STATEID_EVENT(deleg_return);
+DEFINE_STATEID_EVENT(deleg_break);
 DEFINE_STATEID_EVENT(deleg_recall);
 
 DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
@@ -550,61 +324,6 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
 DEFINE_STATESEQID_EVENT(preprocess);
 DEFINE_STATESEQID_EVENT(open_confirm);
 
-TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
-TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
-TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
-TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
-
-#define show_stid_type(x)						\
-	__print_flags(x, "|",						\
-		{ NFS4_OPEN_STID,		"OPEN" },		\
-		{ NFS4_LOCK_STID,		"LOCK" },		\
-		{ NFS4_DELEG_STID,		"DELEG" },		\
-		{ NFS4_CLOSED_STID,		"CLOSED" },		\
-		{ NFS4_REVOKED_DELEG_STID,	"REVOKED" },		\
-		{ NFS4_CLOSED_DELEG_STID,	"CLOSED_DELEG" },	\
-		{ NFS4_LAYOUT_STID,		"LAYOUT" })
-
-DECLARE_EVENT_CLASS(nfsd_stid_class,
-	TP_PROTO(
-		const struct nfs4_stid *stid
-	),
-	TP_ARGS(stid),
-	TP_STRUCT__entry(
-		__field(unsigned long, sc_type)
-		__field(int, sc_count)
-		__field(u32, cl_boot)
-		__field(u32, cl_id)
-		__field(u32, si_id)
-		__field(u32, si_generation)
-	),
-	TP_fast_assign(
-		const stateid_t *stp = &stid->sc_stateid;
-
-		__entry->sc_type = stid->sc_type;
-		__entry->sc_count = refcount_read(&stid->sc_count);
-		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
-		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
-		__entry->si_id = stp->si_opaque.so_id;
-		__entry->si_generation = stp->si_generation;
-	),
-	TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
-		__entry->cl_boot, __entry->cl_id,
-		__entry->si_id, __entry->si_generation,
-		__entry->sc_count, show_stid_type(__entry->sc_type)
-	)
-);
-
-#define DEFINE_STID_EVENT(name)					\
-DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name,			\
-	TP_PROTO(const struct nfs4_stid *stid),			\
-	TP_ARGS(stid))
-
-DEFINE_STID_EVENT(revoke);
-
 DECLARE_EVENT_CLASS(nfsd_clientid_class,
 	TP_PROTO(const clientid_t *clid),
 	TP_ARGS(clid),
@@ -624,12 +343,7 @@ DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \
 	TP_PROTO(const clientid_t *clid), \
 	TP_ARGS(clid))
 
-DEFINE_CLIENTID_EVENT(expire_unconf);
-DEFINE_CLIENTID_EVENT(reclaim_complete);
-DEFINE_CLIENTID_EVENT(confirmed);
-DEFINE_CLIENTID_EVENT(destroyed);
-DEFINE_CLIENTID_EVENT(admin_expired);
-DEFINE_CLIENTID_EVENT(replaced);
+DEFINE_CLIENTID_EVENT(expired);
 DEFINE_CLIENTID_EVENT(purged);
 DEFINE_CLIENTID_EVENT(renew);
 DEFINE_CLIENTID_EVENT(stale);
@@ -654,145 +368,56 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
 DEFINE_NET_EVENT(grace_start);
 DEFINE_NET_EVENT(grace_complete);
 
-TRACE_EVENT(nfsd_writeverf_reset,
-	TP_PROTO(
-		const struct nfsd_net *nn,
-		const struct svc_rqst *rqstp,
-		int error
-	),
-	TP_ARGS(nn, rqstp, error),
-	TP_STRUCT__entry(
-		__field(unsigned long long, boot_time)
-		__field(u32, xid)
-		__field(int, error)
-		__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
-	),
-	TP_fast_assign(
-		__entry->boot_time = nn->boot_time;
-		__entry->xid = be32_to_cpu(rqstp->rq_xid);
-		__entry->error = error;
-
-		/* avoid seqlock inside TP_fast_assign */
-		memcpy(__entry->verifier, nn->writeverf,
-		       NFS4_VERIFIER_SIZE);
-	),
-	TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
-		__entry->boot_time, __entry->xid, __entry->error,
-		__print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
-	)
-);
-
-TRACE_EVENT(nfsd_clid_cred_mismatch,
-	TP_PROTO(
-		const struct nfs4_client *clp,
-		const struct svc_rqst *rqstp
-	),
-	TP_ARGS(clp, rqstp),
-	TP_STRUCT__entry(
-		__field(u32, cl_boot)
-		__field(u32, cl_id)
-		__field(unsigned long, cl_flavor)
-		__field(unsigned long, new_flavor)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
-	),
-	TP_fast_assign(
-		__entry->cl_boot = clp->cl_clientid.cl_boot;
-		__entry->cl_id = clp->cl_clientid.cl_id;
-		__entry->cl_flavor = clp->cl_cred.cr_flavor;
-		__entry->new_flavor = rqstp->rq_cred.cr_flavor;
-		memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote,
-			sizeof(struct sockaddr_in6));
-	),
-	TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc",
-		__entry->cl_boot, __entry->cl_id,
-		show_nfsd_authflavor(__entry->cl_flavor),
-		show_nfsd_authflavor(__entry->new_flavor), __entry->addr
-	)
-)
-
-TRACE_EVENT(nfsd_clid_verf_mismatch,
-	TP_PROTO(
-		const struct nfs4_client *clp,
-		const struct svc_rqst *rqstp,
-		const nfs4_verifier *verf
-	),
-	TP_ARGS(clp, rqstp, verf),
-	TP_STRUCT__entry(
-		__field(u32, cl_boot)
-		__field(u32, cl_id)
-		__array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE)
-		__array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
-	),
-	TP_fast_assign(
-		__entry->cl_boot = clp->cl_clientid.cl_boot;
-		__entry->cl_id = clp->cl_clientid.cl_id;
-		memcpy(__entry->cl_verifier, (void *)&clp->cl_verifier,
-		       NFS4_VERIFIER_SIZE);
-		memcpy(__entry->new_verifier, (void *)verf,
-		       NFS4_VERIFIER_SIZE);
-		memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote,
-			sizeof(struct sockaddr_in6));
-	),
-	TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc",
-		__entry->cl_boot, __entry->cl_id,
-		__print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE),
-		__print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE),
-		__entry->addr
-	)
-);
-
-DECLARE_EVENT_CLASS(nfsd_clid_class,
+TRACE_EVENT(nfsd_clid_inuse_err,
 	TP_PROTO(const struct nfs4_client *clp),
 	TP_ARGS(clp),
 	TP_STRUCT__entry(
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
 		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
-		__field(unsigned long, flavor)
-		__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
-		__dynamic_array(char, name, clp->cl_name.len + 1)
+		__field(unsigned int, namelen)
+		__dynamic_array(unsigned char, name, clp->cl_name.len)
 	),
 	TP_fast_assign(
 		__entry->cl_boot = clp->cl_clientid.cl_boot;
 		__entry->cl_id = clp->cl_clientid.cl_id;
 		memcpy(__entry->addr, &clp->cl_addr,
 			sizeof(struct sockaddr_in6));
-		__entry->flavor = clp->cl_cred.cr_flavor;
-		memcpy(__entry->verifier, (void *)&clp->cl_verifier,
-		       NFS4_VERIFIER_SIZE);
-		memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len);
-		__get_str(name)[clp->cl_name.len] = '\0';
+		__entry->namelen = clp->cl_name.len;
+		memcpy(__get_dynamic_array(name), clp->cl_name.data,
+			clp->cl_name.len);
 	),
-	TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
-		__entry->addr, __get_str(name),
-		__print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE),
-		show_nfsd_authflavor(__entry->flavor),
+	TP_printk("nfs4_clientid %.*s already in use by %pISpc, client %08x:%08x",
+		__entry->namelen, __get_str(name), __entry->addr,
 		__entry->cl_boot, __entry->cl_id)
-);
+)
 
-#define DEFINE_CLID_EVENT(name) \
-DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \
-	TP_PROTO(const struct nfs4_client *clp), \
-	TP_ARGS(clp))
+TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
+TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
+TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
 
-DEFINE_CLID_EVENT(fresh);
-DEFINE_CLID_EVENT(confirmed_r);
-
-/*
- * from fs/nfsd/filecache.h
- */
 #define show_nf_flags(val)						\
 	__print_flags(val, "|",						\
 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
-		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED" },		\
-		{ 1 << NFSD_FILE_GC,		"GC" })
+		{ 1 << NFSD_FILE_BREAK_READ,	"BREAK_READ" },		\
+		{ 1 << NFSD_FILE_BREAK_WRITE,	"BREAK_WRITE" },	\
+		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"})
+
+/* FIXME: This should probably be fleshed out in the future. */
+#define show_nf_may(val)						\
+	__print_flags(val, "|",						\
+		{ NFSD_MAY_READ,		"READ" },		\
+		{ NFSD_MAY_WRITE,		"WRITE" },		\
+		{ NFSD_MAY_NOT_BREAK_LEASE,	"NOT_BREAK_LEASE" })
 
 DECLARE_EVENT_CLASS(nfsd_file_class,
 	TP_PROTO(struct nfsd_file *nf),
 	TP_ARGS(nf),
 	TP_STRUCT__entry(
+		__field(unsigned int, nf_hashval)
 		__field(void *, nf_inode)
 		__field(int, nf_ref)
 		__field(unsigned long, nf_flags)
@@ -800,17 +425,19 @@ DECLARE_EVENT_CLASS(nfsd_file_class,
 		__field(struct file *, nf_file)
 	),
 	TP_fast_assign(
+		__entry->nf_hashval = nf->nf_hashval;
 		__entry->nf_inode = nf->nf_inode;
 		__entry->nf_ref = refcount_read(&nf->nf_ref);
 		__entry->nf_flags = nf->nf_flags;
 		__entry->nf_may = nf->nf_may;
 		__entry->nf_file = nf->nf_file;
 	),
-	TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p",
+	TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+		__entry->nf_hashval,
 		__entry->nf_inode,
 		__entry->nf_ref,
 		show_nf_flags(__entry->nf_flags),
-		show_nfsd_may_flags(__entry->nf_may),
+		show_nf_may(__entry->nf_may),
 		__entry->nf_file)
 )
 
@@ -819,60 +446,34 @@ DEFINE_EVENT(nfsd_file_class, name, \
 	TP_PROTO(struct nfsd_file *nf), \
 	TP_ARGS(nf))
 
-DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
 DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
 DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
-DEFINE_NFSD_FILE_EVENT(nfsd_file_closing);
-DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue);
-
-TRACE_EVENT(nfsd_file_alloc,
-	TP_PROTO(
-		const struct nfsd_file *nf
-	),
-	TP_ARGS(nf),
-	TP_STRUCT__entry(
-		__field(const void *, nf_inode)
-		__field(unsigned long, nf_flags)
-		__field(unsigned long, nf_may)
-		__field(unsigned int, nf_ref)
-	),
-	TP_fast_assign(
-		__entry->nf_inode = nf->nf_inode;
-		__entry->nf_flags = nf->nf_flags;
-		__entry->nf_ref = refcount_read(&nf->nf_ref);
-		__entry->nf_may = nf->nf_may;
-	),
-	TP_printk("inode=%p ref=%u flags=%s may=%s",
-		__entry->nf_inode, __entry->nf_ref,
-		show_nf_flags(__entry->nf_flags),
-		show_nfsd_may_flags(__entry->nf_may)
-	)
-);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
 
 TRACE_EVENT(nfsd_file_acquire,
-	TP_PROTO(
-		const struct svc_rqst *rqstp,
-		const struct inode *inode,
-		unsigned int may_flags,
-		const struct nfsd_file *nf,
-		__be32 status
-	),
+	TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+		 struct inode *inode, unsigned int may_flags,
+		 struct nfsd_file *nf, __be32 status),
 
-	TP_ARGS(rqstp, inode, may_flags, nf, status),
+	TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
 
 	TP_STRUCT__entry(
 		__field(u32, xid)
-		__field(const void *, inode)
-		__field(unsigned long, may_flags)
-		__field(unsigned int, nf_ref)
+		__field(unsigned int, hash)
+		__field(void *, inode)
+		__field(unsigned int, may_flags)
+		__field(int, nf_ref)
 		__field(unsigned long, nf_flags)
-		__field(unsigned long, nf_may)
-		__field(const void *, nf_file)
+		__field(unsigned char, nf_may)
+		__field(struct file *, nf_file)
 		__field(u32, status)
 	),
 
 	TP_fast_assign(
 		__entry->xid = be32_to_cpu(rqstp->rq_xid);
+		__entry->hash = hash;
 		__entry->inode = inode;
 		__entry->may_flags = may_flags;
 		__entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
@@ -882,132 +483,40 @@ TRACE_EVENT(nfsd_file_acquire,
 		__entry->status = be32_to_cpu(status);
 	),
 
-	TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u",
-			__entry->xid, __entry->inode,
-			show_nfsd_may_flags(__entry->may_flags),
-			__entry->nf_ref, show_nf_flags(__entry->nf_flags),
-			show_nfsd_may_flags(__entry->nf_may),
-			__entry->nf_file, __entry->status
-	)
+	TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+			__entry->xid, __entry->hash, __entry->inode,
+			show_nf_may(__entry->may_flags), __entry->nf_ref,
+			show_nf_flags(__entry->nf_flags),
+			show_nf_may(__entry->nf_may), __entry->nf_file,
+			__entry->status)
 );
 
-TRACE_EVENT(nfsd_file_insert_err,
-	TP_PROTO(
-		const struct svc_rqst *rqstp,
-		const struct inode *inode,
-		unsigned int may_flags,
-		long error
-	),
-	TP_ARGS(rqstp, inode, may_flags, error),
+DECLARE_EVENT_CLASS(nfsd_file_search_class,
+	TP_PROTO(struct inode *inode, unsigned int hash, int found),
+	TP_ARGS(inode, hash, found),
 	TP_STRUCT__entry(
-		__field(u32, xid)
-		__field(const void *, inode)
-		__field(unsigned long, may_flags)
-		__field(long, error)
-	),
-	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqstp->rq_xid);
-		__entry->inode = inode;
-		__entry->may_flags = may_flags;
-		__entry->error = error;
-	),
-	TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld",
-		__entry->xid, __entry->inode,
-		show_nfsd_may_flags(__entry->may_flags),
-		__entry->error
-	)
-);
-
-TRACE_EVENT(nfsd_file_cons_err,
-	TP_PROTO(
-		const struct svc_rqst *rqstp,
-		const struct inode *inode,
-		unsigned int may_flags,
-		const struct nfsd_file *nf
-	),
-	TP_ARGS(rqstp, inode, may_flags, nf),
-	TP_STRUCT__entry(
-		__field(u32, xid)
-		__field(const void *, inode)
-		__field(unsigned long, may_flags)
-		__field(unsigned int, nf_ref)
-		__field(unsigned long, nf_flags)
-		__field(unsigned long, nf_may)
-		__field(const void *, nf_file)
-	),
-	TP_fast_assign(
-		__entry->xid = be32_to_cpu(rqstp->rq_xid);
-		__entry->inode = inode;
-		__entry->may_flags = may_flags;
-		__entry->nf_ref = refcount_read(&nf->nf_ref);
-		__entry->nf_flags = nf->nf_flags;
-		__entry->nf_may = nf->nf_may;
-		__entry->nf_file = nf->nf_file;
-	),
-	TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
-		__entry->xid, __entry->inode,
-		show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref,
-		show_nf_flags(__entry->nf_flags),
-		show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
-	)
-);
-
-DECLARE_EVENT_CLASS(nfsd_file_open_class,
-	TP_PROTO(const struct nfsd_file *nf, __be32 status),
-	TP_ARGS(nf, status),
-	TP_STRUCT__entry(
-		__field(void *, nf_inode)	/* cannot be dereferenced */
-		__field(int, nf_ref)
-		__field(unsigned long, nf_flags)
-		__field(unsigned long, nf_may)
-		__field(void *, nf_file)	/* cannot be dereferenced */
-	),
-	TP_fast_assign(
-		__entry->nf_inode = nf->nf_inode;
-		__entry->nf_ref = refcount_read(&nf->nf_ref);
-		__entry->nf_flags = nf->nf_flags;
-		__entry->nf_may = nf->nf_may;
-		__entry->nf_file = nf->nf_file;
-	),
-	TP_printk("inode=%p ref=%d flags=%s may=%s file=%p",
-		__entry->nf_inode,
-		__entry->nf_ref,
-		show_nf_flags(__entry->nf_flags),
-		show_nfsd_may_flags(__entry->nf_may),
-		__entry->nf_file)
-)
-
-#define DEFINE_NFSD_FILE_OPEN_EVENT(name)					\
-DEFINE_EVENT(nfsd_file_open_class, name,					\
-	TP_PROTO(							\
-		const struct nfsd_file *nf,				\
-		__be32 status						\
-	),								\
-	TP_ARGS(nf, status))
-
-DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
-DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
-
-TRACE_EVENT(nfsd_file_is_cached,
-	TP_PROTO(
-		const struct inode *inode,
-		int found
-	),
-	TP_ARGS(inode, found),
-	TP_STRUCT__entry(
-		__field(const struct inode *, inode)
+		__field(struct inode *, inode)
+		__field(unsigned int, hash)
 		__field(int, found)
 	),
 	TP_fast_assign(
 		__entry->inode = inode;
+		__entry->hash = hash;
 		__entry->found = found;
 	),
-	TP_printk("inode=%p is %scached",
-		__entry->inode,
-		__entry->found ? "" : "not "
-	)
+	TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+			__entry->inode, __entry->found)
 );
 
+#define DEFINE_NFSD_FILE_SEARCH_EVENT(name)				\
+DEFINE_EVENT(nfsd_file_search_class, name,				\
+	TP_PROTO(struct inode *inode, unsigned int hash, int found),	\
+	TP_ARGS(inode, hash, found))
+
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
+
 TRACE_EVENT(nfsd_file_fsnotify_handle_event,
 	TP_PROTO(struct inode *inode, u32 mask),
 	TP_ARGS(inode, mask),
@@ -1023,95 +532,10 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
 		__entry->mode = inode->i_mode;
 		__entry->mask = mask;
 	),
-	TP_printk("inode=%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+	TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
 			__entry->nlink, __entry->mode, __entry->mask)
 );
 
-DECLARE_EVENT_CLASS(nfsd_file_gc_class,
-	TP_PROTO(
-		const struct nfsd_file *nf
-	),
-	TP_ARGS(nf),
-	TP_STRUCT__entry(
-		__field(void *, nf_inode)
-		__field(void *, nf_file)
-		__field(int, nf_ref)
-		__field(unsigned long, nf_flags)
-	),
-	TP_fast_assign(
-		__entry->nf_inode = nf->nf_inode;
-		__entry->nf_file = nf->nf_file;
-		__entry->nf_ref = refcount_read(&nf->nf_ref);
-		__entry->nf_flags = nf->nf_flags;
-	),
-	TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p",
-		__entry->nf_inode, __entry->nf_ref,
-		show_nf_flags(__entry->nf_flags),
-		__entry->nf_file
-	)
-);
-
-#define DEFINE_NFSD_FILE_GC_EVENT(name)					\
-DEFINE_EVENT(nfsd_file_gc_class, name,					\
-	TP_PROTO(							\
-		const struct nfsd_file *nf				\
-	),								\
-	TP_ARGS(nf))
-
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
-
-DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
-	TP_PROTO(
-		unsigned long removed,
-		unsigned long remaining
-	),
-	TP_ARGS(removed, remaining),
-	TP_STRUCT__entry(
-		__field(unsigned long, removed)
-		__field(unsigned long, remaining)
-	),
-	TP_fast_assign(
-		__entry->removed = removed;
-		__entry->remaining = remaining;
-	),
-	TP_printk("%lu entries removed, %lu remaining",
-		__entry->removed, __entry->remaining)
-);
-
-#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name)				\
-DEFINE_EVENT(nfsd_file_lruwalk_class, name,				\
-	TP_PROTO(							\
-		unsigned long removed,					\
-		unsigned long remaining					\
-	),								\
-	TP_ARGS(removed, remaining))
-
-DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
-DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
-
-TRACE_EVENT(nfsd_file_close,
-	TP_PROTO(
-		const struct inode *inode
-	),
-	TP_ARGS(inode),
-	TP_STRUCT__entry(
-		__field(const void *, inode)
-	),
-	TP_fast_assign(
-		__entry->inode = inode;
-	),
-	TP_printk("inode=%p",
-		__entry->inode
-	)
-);
-
 #include "cache.h"
 
 TRACE_DEFINE_ENUM(RC_DROPIT);
@@ -1192,9 +616,9 @@ TRACE_EVENT(nfsd_cb_args,
 		memcpy(__entry->addr, &conn->cb_addr,
 			sizeof(struct sockaddr_in6));
 	),
-	TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
-		__entry->prog, __entry->ident)
+	TP_printk("client %08x:%08x callback addr=%pISpc prog=%u ident=%u",
+		__entry->cl_boot, __entry->cl_id,
+		__entry->addr, __entry->prog, __entry->ident)
 );
 
 TRACE_EVENT(nfsd_cb_nodelegs,
@@ -1211,6 +635,11 @@ TRACE_EVENT(nfsd_cb_nodelegs,
 	TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
 )
 
+TRACE_DEFINE_ENUM(NFSD4_CB_UP);
+TRACE_DEFINE_ENUM(NFSD4_CB_UNKNOWN);
+TRACE_DEFINE_ENUM(NFSD4_CB_DOWN);
+TRACE_DEFINE_ENUM(NFSD4_CB_FAULT);
+
 #define show_cb_state(val)						\
 	__print_symbolic(val,						\
 		{ NFSD4_CB_UP,		"UP" },				\
@@ -1244,53 +673,10 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name,		\
 	TP_PROTO(const struct nfs4_client *clp),	\
 	TP_ARGS(clp))
 
+DEFINE_NFSD_CB_EVENT(setup);
 DEFINE_NFSD_CB_EVENT(state);
-DEFINE_NFSD_CB_EVENT(probe);
-DEFINE_NFSD_CB_EVENT(lost);
 DEFINE_NFSD_CB_EVENT(shutdown);
 
-TRACE_DEFINE_ENUM(RPC_AUTH_NULL);
-TRACE_DEFINE_ENUM(RPC_AUTH_UNIX);
-TRACE_DEFINE_ENUM(RPC_AUTH_GSS);
-TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5);
-TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I);
-TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P);
-
-#define show_nfsd_authflavor(val)					\
-	__print_symbolic(val,						\
-		{ RPC_AUTH_NULL,		"none" },		\
-		{ RPC_AUTH_UNIX,		"sys" },		\
-		{ RPC_AUTH_GSS,			"gss" },		\
-		{ RPC_AUTH_GSS_KRB5,		"krb5" },		\
-		{ RPC_AUTH_GSS_KRB5I,		"krb5i" },		\
-		{ RPC_AUTH_GSS_KRB5P,		"krb5p" })
-
-TRACE_EVENT(nfsd_cb_setup,
-	TP_PROTO(const struct nfs4_client *clp,
-		 const char *netid,
-		 rpc_authflavor_t authflavor
-	),
-	TP_ARGS(clp, netid, authflavor),
-	TP_STRUCT__entry(
-		__field(u32, cl_boot)
-		__field(u32, cl_id)
-		__field(unsigned long, authflavor)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
-		__array(unsigned char, netid, 8)
-	),
-	TP_fast_assign(
-		__entry->cl_boot = clp->cl_clientid.cl_boot;
-		__entry->cl_id = clp->cl_clientid.cl_id;
-		strlcpy(__entry->netid, netid, sizeof(__entry->netid));
-		__entry->authflavor = authflavor;
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
-	),
-	TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
-		__entry->netid, show_nfsd_authflavor(__entry->authflavor))
-);
-
 TRACE_EVENT(nfsd_cb_setup_err,
 	TP_PROTO(
 		const struct nfs4_client *clp,
@@ -1314,138 +700,54 @@ TRACE_EVENT(nfsd_cb_setup_err,
 		__entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error)
 );
 
-TRACE_EVENT(nfsd_cb_recall,
-	TP_PROTO(
-		const struct nfs4_stid *stid
-	),
-	TP_ARGS(stid),
-	TP_STRUCT__entry(
-		__field(u32, cl_boot)
-		__field(u32, cl_id)
-		__field(u32, si_id)
-		__field(u32, si_generation)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
-	),
-	TP_fast_assign(
-		const stateid_t *stp = &stid->sc_stateid;
-		const struct nfs4_client *clp = stid->sc_client;
-
-		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
-		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
-		__entry->si_id = stp->si_opaque.so_id;
-		__entry->si_generation = stp->si_generation;
-		if (clp)
-			memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-				sizeof(struct sockaddr_in6));
-		else
-			memset(__entry->addr, 0, sizeof(struct sockaddr_in6));
-	),
-	TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
-		__entry->si_id, __entry->si_generation)
-);
-
-TRACE_EVENT(nfsd_cb_notify_lock,
-	TP_PROTO(
-		const struct nfs4_lockowner *lo,
-		const struct nfsd4_blocked_lock *nbl
-	),
-	TP_ARGS(lo, nbl),
-	TP_STRUCT__entry(
-		__field(u32, cl_boot)
-		__field(u32, cl_id)
-		__field(u32, fh_hash)
-		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
-	),
-	TP_fast_assign(
-		const struct nfs4_client *clp = lo->lo_owner.so_client;
-
-		__entry->cl_boot = clp->cl_clientid.cl_boot;
-		__entry->cl_id = clp->cl_clientid.cl_id;
-		__entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh);
-		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
-			sizeof(struct sockaddr_in6));
-	),
-	TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x",
-		__entry->addr, __entry->cl_boot, __entry->cl_id,
-		__entry->fh_hash)
-);
-
-TRACE_EVENT(nfsd_cb_offload,
+TRACE_EVENT(nfsd_cb_work,
 	TP_PROTO(
 		const struct nfs4_client *clp,
-		const stateid_t *stp,
-		const struct knfsd_fh *fh,
-		u64 count,
-		__be32 status
+		const char *procedure
 	),
-	TP_ARGS(clp, stp, fh, count, status),
+	TP_ARGS(clp, procedure),
 	TP_STRUCT__entry(
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
-		__field(u32, si_id)
-		__field(u32, si_generation)
-		__field(u32, fh_hash)
-		__field(int, status)
-		__field(u64, count)
+		__string(procedure, procedure)
 		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
 	),
 	TP_fast_assign(
-		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
-		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
-		__entry->si_id = stp->si_opaque.so_id;
-		__entry->si_generation = stp->si_generation;
-		__entry->fh_hash = knfsd_fh_hash(fh);
-		__entry->status = be32_to_cpu(status);
-		__entry->count = count;
+		__entry->cl_boot = clp->cl_clientid.cl_boot;
+		__entry->cl_id = clp->cl_clientid.cl_id;
+		__assign_str(procedure, procedure)
 		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
 			sizeof(struct sockaddr_in6));
 	),
-	TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d",
+	TP_printk("addr=%pISpc client %08x:%08x procedure=%s",
 		__entry->addr, __entry->cl_boot, __entry->cl_id,
-		__entry->si_id, __entry->si_generation,
-		__entry->fh_hash, __entry->count, __entry->status)
+		__get_str(procedure))
 );
 
-DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+TRACE_EVENT(nfsd_cb_done,
 	TP_PROTO(
-		const stateid_t *stp,
-		const struct rpc_task *task
+		const struct nfs4_client *clp,
+		int status
 	),
-	TP_ARGS(stp, task),
+	TP_ARGS(clp, status),
 	TP_STRUCT__entry(
 		__field(u32, cl_boot)
 		__field(u32, cl_id)
-		__field(u32, si_id)
-		__field(u32, si_generation)
 		__field(int, status)
+		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
 	),
 	TP_fast_assign(
-		__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
-		__entry->cl_id = stp->si_opaque.so_clid.cl_id;
-		__entry->si_id = stp->si_opaque.so_id;
-		__entry->si_generation = stp->si_generation;
-		__entry->status = task->tk_status;
+		__entry->cl_boot = clp->cl_clientid.cl_boot;
+		__entry->cl_id = clp->cl_clientid.cl_id;
+		__entry->status = status;
+		memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
+			sizeof(struct sockaddr_in6));
 	),
-	TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
-		__entry->cl_boot, __entry->cl_id, __entry->si_id,
-		__entry->si_generation, __entry->status
-	)
+	TP_printk("addr=%pISpc client %08x:%08x status=%d",
+		__entry->addr, __entry->cl_boot, __entry->cl_id,
+		__entry->status)
 );
 
-#define DEFINE_NFSD_CB_DONE_EVENT(name)			\
-DEFINE_EVENT(nfsd_cb_done_class, name,			\
-	TP_PROTO(					\
-		const stateid_t *stp,			\
-		const struct rpc_task *task		\
-	),						\
-	TP_ARGS(stp, task))
-
-DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
-DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
-DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
-DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
-
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0ea05ddff0d0..31edb883afd0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -32,13 +32,14 @@
 #include <linux/writeback.h>
 #include <linux/security.h>
 
+#ifdef CONFIG_NFSD_V3
 #include "xdr3.h"
+#endif /* CONFIG_NFSD_V3 */
 
 #ifdef CONFIG_NFSD_V4
 #include "../internal.h"
 #include "acl.h"
 #include "idmap.h"
-#include "xdr4.h"
 #endif /* CONFIG_NFSD_V4 */
 
 #include "nfsd.h"
@@ -48,69 +49,6 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
 
-/**
- * nfserrno - Map Linux errnos to NFS errnos
- * @errno: POSIX(-ish) error code to be mapped
- *
- * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If
- * it's an error we don't expect, log it once and return nfserr_io.
- */
-__be32
-nfserrno (int errno)
-{
-	static struct {
-		__be32	nfserr;
-		int	syserr;
-	} nfs_errtbl[] = {
-		{ nfs_ok, 0 },
-		{ nfserr_perm, -EPERM },
-		{ nfserr_noent, -ENOENT },
-		{ nfserr_io, -EIO },
-		{ nfserr_nxio, -ENXIO },
-		{ nfserr_fbig, -E2BIG },
-		{ nfserr_stale, -EBADF },
-		{ nfserr_acces, -EACCES },
-		{ nfserr_exist, -EEXIST },
-		{ nfserr_xdev, -EXDEV },
-		{ nfserr_mlink, -EMLINK },
-		{ nfserr_nodev, -ENODEV },
-		{ nfserr_notdir, -ENOTDIR },
-		{ nfserr_isdir, -EISDIR },
-		{ nfserr_inval, -EINVAL },
-		{ nfserr_fbig, -EFBIG },
-		{ nfserr_nospc, -ENOSPC },
-		{ nfserr_rofs, -EROFS },
-		{ nfserr_mlink, -EMLINK },
-		{ nfserr_nametoolong, -ENAMETOOLONG },
-		{ nfserr_notempty, -ENOTEMPTY },
-		{ nfserr_dquot, -EDQUOT },
-		{ nfserr_stale, -ESTALE },
-		{ nfserr_jukebox, -ETIMEDOUT },
-		{ nfserr_jukebox, -ERESTARTSYS },
-		{ nfserr_jukebox, -EAGAIN },
-		{ nfserr_jukebox, -EWOULDBLOCK },
-		{ nfserr_jukebox, -ENOMEM },
-		{ nfserr_io, -ETXTBSY },
-		{ nfserr_notsupp, -EOPNOTSUPP },
-		{ nfserr_toosmall, -ETOOSMALL },
-		{ nfserr_serverfault, -ESERVERFAULT },
-		{ nfserr_serverfault, -ENFILE },
-		{ nfserr_io, -EREMOTEIO },
-		{ nfserr_stale, -EOPENSTALE },
-		{ nfserr_io, -EUCLEAN },
-		{ nfserr_perm, -ENOKEY },
-		{ nfserr_no_grace, -ENOGRACE},
-	};
-	int	i;
-
-	for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
-		if (nfs_errtbl[i].syserr == errno)
-			return nfs_errtbl[i].nfserr;
-	}
-	WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
-	return nfserr_io;
-}
-
 /* 
  * Called from nfsd_lookup and encode_dirent. Check if we have crossed 
  * a mount point.
@@ -261,13 +199,27 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				goto out_nfserr;
 		}
 	} else {
-		dentry = lookup_one_len_unlocked(name, dparent, len);
+		/*
+		 * In the nfsd4_open() case, this may be held across
+		 * subsequent open and delegation acquisition which may
+		 * need to take the child's i_mutex:
+		 */
+		fh_lock_nested(fhp, I_MUTEX_PARENT);
+		dentry = lookup_one_len(name, dparent, len);
 		host_err = PTR_ERR(dentry);
 		if (IS_ERR(dentry))
 			goto out_nfserr;
 		if (nfsd_mountpoint(dentry, exp)) {
-			host_err = nfsd_cross_mnt(rqstp, &dentry, &exp);
-			if (host_err) {
+			/*
+			 * We don't need the i_mutex after all.  It's
+			 * still possible we could open this (regular
+			 * files can be mountpoints too), but the
+			 * i_mutex is just there to prevent renames of
+			 * something that we might be about to delegate,
+			 * and a mountpoint won't be renamed:
+			 */
+			fh_unlock(fhp);
+			if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
 				dput(dentry);
 				goto out_nfserr;
 			}
@@ -282,15 +234,7 @@ out_nfserr:
 	return nfserrno(host_err);
 }
 
-/**
- * nfsd_lookup - look up a single path component for nfsd
- *
- * @rqstp:   the request context
- * @fhp:     the file handle of the directory
- * @name:    the component name, or %NULL to look up parent
- * @len:     length of name to examine
- * @resfh:   pointer to pre-initialised filehandle to hold result.
- *
+/*
  * Look up one component of a pathname.
  * N.B. After this call _both_ fhp and resfh need an fh_put
  *
@@ -300,11 +244,11 @@ out_nfserr:
  * returned. Otherwise the covered directory is returned.
  * NOTE: this mountpoint crossing is not supported properly by all
  *   clients and is explicitly disallowed for NFSv3
- *
+ *      NeilBrown <neilb@cse.unsw.edu.au>
  */
 __be32
 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
-	    unsigned int len, struct svc_fh *resfh)
+				unsigned int len, struct svc_fh *resfh)
 {
 	struct svc_export	*exp;
 	struct dentry		*dentry;
@@ -362,10 +306,6 @@ commit_metadata(struct svc_fh *fhp)
 static void
 nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 {
-	/* Ignore mode updates on symlinks */
-	if (S_ISLNK(inode->i_mode))
-		iap->ia_valid &= ~ATTR_MODE;
-
 	/* sanitize the mode change */
 	if (iap->ia_valid & ATTR_MODE) {
 		iap->ia_mode &= S_IALLUGO;
@@ -419,77 +359,21 @@ out_nfserrno:
 	return nfserrno(host_err);
 }
 
-static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
-{
-	int host_err;
-
-	if (iap->ia_valid & ATTR_SIZE) {
-		/*
-		 * RFC5661, Section 18.30.4:
-		 *   Changing the size of a file with SETATTR indirectly
-		 *   changes the time_modify and change attributes.
-		 *
-		 * (and similar for the older RFCs)
-		 */
-		struct iattr size_attr = {
-			.ia_valid	= ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
-			.ia_size	= iap->ia_size,
-		};
-
-		if (iap->ia_size < 0)
-			return -EFBIG;
-
-		host_err = notify_change(dentry, &size_attr, NULL);
-		if (host_err)
-			return host_err;
-		iap->ia_valid &= ~ATTR_SIZE;
-
-		/*
-		 * Avoid the additional setattr call below if the only other
-		 * attribute that the client sends is the mtime, as we update
-		 * it as part of the size change above.
-		 */
-		if ((iap->ia_valid & ~ATTR_MTIME) == 0)
-			return 0;
-	}
-
-	if (!iap->ia_valid)
-		return 0;
-
-	iap->ia_valid |= ATTR_CTIME;
-	return notify_change(dentry, iap, NULL);
-}
-
-/**
- * nfsd_setattr - Set various file attributes.
- * @rqstp: controlling RPC transaction
- * @fhp: filehandle of target
- * @attr: attributes to set
- * @check_guard: set to 1 if guardtime is a valid timestamp
- * @guardtime: do not act if ctime.tv_sec does not match this timestamp
- *
- * This call may adjust the contents of @attr (in particular, this
- * call may change the bits in the na_iattr.ia_valid field).
- *
- * Returns nfs_ok on success, otherwise an NFS status code is
- * returned. Caller must release @fhp by calling fh_put in either
- * case.
+/*
+ * Set various file attributes.  After this call fhp needs an fh_put.
  */
 __be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
-	     struct nfsd_attrs *attr,
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	     int check_guard, time64_t guardtime)
 {
 	struct dentry	*dentry;
 	struct inode	*inode;
-	struct iattr	*iap = attr->na_iattr;
 	int		accmode = NFSD_MAY_SATTR;
 	umode_t		ftype = 0;
 	__be32		err;
-	int		host_err = 0;
+	int		host_err;
 	bool		get_write_count;
 	bool		size_change = (iap->ia_valid & ATTR_SIZE);
-	int		retries;
 
 	if (iap->ia_valid & ATTR_SIZE) {
 		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -525,6 +409,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	dentry = fhp->fh_dentry;
 	inode = d_inode(dentry);
 
+	/* Ignore any mode updates on symlinks */
+	if (S_ISLNK(inode->i_mode))
+		iap->ia_valid &= ~ATTR_MODE;
+
+	if (!iap->ia_valid)
+		return 0;
+
 	nfsd_sanitize_attrs(inode, iap);
 
 	if (check_guard && guardtime != inode->i_ctime.tv_sec)
@@ -543,41 +434,45 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			return err;
 	}
 
-	inode_lock(inode);
-	fh_fill_pre_attrs(fhp);
-	for (retries = 1;;) {
-		struct iattr attrs;
+	fh_lock(fhp);
+	if (size_change) {
+		/*
+		 * RFC5661, Section 18.30.4:
+		 *   Changing the size of a file with SETATTR indirectly
+		 *   changes the time_modify and change attributes.
+		 *
+		 * (and similar for the older RFCs)
+		 */
+		struct iattr size_attr = {
+			.ia_valid	= ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+			.ia_size	= iap->ia_size,
+		};
+
+		host_err = notify_change(dentry, &size_attr, NULL);
+		if (host_err)
+			goto out_unlock;
+		iap->ia_valid &= ~ATTR_SIZE;
 
 		/*
-		 * notify_change() can alter its iattr argument, making
-		 * @iap unsuitable for submission multiple times. Make a
-		 * copy for every loop iteration.
+		 * Avoid the additional setattr call below if the only other
+		 * attribute that the client sends is the mtime, as we update
+		 * it as part of the size change above.
 		 */
-		attrs = *iap;
-		host_err = __nfsd_setattr(dentry, &attrs);
-		if (host_err != -EAGAIN || !retries--)
-			break;
-		if (!nfsd_wait_for_delegreturn(rqstp, inode))
-			break;
+		if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+			goto out_unlock;
 	}
-	if (attr->na_seclabel && attr->na_seclabel->len)
-		attr->na_labelerr = security_inode_setsecctx(dentry,
-			attr->na_seclabel->data, attr->na_seclabel->len);
-	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
-		attr->na_aclerr = set_posix_acl(inode, ACL_TYPE_ACCESS,
-						attr->na_pacl);
-	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
-	    !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
-		attr->na_aclerr = set_posix_acl(inode, ACL_TYPE_DEFAULT,
-						attr->na_dpacl);
-	fh_fill_post_attrs(fhp);
-	inode_unlock(inode);
+
+	iap->ia_valid |= ATTR_CTIME;
+	host_err = notify_change(dentry, iap, NULL);
+
+out_unlock:
+	fh_unlock(fhp);
 	if (size_change)
 		put_write_access(inode);
 out:
 	if (!host_err)
 		host_err = commit_metadata(fhp);
-	return err != 0 ? err : nfserrno(host_err);
+	return nfserrno(host_err);
 }
 
 #if defined(CONFIG_NFSD_V4)
@@ -608,16 +503,35 @@ int nfsd4_is_junction(struct dentry *dentry)
 		return 0;
 	return 1;
 }
-
-static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct xdr_netobj *label)
 {
-	return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
-}
+	__be32 error;
+	int host_error;
+	struct dentry *dentry;
 
-__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
-		struct nfsd_file *nf_src, u64 src_pos,
-		struct nfsd_file *nf_dst, u64 dst_pos,
-		u64 count, bool sync)
+	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
+	if (error)
+		return error;
+
+	dentry = fhp->fh_dentry;
+
+	inode_lock(d_inode(dentry));
+	host_error = security_inode_setsecctx(dentry, label->data, label->len);
+	inode_unlock(d_inode(dentry));
+	return nfserrno(host_error);
+}
+#else
+__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct xdr_netobj *label)
+{
+	return nfserr_notsupp;
+}
+#endif
+
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+		struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
 {
 	struct file *src = nf_src->nf_file;
 	struct file *dst = nf_dst->nf_file;
@@ -644,17 +558,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
 		if (!status)
 			status = commit_inode_metadata(file_inode(src));
 		if (status < 0) {
-			struct nfsd_net *nn = net_generic(nf_dst->nf_net,
-							  nfsd_net_id);
-
-			trace_nfsd_clone_file_range_err(rqstp,
-					&nfsd4_get_cstate(rqstp)->save_fh,
-					src_pos,
-					&nfsd4_get_cstate(rqstp)->current_fh,
-					dst_pos,
-					count, status);
-			nfsd_reset_write_verifier(nn);
-			trace_nfsd_writeverf_reset(nn, rqstp, status);
+			nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+						 nfsd_net_id));
 			ret = nfserrno(status);
 		}
 	}
@@ -701,6 +606,7 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 #endif /* defined(CONFIG_NFSD_V4) */
 
+#ifdef CONFIG_NFSD_V3
 /*
  * Check server access rights to a file system object
  */
@@ -812,6 +718,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
  out:
 	return error;
 }
+#endif /* CONFIG_NFSD_V3 */
 
 int nfsd_open_break_lease(struct inode *inode, int access)
 {
@@ -844,6 +751,9 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 	path.dentry = fhp->fh_dentry;
 	inode = d_inode(path.dentry);
 
+	/* Disallow write access to files with the append-only bit set
+	 * or any access when mandatory locking enabled
+	 */
 	err = nfserr_perm;
 	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
 		goto out;
@@ -898,7 +808,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 		int may_flags, struct file **filp)
 {
 	__be32 err;
-	bool retried = false;
 
 	validate_process_creds();
 	/*
@@ -914,37 +823,21 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 	 */
 	if (type == S_IFREG)
 		may_flags |= NFSD_MAY_OWNER_OVERRIDE;
-retry:
 	err = fh_verify(rqstp, fhp, type, may_flags);
-	if (!err) {
+	if (!err)
 		err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
-		if (err == nfserr_stale && !retried) {
-			retried = true;
-			fh_put(fhp);
-			goto retry;
-		}
-	}
 	validate_process_creds();
 	return err;
 }
 
-/**
- * nfsd_open_verified - Open a regular file for the filecache
- * @rqstp: RPC request
- * @fhp: NFS filehandle of the file to open
- * @may_flags: internal permission flags
- * @filp: OUT: open "struct file *"
- *
- * Returns an nfsstat value in network byte order.
- */
 __be32
-nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
-		   struct file **filp)
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+		int may_flags, struct file **filp)
 {
 	__be32 err;
 
 	validate_process_creds();
-	err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
+	err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
 	validate_process_creds();
 	return err;
 }
@@ -959,24 +852,28 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		  struct splice_desc *sd)
 {
 	struct svc_rqst *rqstp = sd->u.data;
-	struct page *page = buf->page;	// may be a compound one
-	unsigned offset = buf->offset;
-	struct page *last_page;
+	struct page **pp = rqstp->rq_next_page;
+	struct page *page = buf->page;
+	size_t size;
 
-	last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
-	for (page += offset / PAGE_SIZE; page <= last_page; page++) {
-		/*
-		 * Skip page replacement when extending the contents
-		 * of the current page.
-		 */
-		if (page == *(rqstp->rq_next_page - 1))
-			continue;
-		svc_rqst_replace_page(rqstp, page);
-	}
-	if (rqstp->rq_res.page_len == 0)	// first call
-		rqstp->rq_res.page_base = offset % PAGE_SIZE;
-	rqstp->rq_res.page_len += sd->len;
-	return sd->len;
+	size = sd->len;
+
+	if (rqstp->rq_res.page_len == 0) {
+		get_page(page);
+		put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
+		rqstp->rq_res.page_base = buf->offset;
+		rqstp->rq_res.page_len = size;
+	} else if (page != pp[-1]) {
+		get_page(page);
+		if (*rqstp->rq_next_page)
+			put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
+		rqstp->rq_res.page_len += size;
+	} else
+		rqstp->rq_res.page_len += size;
+
+	return size;
 }
 
 static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
@@ -1000,7 +897,7 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			       unsigned long *count, u32 *eof, ssize_t host_err)
 {
 	if (host_err >= 0) {
-		nfsd_stats_io_read_add(fhp->fh_export, host_err);
+		nfsdstats.io_read += host_err;
 		*eof = nfsd_eof_on_read(file, offset, host_err, *count);
 		*count = host_err;
 		fsnotify_access(file);
@@ -1088,9 +985,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 				unsigned long *cnt, int stable,
 				__be32 *verf)
 {
-	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct file		*file = nf->nf_file;
-	struct super_block	*sb = file_inode(file)->i_sb;
 	struct svc_export	*exp;
 	struct iov_iter		iter;
 	errseq_t		since;
@@ -1098,18 +993,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 	int			host_err;
 	int			use_wgather;
 	loff_t			pos = offset;
-	unsigned long		exp_op_flags = 0;
 	unsigned int		pflags = current->flags;
 	rwf_t			flags = 0;
-	bool			restore_flags = false;
 
 	trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
 
-	if (sb->s_export_op)
-		exp_op_flags = sb->s_export_op->flags;
-
-	if (test_bit(RQ_LOCAL, &rqstp->rq_flags) &&
-	    !(exp_op_flags & EXPORT_OP_REMOTE_FS)) {
+	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
 		/*
 		 * We want throttling in balance_dirty_pages()
 		 * and shrink_inactive_list() to only consider
@@ -1118,8 +1007,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 		 * the client's dirty pages or its congested queue.
 		 */
 		current->flags |= PF_LOCAL_THROTTLE;
-		restore_flags = true;
-	}
 
 	exp = fhp->fh_export;
 	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
@@ -1132,18 +1019,29 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 
 	iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
 	since = READ_ONCE(file->f_wb_err);
-	if (verf)
-		nfsd_copy_write_verifier(verf, nn);
-	file_start_write(file);
-	host_err = vfs_iter_write(file, &iter, &pos, flags);
-	file_end_write(file);
+	if (flags & RWF_SYNC) {
+		if (verf)
+			nfsd_copy_boot_verifier(verf,
+					net_generic(SVC_NET(rqstp),
+					nfsd_net_id));
+		host_err = vfs_iter_write(file, &iter, &pos, flags);
+		if (host_err < 0)
+			nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+						 nfsd_net_id));
+	} else {
+		if (verf)
+			nfsd_copy_boot_verifier(verf,
+					net_generic(SVC_NET(rqstp),
+					nfsd_net_id));
+		host_err = vfs_iter_write(file, &iter, &pos, flags);
+	}
 	if (host_err < 0) {
-		nfsd_reset_write_verifier(nn);
-		trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+		nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+					 nfsd_net_id));
 		goto out_nfserr;
 	}
 	*cnt = host_err;
-	nfsd_stats_io_write_add(exp, *cnt);
+	nfsdstats.io_write += *cnt;
 	fsnotify_modify(file);
 	host_err = filemap_check_wb_err(file->f_mapping, since);
 	if (host_err < 0)
@@ -1151,10 +1049,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 
 	if (stable && use_wgather) {
 		host_err = wait_for_concurrent_writes(file);
-		if (host_err < 0) {
-			nfsd_reset_write_verifier(nn);
-			trace_nfsd_writeverf_reset(nn, rqstp, host_err);
-		}
+		if (host_err < 0)
+			nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+						 nfsd_net_id));
 	}
 
 out_nfserr:
@@ -1165,7 +1062,7 @@ out_nfserr:
 		trace_nfsd_write_err(rqstp, fhp, offset, host_err);
 		nfserr = nfserrno(host_err);
 	}
-	if (restore_flags)
+	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
 		current_restore_flags(pflags, PF_LOCAL_THROTTLE);
 	return nfserr;
 }
@@ -1184,7 +1081,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	__be32 err;
 
 	trace_nfsd_read_start(rqstp, fhp, offset, *count);
-	err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf);
+	err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 	if (err)
 		return err;
 
@@ -1216,7 +1113,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
 
 	trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
 
-	err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf);
+	err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
 	if (err)
 		goto out;
 
@@ -1228,59 +1125,45 @@ out:
 	return err;
 }
 
-/**
- * nfsd_commit - Commit pending writes to stable storage
- * @rqstp: RPC request being processed
- * @fhp: NFS filehandle
- * @nf: target file
- * @offset: raw offset from beginning of file
- * @count: raw count of bytes to sync
- * @verf: filled in with the server's current write verifier
+#ifdef CONFIG_NFSD_V3
+/*
+ * Commit all pending writes to stable storage.
  *
- * Note: we guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced. The server
- * is permitted to sync data that lies outside this range at the
- * same time.
+ * Note: we only guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced.
  *
  * Unfortunately we cannot lock the file to make sure we return full WCC
  * data to the client, as locking happens lower down in the filesystem.
- *
- * Return values:
- *   An nfsstat value in network byte order.
  */
 __be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
-	    u64 offset, u32 count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               loff_t offset, unsigned long count, __be32 *verf)
 {
-	__be32			err = nfs_ok;
-	u64			maxbytes;
-	loff_t			start, end;
-	struct nfsd_net		*nn;
+	struct nfsd_file	*nf;
+	loff_t			end = LLONG_MAX;
+	__be32			err = nfserr_inval;
 
-	/*
-	 * Convert the client-provided (offset, count) range to a
-	 * (start, end) range. If the client-provided range falls
-	 * outside the maximum file size of the underlying FS,
-	 * clamp the sync range appropriately.
-	 */
-	start = 0;
-	end = LLONG_MAX;
-	maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
-	if (offset < maxbytes) {
-		start = offset;
-		if (count && (offset + count - 1 < maxbytes))
-			end = offset + count - 1;
+	if (offset < 0)
+		goto out;
+	if (count != 0) {
+		end = offset + (loff_t)count - 1;
+		if (end < offset)
+			goto out;
 	}
 
-	nn = net_generic(nf->nf_net, nfsd_net_id);
+	err = nfsd_file_acquire(rqstp, fhp,
+			NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
+	if (err)
+		goto out;
 	if (EX_ISSYNC(fhp->fh_export)) {
 		errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
 		int err2;
 
-		err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
+		err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
 		switch (err2) {
 		case 0:
-			nfsd_copy_write_verifier(verf, nn);
+			nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+						nfsd_net_id));
 			err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
 						    since);
 			err = nfserrno(err2);
@@ -1289,37 +1172,28 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
 			err = nfserr_notsupp;
 			break;
 		default:
-			nfsd_reset_write_verifier(nn);
-			trace_nfsd_writeverf_reset(nn, rqstp, err2);
+			nfsd_reset_boot_verifier(net_generic(nf->nf_net,
+						 nfsd_net_id));
 			err = nfserrno(err2);
 		}
 	} else
-		nfsd_copy_write_verifier(verf, nn);
+		nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+					nfsd_net_id));
 
+	nfsd_file_put(nf);
+out:
 	return err;
 }
+#endif /* CONFIG_NFSD_V3 */
 
-/**
- * nfsd_create_setattr - Set a created file's attributes
- * @rqstp: RPC transaction being executed
- * @fhp: NFS filehandle of parent directory
- * @resfhp: NFS filehandle of new object
- * @attrs: requested attributes of new object
- *
- * Returns nfs_ok on success, or an nfsstat in network byte order.
- */
-__be32
-nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		    struct svc_fh *resfhp, struct nfsd_attrs *attrs)
+static __be32
+nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
+			struct iattr *iap)
 {
-	struct iattr *iap = attrs->na_iattr;
-	__be32 status;
-
 	/*
-	 * Mode has already been set by file creation.
+	 * Mode has already been set earlier in create:
 	 */
 	iap->ia_valid &= ~ATTR_MODE;
-
 	/*
 	 * Setting uid/gid works only for root.  Irix appears to
 	 * send along the gid on create when it tries to implement
@@ -1327,31 +1201,10 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 */
 	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
 		iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
-
-	/*
-	 * Callers expect new file metadata to be committed even
-	 * if the attributes have not changed.
-	 */
 	if (iap->ia_valid)
-		status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0);
-	else
-		status = nfserrno(commit_metadata(resfhp));
-
-	/*
-	 * Transactional filesystems had a chance to commit changes
-	 * for both parent and child simultaneously making the
-	 * following commit_metadata a noop in many cases.
-	 */
-	if (!status)
-		status = nfserrno(commit_metadata(fhp));
-
-	/*
-	 * Update the new filehandle to pick up the new attributes.
-	 */
-	if (!status)
-		status = fh_update(resfhp);
-
-	return status;
+		return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
+	/* Callers expect file metadata to be committed here */
+	return nfserrno(commit_metadata(resfhp));
 }
 
 /* HPUX client sometimes creates a file in mode 000, and sets size to 0.
@@ -1372,19 +1225,26 @@ nfsd_check_ignore_resizing(struct iattr *iap)
 /* The parent directory should already be locked: */
 __be32
 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		   struct nfsd_attrs *attrs,
-		   int type, dev_t rdev, struct svc_fh *resfhp)
+		char *fname, int flen, struct iattr *iap,
+		int type, dev_t rdev, struct svc_fh *resfhp)
 {
 	struct dentry	*dentry, *dchild;
 	struct inode	*dirp;
-	struct iattr	*iap = attrs->na_iattr;
 	__be32		err;
+	__be32		err2;
 	int		host_err;
 
 	dentry = fhp->fh_dentry;
 	dirp = d_inode(dentry);
 
 	dchild = dget(resfhp->fh_dentry);
+	if (!fhp->fh_locked) {
+		WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
+				dentry);
+		err = nfserr_io;
+		goto out;
+	}
+
 	err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
 	if (err)
 		goto out;
@@ -1397,6 +1257,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		iap->ia_mode &= ~current_umask();
 
 	err = 0;
+	host_err = 0;
 	switch (type) {
 	case S_IFREG:
 		host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
@@ -1442,8 +1303,22 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (host_err < 0)
 		goto out_nfserr;
 
-	err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
+	err = nfsd_create_setattr(rqstp, resfhp, iap);
 
+	/*
+	 * nfsd_create_setattr already committed the child.  Transactional
+	 * filesystems had a chance to commit changes for both parent and
+	 * child simultaneously making the following commit_metadata a
+	 * noop.
+	 */
+	err2 = nfserrno(commit_metadata(fhp));
+	if (err2)
+		err = err2;
+	/*
+	 * Update the file handle to get the new inode info.
+	 */
+	if (!err)
+		err = fh_update(resfhp);
 out:
 	dput(dchild);
 	return err;
@@ -1461,8 +1336,8 @@ out_nfserr:
  */
 __be32
 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
-	    char *fname, int flen, struct nfsd_attrs *attrs,
-	    int type, dev_t rdev, struct svc_fh *resfhp)
+		char *fname, int flen, struct iattr *iap,
+		int type, dev_t rdev, struct svc_fh *resfhp)
 {
 	struct dentry	*dentry, *dchild = NULL;
 	__be32		err;
@@ -1481,13 +1356,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (host_err)
 		return nfserrno(host_err);
 
-	inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
+	fh_lock_nested(fhp, I_MUTEX_PARENT);
 	dchild = lookup_one_len(fname, dentry, flen);
 	host_err = PTR_ERR(dchild);
-	if (IS_ERR(dchild)) {
-		err = nfserrno(host_err);
-		goto out_unlock;
-	}
+	if (IS_ERR(dchild))
+		return nfserrno(host_err);
 	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
 	/*
 	 * We unconditionally drop our ref to dchild as fh_compose will have
@@ -1495,15 +1368,179 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	 */
 	dput(dchild);
 	if (err)
-		goto out_unlock;
-	fh_fill_pre_attrs(fhp);
-	err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
-	fh_fill_post_attrs(fhp);
-out_unlock:
-	inode_unlock(dentry->d_inode);
-	return err;
+		return err;
+	return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
+					rdev, resfhp);
 }
 
+#ifdef CONFIG_NFSD_V3
+
+/*
+ * NFSv3 and NFSv4 version of nfsd_create
+ */
+__be32
+do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		char *fname, int flen, struct iattr *iap,
+		struct svc_fh *resfhp, int createmode, u32 *verifier,
+	        bool *truncp, bool *created)
+{
+	struct dentry	*dentry, *dchild = NULL;
+	struct inode	*dirp;
+	__be32		err;
+	int		host_err;
+	__u32		v_mtime=0, v_atime=0;
+
+	err = nfserr_perm;
+	if (!flen)
+		goto out;
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+	if (!(iap->ia_valid & ATTR_MODE))
+		iap->ia_mode = 0;
+	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	dirp = d_inode(dentry);
+
+	host_err = fh_want_write(fhp);
+	if (host_err)
+		goto out_nfserr;
+
+	fh_lock_nested(fhp, I_MUTEX_PARENT);
+
+	/*
+	 * Compose the response file handle.
+	 */
+	dchild = lookup_one_len(fname, dentry, flen);
+	host_err = PTR_ERR(dchild);
+	if (IS_ERR(dchild))
+		goto out_nfserr;
+
+	/* If file doesn't exist, check for permissions to create one */
+	if (d_really_is_negative(dchild)) {
+		err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+		if (err)
+			goto out;
+	}
+
+	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+	if (err)
+		goto out;
+
+	if (nfsd_create_is_exclusive(createmode)) {
+		/* solaris7 gets confused (bugid 4218508) if these have
+		 * the high bit set, so just clear the high bits. If this is
+		 * ever changed to use different attrs for storing the
+		 * verifier, then do_open_lookup() will also need to be fixed
+		 * accordingly.
+		 */
+		v_mtime = verifier[0]&0x7fffffff;
+		v_atime = verifier[1]&0x7fffffff;
+	}
+	
+	if (d_really_is_positive(dchild)) {
+		err = 0;
+
+		switch (createmode) {
+		case NFS3_CREATE_UNCHECKED:
+			if (! d_is_reg(dchild))
+				goto out;
+			else if (truncp) {
+				/* in nfsv4, we need to treat this case a little
+				 * differently.  we don't want to truncate the
+				 * file now; this would be wrong if the OPEN
+				 * fails for some other reason.  furthermore,
+				 * if the size is nonzero, we should ignore it
+				 * according to spec!
+				 */
+				*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
+			}
+			else {
+				iap->ia_valid &= ATTR_SIZE;
+				goto set_attr;
+			}
+			break;
+		case NFS3_CREATE_EXCLUSIVE:
+			if (   d_inode(dchild)->i_mtime.tv_sec == v_mtime
+			    && d_inode(dchild)->i_atime.tv_sec == v_atime
+			    && d_inode(dchild)->i_size  == 0 ) {
+				if (created)
+					*created = true;
+				break;
+			}
+			fallthrough;
+		case NFS4_CREATE_EXCLUSIVE4_1:
+			if (   d_inode(dchild)->i_mtime.tv_sec == v_mtime
+			    && d_inode(dchild)->i_atime.tv_sec == v_atime
+			    && d_inode(dchild)->i_size  == 0 ) {
+				if (created)
+					*created = true;
+				goto set_attr;
+			}
+			fallthrough;
+		case NFS3_CREATE_GUARDED:
+			err = nfserr_exist;
+		}
+		fh_drop_write(fhp);
+		goto out;
+	}
+
+	if (!IS_POSIXACL(dirp))
+		iap->ia_mode &= ~current_umask();
+
+	host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
+	if (host_err < 0) {
+		fh_drop_write(fhp);
+		goto out_nfserr;
+	}
+	if (created)
+		*created = true;
+
+	nfsd_check_ignore_resizing(iap);
+
+	if (nfsd_create_is_exclusive(createmode)) {
+		/* Cram the verifier into atime/mtime */
+		iap->ia_valid = ATTR_MTIME|ATTR_ATIME
+			| ATTR_MTIME_SET|ATTR_ATIME_SET;
+		/* XXX someone who knows this better please fix it for nsec */ 
+		iap->ia_mtime.tv_sec = v_mtime;
+		iap->ia_atime.tv_sec = v_atime;
+		iap->ia_mtime.tv_nsec = 0;
+		iap->ia_atime.tv_nsec = 0;
+	}
+
+ set_attr:
+	err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+	/*
+	 * nfsd_create_setattr already committed the child
+	 * (and possibly also the parent).
+	 */
+	if (!err)
+		err = nfserrno(commit_metadata(fhp));
+
+	/*
+	 * Update the filehandle to get the new inode info.
+	 */
+	if (!err)
+		err = fh_update(resfhp);
+
+ out:
+	fh_unlock(fhp);
+	if (dchild && !IS_ERR(dchild))
+		dput(dchild);
+	fh_drop_write(fhp);
+ 	return err;
+ 
+ out_nfserr:
+	err = nfserrno(host_err);
+	goto out;
+}
+#endif /* CONFIG_NFSD_V3 */
+
 /*
  * Read a symlink. On entry, *lenp must contain the maximum path length that
  * fits into the buffer. On return, it contains the true length.
@@ -1542,25 +1579,15 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 	return 0;
 }
 
-/**
- * nfsd_symlink - Create a symlink and look up its inode
- * @rqstp: RPC transaction being executed
- * @fhp: NFS filehandle of parent directory
- * @fname: filename of the new symlink
- * @flen: length of @fname
- * @path: content of the new symlink (NUL-terminated)
- * @attrs: requested attributes of new object
- * @resfhp: NFS filehandle of new object
- *
+/*
+ * Create a symlink and look up its inode
  * N.B. After this call _both_ fhp and resfhp need an fh_put
- *
- * Returns nfs_ok on success, or an nfsstat in network byte order.
  */
 __be32
 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
-	     char *fname, int flen,
-	     char *path, struct nfsd_attrs *attrs,
-	     struct svc_fh *resfhp)
+				char *fname, int flen,
+				char *path,
+				struct svc_fh *resfhp)
 {
 	struct dentry	*dentry, *dnew;
 	__be32		err, cerr;
@@ -1578,35 +1605,33 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out;
 
 	host_err = fh_want_write(fhp);
-	if (host_err) {
-		err = nfserrno(host_err);
-		goto out;
-	}
+	if (host_err)
+		goto out_nfserr;
 
+	fh_lock(fhp);
 	dentry = fhp->fh_dentry;
-	inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
 	dnew = lookup_one_len(fname, dentry, flen);
-	if (IS_ERR(dnew)) {
-		err = nfserrno(PTR_ERR(dnew));
-		inode_unlock(dentry->d_inode);
-		goto out_drop_write;
-	}
-	fh_fill_pre_attrs(fhp);
+	host_err = PTR_ERR(dnew);
+	if (IS_ERR(dnew))
+		goto out_nfserr;
+
 	host_err = vfs_symlink(d_inode(dentry), dnew, path);
 	err = nfserrno(host_err);
-	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
-	if (!err)
-		nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
-	fh_fill_post_attrs(fhp);
-	inode_unlock(dentry->d_inode);
 	if (!err)
 		err = nfserrno(commit_metadata(fhp));
+	fh_unlock(fhp);
+
+	fh_drop_write(fhp);
+
+	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
 	dput(dnew);
 	if (err==0) err = cerr;
-out_drop_write:
-	fh_drop_write(fhp);
 out:
 	return err;
+
+out_nfserr:
+	err = nfserrno(host_err);
+	goto out;
 }
 
 /*
@@ -1644,25 +1669,21 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 		goto out;
 	}
 
+	fh_lock_nested(ffhp, I_MUTEX_PARENT);
 	ddir = ffhp->fh_dentry;
 	dirp = d_inode(ddir);
-	inode_lock_nested(dirp, I_MUTEX_PARENT);
 
 	dnew = lookup_one_len(name, ddir, len);
-	if (IS_ERR(dnew)) {
-		err = nfserrno(PTR_ERR(dnew));
-		goto out_unlock;
-	}
+	host_err = PTR_ERR(dnew);
+	if (IS_ERR(dnew))
+		goto out_nfserr;
 
 	dold = tfhp->fh_dentry;
 
 	err = nfserr_noent;
 	if (d_really_is_negative(dold))
 		goto out_dput;
-	fh_fill_pre_attrs(ffhp);
 	host_err = vfs_link(dold, dirp, dnew, NULL);
-	fh_fill_post_attrs(ffhp);
-	inode_unlock(dirp);
 	if (!host_err) {
 		err = nfserrno(commit_metadata(ffhp));
 		if (!err)
@@ -1673,17 +1694,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 		else
 			err = nfserrno(host_err);
 	}
+out_dput:
 	dput(dnew);
-out_drop_write:
+out_unlock:
+	fh_unlock(ffhp);
 	fh_drop_write(tfhp);
 out:
 	return err;
 
-out_dput:
-	dput(dnew);
-out_unlock:
-	inode_unlock(dirp);
-	goto out_drop_write;
+out_nfserr:
+	err = nfserrno(host_err);
+	goto out_unlock;
 }
 
 static void
@@ -1718,7 +1739,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 	struct inode	*fdir, *tdir;
 	__be32		err;
 	int		host_err;
-	bool		close_cached = false;
+	bool		has_cached = false;
 
 	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
 	if (err)
@@ -1750,9 +1771,12 @@ retry:
 		goto out;
 	}
 
+	/* cannot use fh_lock as we need deadlock protective ordering
+	 * so do it by hand */
 	trap = lock_rename(tdentry, fdentry);
-	fh_fill_pre_attrs(ffhp);
-	fh_fill_pre_attrs(tfhp);
+	ffhp->fh_locked = tfhp->fh_locked = true;
+	fill_pre_wcc(ffhp);
+	fill_pre_wcc(tfhp);
 
 	odentry = lookup_one_len(fname, fdentry, flen);
 	host_err = PTR_ERR(odentry);
@@ -1774,26 +1798,11 @@ retry:
 	if (ndentry == trap)
 		goto out_dput_new;
 
-	if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
-	    nfsd_has_cached_files(ndentry)) {
-		close_cached = true;
+	if (nfsd_has_cached_files(ndentry)) {
+		has_cached = true;
 		goto out_dput_old;
 	} else {
-		struct renamedata rd = {
-			.old_dir	= fdir,
-			.old_dentry	= odentry,
-			.new_dir	= tdir,
-			.new_dentry	= ndentry,
-		};
-		int retries;
-
-		for (retries = 1;;) {
-			host_err = vfs_rename(&rd);
-			if (host_err != -EAGAIN || !retries--)
-				break;
-			if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
-				break;
-		}
+		host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
 		if (!host_err) {
 			host_err = commit_metadata(tfhp);
 			if (!host_err)
@@ -1806,12 +1815,17 @@ retry:
 	dput(odentry);
  out_nfserr:
 	err = nfserrno(host_err);
-
-	if (!close_cached) {
-		fh_fill_post_attrs(ffhp);
-		fh_fill_post_attrs(tfhp);
+	/*
+	 * We cannot rely on fh_unlock on the two filehandles,
+	 * as that would do the wrong thing if the two directories
+	 * were the same, so again we do it by hand.
+	 */
+	if (!has_cached) {
+		fill_post_wcc(ffhp);
+		fill_post_wcc(tfhp);
 	}
 	unlock_rename(tdentry, fdentry);
+	ffhp->fh_locked = tfhp->fh_locked = false;
 	fh_drop_write(ffhp);
 
 	/*
@@ -1820,8 +1834,8 @@ retry:
 	 * shouldn't be done with locks held however, so we delay it until this
 	 * point and then reattempt the whole shebang.
 	 */
-	if (close_cached) {
-		close_cached = false;
+	if (has_cached) {
+		has_cached = false;
 		nfsd_close_cached_files(ndentry);
 		dput(ndentry);
 		goto retry;
@@ -1840,7 +1854,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 {
 	struct dentry	*dentry, *rdentry;
 	struct inode	*dirp;
-	struct inode	*rinode;
 	__be32		err;
 	int		host_err;
 
@@ -1855,50 +1868,34 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 	if (host_err)
 		goto out_nfserr;
 
+	fh_lock_nested(fhp, I_MUTEX_PARENT);
 	dentry = fhp->fh_dentry;
 	dirp = d_inode(dentry);
-	inode_lock_nested(dirp, I_MUTEX_PARENT);
 
 	rdentry = lookup_one_len(fname, dentry, flen);
 	host_err = PTR_ERR(rdentry);
 	if (IS_ERR(rdentry))
-		goto out_unlock;
+		goto out_drop_write;
 
 	if (d_really_is_negative(rdentry)) {
 		dput(rdentry);
 		host_err = -ENOENT;
-		goto out_unlock;
+		goto out_drop_write;
 	}
-	rinode = d_inode(rdentry);
-	ihold(rinode);
 
 	if (!type)
 		type = d_inode(rdentry)->i_mode & S_IFMT;
 
-	fh_fill_pre_attrs(fhp);
 	if (type != S_IFDIR) {
-		int retries;
-
-		if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
-			nfsd_close_cached_files(rdentry);
-
-		for (retries = 1;;) {
-			host_err = vfs_unlink(dirp, rdentry, NULL);
-			if (host_err != -EAGAIN || !retries--)
-				break;
-			if (!nfsd_wait_for_delegreturn(rqstp, rinode))
-				break;
-		}
+		nfsd_close_cached_files(rdentry);
+		host_err = vfs_unlink(dirp, rdentry, NULL);
 	} else {
 		host_err = vfs_rmdir(dirp, rdentry);
 	}
-	fh_fill_post_attrs(fhp);
 
-	inode_unlock(dirp);
 	if (!host_err)
 		host_err = commit_metadata(fhp);
 	dput(rdentry);
-	iput(rinode);    /* truncate the inode here */
 
 out_drop_write:
 	fh_drop_write(fhp);
@@ -1916,9 +1913,6 @@ out_nfserr:
 	}
 out:
 	return err;
-out_unlock:
-	inode_unlock(dirp);
-	goto out_drop_write;
 }
 
 /*
@@ -1968,9 +1962,8 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
 	return 0;
 }
 
-static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp,
-				    nfsd_filldir_t func, struct readdir_cd *cdp,
-				    loff_t *offsetp)
+static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
+				    struct readdir_cd *cdp, loff_t *offsetp)
 {
 	struct buffered_dirent *de;
 	int host_err;
@@ -2016,8 +2009,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp,
 			if (cdp->err != nfs_ok)
 				break;
 
-			trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen);
-
 			reclen = ALIGN(sizeof(*de) + de->namlen,
 				       sizeof(u64));
 			size -= reclen;
@@ -2065,7 +2056,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
 		goto out_close;
 	}
 
-	err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp);
+	err = nfsd_buffered_readdir(file, func, cdp, offsetp);
 
 	if (err == nfserr_eof || err == nfserr_toosmall)
 		err = nfs_ok; /* can still be found in ->err */
@@ -2272,16 +2263,13 @@ out:
 	return err;
 }
 
-/**
- * nfsd_removexattr - Remove an extended attribute
- * @rqstp: RPC transaction being executed
- * @fhp: NFS filehandle of object with xattr to remove
- * @name: name of xattr to remove (NUL-terminate)
- *
- * Pass in a NULL pointer for delegated_inode, and let the client deal
- * with NFS4ERR_DELAY (same as with e.g. setattr and remove).
- *
- * Returns nfs_ok on success, or an nfsstat in network byte order.
+/*
+ * Removexattr and setxattr need to call fh_lock to both lock the inode
+ * and set the change attribute. Since the top-level vfs_removexattr
+ * and vfs_setxattr calls already do their own inode_lock calls, call
+ * the _locked variant. Pass in a NULL pointer for delegated_inode,
+ * and let the client deal with NFS4ERR_DELAY (same as with e.g.
+ * setattr and remove).
  */
 __be32
 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
@@ -2297,13 +2285,11 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
 	if (ret)
 		return nfserrno(ret);
 
-	inode_lock(fhp->fh_dentry->d_inode);
-	fh_fill_pre_attrs(fhp);
+	fh_lock(fhp);
 
 	ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL);
 
-	fh_fill_post_attrs(fhp);
-	inode_unlock(fhp->fh_dentry->d_inode);
+	fh_unlock(fhp);
 	fh_drop_write(fhp);
 
 	return nfsd_xattr_errno(ret);
@@ -2323,13 +2309,12 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
 	ret = fh_want_write(fhp);
 	if (ret)
 		return nfserrno(ret);
-	inode_lock(fhp->fh_dentry->d_inode);
-	fh_fill_pre_attrs(fhp);
+	fh_lock(fhp);
 
 	ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags,
 				    NULL);
-	fh_fill_post_attrs(fhp);
-	inode_unlock(fhp->fh_dentry->d_inode);
+
+	fh_unlock(fhp);
 	fh_drop_write(fhp);
 
 	return nfsd_xattr_errno(ret);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index dbdfef7ae85b..a2442ebe5acf 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -6,8 +6,6 @@
 #ifndef LINUX_NFSD_VFS_H
 #define LINUX_NFSD_VFS_H
 
-#include <linux/fs.h>
-#include <linux/posix_acl.h>
 #include "nfsfh.h"
 #include "nfsd.h"
 
@@ -44,23 +42,6 @@ struct nfsd_file;
 typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 
 /* nfsd/vfs.c */
-struct nfsd_attrs {
-	struct iattr		*na_iattr;	/* input */
-	struct xdr_netobj	*na_seclabel;	/* input */
-	struct posix_acl	*na_pacl;	/* input */
-	struct posix_acl	*na_dpacl;	/* input */
-
-	int			na_labelerr;	/* output */
-	int			na_aclerr;	/* output */
-};
-
-static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
-{
-	posix_acl_release(attrs->na_pacl);
-	posix_acl_release(attrs->na_dpacl);
-}
-
-__be32		nfserrno (int errno);
 int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
 		                struct svc_export **expp);
 __be32		nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -69,28 +50,32 @@ __be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
 				const char *, unsigned int,
 				struct svc_export **, struct dentry **);
 __be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
-				struct nfsd_attrs *, int, time64_t);
+				struct iattr *, int, time64_t);
 int nfsd_mountpoint(struct dentry *, struct svc_export *);
 #ifdef CONFIG_NFSD_V4
+__be32          nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
+		    struct xdr_netobj *);
 __be32		nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
 				    struct file *, loff_t, loff_t, int);
-__be32		nfsd4_clone_file_range(struct svc_rqst *rqstp,
-				       struct nfsd_file *nf_src, u64 src_pos,
+__be32		nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
 				       struct nfsd_file *nf_dst, u64 dst_pos,
 				       u64 count, bool sync);
 #endif /* CONFIG_NFSD_V4 */
 __be32		nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
-				struct nfsd_attrs *attrs, int type, dev_t rdev,
-				struct svc_fh *res);
-__be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, struct nfsd_attrs *attrs,
+				char *name, int len, struct iattr *attrs,
 				int type, dev_t rdev, struct svc_fh *res);
+__be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, struct iattr *attrs,
+				int type, dev_t rdev, struct svc_fh *res);
+#ifdef CONFIG_NFSD_V3
 __be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
-__be32		nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
-				struct svc_fh *resfhp, struct nfsd_attrs *iap);
-__be32		nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
-				struct nfsd_file *nf, u64 offset, u32 count,
-				__be32 *verf);
+__be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, struct iattr *attrs,
+				struct svc_fh *res, int createmode,
+				u32 *verifier, bool *truncp, bool *created);
+__be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
+				loff_t, unsigned long, __be32 *verf);
+#endif /* CONFIG_NFSD_V3 */
 #ifdef CONFIG_NFSD_V4
 __be32		nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 			    char *name, void **bufp, int *lenp);
@@ -104,7 +89,7 @@ __be32		nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 int 		nfsd_open_break_lease(struct inode *, int);
 __be32		nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
 				int, struct file **);
-__be32		nfsd_open_verified(struct svc_rqst *, struct svc_fh *,
+__be32		nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
 				int, struct file **);
 __be32		nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				struct file *file, loff_t offset,
@@ -128,9 +113,8 @@ __be32		nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
 				char *, int *);
 __be32		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
-			     char *name, int len, char *path,
-			     struct nfsd_attrs *attrs,
-			     struct svc_fh *res);
+				char *name, int len, char *path,
+				struct svc_fh *res);
 __be32		nfsd_link(struct svc_rqst *, struct svc_fh *,
 				char *, int, struct svc_fh *);
 ssize_t		nfsd_copy_file_range(struct file *, u64,
@@ -168,7 +152,7 @@ static inline void fh_drop_write(struct svc_fh *fh)
 	}
 }
 
-static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
+static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
 {
 	struct path p = {.mnt = fh->fh_export->ex_path.mnt,
 			 .dentry = fh->fh_dentry};
@@ -176,4 +160,10 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
 				    AT_STATX_SYNC_AS_STAT));
 }
 
+static inline int nfsd_create_is_exclusive(int createmode)
+{
+	return createmode == NFS3_CREATE_EXCLUSIVE
+	       || createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
 #endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 852f71580bd0..b8cc6a4b2e0e 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -27,13 +27,14 @@ struct nfsd_readargs {
 	struct svc_fh		fh;
 	__u32			offset;
 	__u32			count;
+	int			vlen;
 };
 
 struct nfsd_writeargs {
 	svc_fh			fh;
 	__u32			offset;
 	__u32			len;
-	struct xdr_buf		payload;
+	struct kvec		first;
 };
 
 struct nfsd_createargs {
@@ -52,6 +53,11 @@ struct nfsd_renameargs {
 	unsigned int		tlen;
 };
 
+struct nfsd_readlinkargs {
+	struct svc_fh		fh;
+	char *			buffer;
+};
+	
 struct nfsd_linkargs {
 	struct svc_fh		ffh;
 	struct svc_fh		tfh;
@@ -73,6 +79,7 @@ struct nfsd_readdirargs {
 	struct svc_fh		fh;
 	__u32			cookie;
 	__u32			count;
+	__be32 *		buffer;
 };
 
 struct nfsd_stat {
@@ -94,7 +101,6 @@ struct nfsd_diropres  {
 struct nfsd_readlinkres {
 	__be32			status;
 	int			len;
-	struct page		*page;
 };
 
 struct nfsd_readres {
@@ -102,20 +108,17 @@ struct nfsd_readres {
 	struct svc_fh		fh;
 	unsigned long		count;
 	struct kstat		stat;
-	struct page		**pages;
 };
 
 struct nfsd_readdirres {
-	/* Components of the reply */
 	__be32			status;
 
 	int			count;
 
-	/* Used to encode the reply's entry list */
-	struct xdr_stream	xdr;
-	struct xdr_buf		dirlist;
 	struct readdir_cd	common;
-	unsigned int		cookie_offset;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
 };
 
 struct nfsd_statfsres {
@@ -141,37 +144,36 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+int nfssvc_encode_stat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
 
-bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
-int nfssvc_encode_entry(void *data, const char *name, int namlen,
-			loff_t offset, u64 ino, unsigned int d_type);
+int nfssvc_encode_entry(void *, const char *name,
+			int namlen, loff_t offset, u64 ino, unsigned int);
 
 void nfssvc_release_attrstat(struct svc_rqst *rqstp);
 void nfssvc_release_diropres(struct svc_rqst *rqstp);
 void nfssvc_release_readres(struct svc_rqst *rqstp);
 
 /* Helper functions for NFSv2 ACL code */
-bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp);
-bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status);
-bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-			 const struct svc_fh *fhp, const struct kstat *stat);
+__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
+__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
 
 #endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 03fe4e21306c..ae6fa6c9cb46 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -25,13 +25,14 @@ struct nfsd3_diropargs {
 
 struct nfsd3_accessargs {
 	struct svc_fh		fh;
-	__u32			access;
+	unsigned int		access;
 };
 
 struct nfsd3_readargs {
 	struct svc_fh		fh;
 	__u64			offset;
 	__u32			count;
+	int			vlen;
 };
 
 struct nfsd3_writeargs {
@@ -40,7 +41,7 @@ struct nfsd3_writeargs {
 	__u32			count;
 	int			stable;
 	__u32			len;
-	struct xdr_buf		payload;
+	struct kvec		first;
 };
 
 struct nfsd3_createargs {
@@ -70,6 +71,11 @@ struct nfsd3_renameargs {
 	unsigned int		tlen;
 };
 
+struct nfsd3_readlinkargs {
+	struct svc_fh		fh;
+	char *			buffer;
+};
+
 struct nfsd3_linkargs {
 	struct svc_fh		ffh;
 	struct svc_fh		tfh;
@@ -90,8 +96,10 @@ struct nfsd3_symlinkargs {
 struct nfsd3_readdirargs {
 	struct svc_fh		fh;
 	__u64			cookie;
+	__u32			dircount;
 	__u32			count;
 	__be32 *		verf;
+	__be32 *		buffer;
 };
 
 struct nfsd3_commitargs {
@@ -102,13 +110,13 @@ struct nfsd3_commitargs {
 
 struct nfsd3_getaclargs {
 	struct svc_fh		fh;
-	__u32			mask;
+	int			mask;
 };
 
 struct posix_acl;
 struct nfsd3_setaclargs {
 	struct svc_fh		fh;
-	__u32			mask;
+	int			mask;
 	struct posix_acl	*acl_access;
 	struct posix_acl	*acl_default;
 };
@@ -137,7 +145,6 @@ struct nfsd3_readlinkres {
 	__be32			status;
 	struct svc_fh		fh;
 	__u32			len;
-	struct page		**pages;
 };
 
 struct nfsd3_readres {
@@ -145,7 +152,6 @@ struct nfsd3_readres {
 	struct svc_fh		fh;
 	unsigned long		count;
 	__u32			eof;
-	struct page		**pages;
 };
 
 struct nfsd3_writeres {
@@ -169,17 +175,19 @@ struct nfsd3_linkres {
 };
 
 struct nfsd3_readdirres {
-	/* Components of the reply */
 	__be32			status;
 	struct svc_fh		fh;
+	/* Just to save kmalloc on every readdirplus entry (svc_fh is a
+	 * little large for the stack): */
+	struct svc_fh		scratch;
+	int			count;
 	__be32			verf[2];
 
-	/* Used to encode the reply's entry list */
-	struct xdr_stream	xdr;
-	struct xdr_buf		dirlist;
-	struct svc_fh		scratch;
 	struct readdir_cd	common;
-	unsigned int		cookie_offset;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+	__be32 *		offset1;
 	struct svc_rqst *	rqstp;
 
 };
@@ -265,50 +273,52 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_voidarg(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
-
-void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset);
-int nfs3svc_encode_entry3(void *data, const char *name, int namlen,
-			  loff_t offset, u64 ino, unsigned int d_type);
-int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen,
-			      loff_t offset, u64 ino, unsigned int d_type);
+int nfs3svc_encode_entry(void *, const char *name,
+				int namlen, loff_t offset, u64 ino,
+				unsigned int);
+int nfs3svc_encode_entry_plus(void *, const char *name,
+				int namlen, loff_t offset, u64 ino,
+				unsigned int);
 /* Helper functions for NFSv3 ACL code */
-bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp);
-bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status);
-bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
-				const struct svc_fh *fhp);
+__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
+				struct svc_fh *fhp);
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
 
 #endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a034b9b62137..679d40af1bbb 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -76,7 +76,12 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
 
 struct nfsd4_change_info {
 	u32		atomic;
+	bool		change_supported;
+	u32		before_ctime_sec;
+	u32		before_ctime_nsec;
 	u64		before_change;
+	u32		after_ctime_sec;
+	u32		after_ctime_nsec;
 	u64		after_change;
 };
 
@@ -247,8 +252,7 @@ struct nfsd4_listxattrs {
 
 struct nfsd4_open {
 	u32		op_claim_type;      /* request */
-	u32		op_fnamelen;
-	char *		op_fname;	    /* request - everything but CLAIM_PREV */
+	struct xdr_netobj op_fname;	    /* request - everything but CLAIM_PREV */
 	u32		op_delegate_type;   /* request - CLAIM_PREV only */
 	stateid_t       op_delegate_stateid; /* request - response */
 	u32		op_why_no_deleg;    /* response - DELEG_NONE_EXT only */
@@ -273,13 +277,11 @@ struct nfsd4_open {
 	bool		op_truncate;        /* used during processing */
 	bool		op_created;         /* used during processing */
 	struct nfs4_openowner *op_openowner; /* used during processing */
-	struct file	*op_filp;           /* used during processing */
 	struct nfs4_file *op_file;          /* used during processing */
 	struct nfs4_ol_stateid *op_stp;	    /* used during processing */
 	struct nfs4_clnt_odstate *op_odstate; /* used during processing */
 	struct nfs4_acl *op_acl;
 	struct xdr_netobj op_label;
-	struct svc_rqst *op_rqstp;
 };
 
 struct nfsd4_open_confirm {
@@ -303,10 +305,9 @@ struct nfsd4_read {
 	u32			rd_length;          /* request */
 	int			rd_vlen;
 	struct nfsd_file	*rd_nf;
-
+	
 	struct svc_rqst		*rd_rqstp;          /* response */
-	struct svc_fh		*rd_fhp;            /* response */
-	u32			rd_eof;             /* response */
+	struct svc_fh		*rd_fhp;             /* response */
 };
 
 struct nfsd4_readdir {
@@ -384,6 +385,13 @@ struct nfsd4_setclientid_confirm {
 	nfs4_verifier	sc_confirm;
 };
 
+struct nfsd4_saved_compoundargs {
+	__be32 *p;
+	__be32 *end;
+	int pagelen;
+	struct page **pagelist;
+};
+
 struct nfsd4_test_stateid_id {
 	__be32			ts_id_status;
 	stateid_t		ts_id_stateid;
@@ -411,7 +419,8 @@ struct nfsd4_write {
 	u64		wr_offset;          /* request */
 	u32		wr_stable_how;      /* request */
 	u32		wr_buflen;          /* request */
-	struct xdr_buf	wr_payload;         /* request */
+	struct kvec	wr_head;
+	struct page **	wr_pagelist;        /* request */
 
 	u32		wr_bytes_written;   /* response */
 	u32		wr_how_written;     /* response */
@@ -424,7 +433,7 @@ struct nfsd4_exchange_id {
 	u32		flags;
 	clientid_t	clientid;
 	u32		seqid;
-	u32		spa_how;
+	int		spa_how;
 	u32             spo_must_enforce[3];
 	u32             spo_must_allow[3];
 	struct xdr_netobj nii_domain;
@@ -534,13 +543,6 @@ struct nfsd42_write_res {
 	stateid_t		cb_stateid;
 };
 
-struct nfsd4_cb_offload {
-	struct nfsd4_callback	co_cb;
-	struct nfsd42_write_res	co_res;
-	__be32			co_nfserr;
-	struct knfsd_fh		co_fh;
-};
-
 struct nfsd4_copy {
 	/* request */
 	stateid_t		cp_src_stateid;
@@ -548,16 +550,18 @@ struct nfsd4_copy {
 	u64			cp_src_pos;
 	u64			cp_dst_pos;
 	u64			cp_count;
-	struct nl4_server	*cp_src;
+	struct nl4_server	cp_src;
+	bool			cp_intra;
 
-	unsigned long		cp_flags;
-#define NFSD4_COPY_F_STOPPED		(0)
-#define NFSD4_COPY_F_INTRA		(1)
-#define NFSD4_COPY_F_SYNCHRONOUS	(2)
-#define NFSD4_COPY_F_COMMITTED		(3)
+	/* both */
+	bool		cp_synchronous;
 
 	/* response */
 	struct nfsd42_write_res	cp_res;
+
+	/* for cb_offload */
+	struct nfsd4_callback	cp_cb;
+	__be32			nfserr;
 	struct knfsd_fh		fh;
 
 	struct nfs4_client      *cp_clp;
@@ -570,34 +574,13 @@ struct nfsd4_copy {
 	struct list_head	copies;
 	struct task_struct	*copy_task;
 	refcount_t		refcount;
+	bool			stopped;
 
-	struct nfsd4_ssc_umount_item *ss_nsui;
+	struct vfsmount		*ss_mnt;
 	struct nfs_fh		c_fh;
 	nfs4_stateid		stateid;
 };
-
-static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync)
-{
-	if (sync)
-		set_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
-	else
-		clear_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
-}
-
-static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy)
-{
-	return test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
-}
-
-static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy)
-{
-	return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
-}
-
-static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy)
-{
-	return !test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
-}
+extern bool inter_copy_offload_enable;
 
 struct nfsd4_seek {
 	/* request */
@@ -622,20 +605,19 @@ struct nfsd4_offload_status {
 struct nfsd4_copy_notify {
 	/* request */
 	stateid_t		cpn_src_stateid;
-	struct nl4_server	*cpn_dst;
+	struct nl4_server	cpn_dst;
 
 	/* response */
 	stateid_t		cpn_cnr_stateid;
 	u64			cpn_sec;
 	u32			cpn_nsec;
-	struct nl4_server	*cpn_src;
+	struct nl4_server	cpn_src;
 };
 
 struct nfsd4_op {
-	u32					opnum;
+	int					opnum;
+	const struct nfsd4_operation *		opdesc;
 	__be32					status;
-	const struct nfsd4_operation		*opdesc;
-	struct nfs4_replay			*replay;
 	union nfsd4_op_u {
 		struct nfsd4_access		access;
 		struct nfsd4_close		close;
@@ -699,6 +681,7 @@ struct nfsd4_op {
 		struct nfsd4_listxattrs		listxattrs;
 		struct nfsd4_removexattr	removexattr;
 	} u;
+	struct nfs4_replay *			replay;
 };
 
 bool nfsd4_cache_this_op(struct nfsd4_op *);
@@ -713,29 +696,35 @@ struct svcxdr_tmpbuf {
 
 struct nfsd4_compoundargs {
 	/* scratch variables for XDR decode */
-	struct xdr_stream		*xdr;
+	__be32 *			p;
+	__be32 *			end;
+	struct page **			pagelist;
+	int				pagelen;
+	bool				tail;
+	__be32				tmp[8];
+	__be32 *			tmpp;
 	struct svcxdr_tmpbuf		*to_free;
+
 	struct svc_rqst			*rqstp;
 
-	char *				tag;
 	u32				taglen;
+	char *				tag;
 	u32				minorversion;
-	u32				client_opcnt;
 	u32				opcnt;
 	struct nfsd4_op			*ops;
 	struct nfsd4_op			iops[8];
+	int				cachetype;
 };
 
 struct nfsd4_compoundres {
 	/* scratch variables for XDR encode */
-	struct xdr_stream		*xdr;
+	struct xdr_stream		xdr;
 	struct svc_rqst *		rqstp;
 
-	__be32				*statusp;
-	char *				tag;
 	u32				taglen;
+	char *				tag;
 	u32				opcnt;
-
+	__be32 *			tagp; /* tag, opcount encode location */
 	struct nfsd4_compound_state	cstate;
 };
 
@@ -778,16 +767,24 @@ static inline void
 set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 {
 	BUG_ON(!fhp->fh_pre_saved);
-	cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
+	cinfo->atomic = (u32)fhp->fh_post_saved;
+	cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
 
 	cinfo->before_change = fhp->fh_pre_change;
 	cinfo->after_change = fhp->fh_post_change;
+	cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+	cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+	cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+
 }
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs4svc_decode_voidarg(struct svc_rqst *, __be32 *);
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
@@ -888,19 +885,13 @@ struct nfsd4_operation {
 	u32 op_flags;
 	char *op_name;
 	/* Try to get response size before operation */
-	u32 (*op_rsize_bop)(const struct svc_rqst *rqstp,
-			const struct nfsd4_op *op);
+	u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
 	void (*op_get_currentstateid)(struct nfsd4_compound_state *,
 			union nfsd4_op_u *);
 	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
 			union nfsd4_op_u *);
 };
 
-struct nfsd4_cb_recall_any {
-	struct nfsd4_callback	ra_cb;
-	u32			ra_keep;
-	u32			ra_bmval[1];
-};
 
 #endif
 
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 0d39af1b00a0..547cf07cf4e0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,9 +48,3 @@
 #define NFS4_dec_cb_offload_sz		(cb_compound_dec_hdr_sz  +      \
 					cb_sequence_dec_sz +            \
 					op_dec_sz)
-#define NFS4_enc_cb_recall_any_sz	(cb_compound_enc_hdr_sz +       \
-					cb_sequence_enc_sz +            \
-					1 + 1 + 1)
-#define NFS4_dec_cb_recall_any_sz	(cb_compound_dec_hdr_sz  +      \
-					cb_sequence_dec_sz +            \
-					op_dec_sz)
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index fa81c59a2ad4..e45ca6ecba95 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -150,7 +150,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 		return;
 	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
 
-	fsnotify_group_lock(dnotify_group);
+	mutex_lock(&dnotify_group->mark_mutex);
 
 	spin_lock(&fsn_mark->lock);
 	prev = &dn_mark->dn;
@@ -173,7 +173,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 		free = true;
 	}
 
-	fsnotify_group_unlock(dnotify_group);
+	mutex_unlock(&dnotify_group->mark_mutex);
 
 	if (free)
 		fsnotify_free_mark(fsn_mark);
@@ -196,7 +196,7 @@ static __u32 convert_arg(unsigned long arg)
 	if (arg & DN_ATTRIB)
 		new_mask |= FS_ATTRIB;
 	if (arg & DN_RENAME)
-		new_mask |= FS_RENAME;
+		new_mask |= FS_DN_RENAME;
 	if (arg & DN_CREATE)
 		new_mask |= (FS_CREATE | FS_MOVED_TO);
 
@@ -306,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	new_dn_mark->dn = NULL;
 
 	/* this is needed to prevent the fcntl/close race described below */
-	fsnotify_group_lock(dnotify_group);
+	mutex_lock(&dnotify_group->mark_mutex);
 
 	/* add the new_fsn_mark or find an old one. */
 	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
@@ -316,7 +316,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	} else {
 		error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
 		if (error) {
-			fsnotify_group_unlock(dnotify_group);
+			mutex_unlock(&dnotify_group->mark_mutex);
 			goto out_err;
 		}
 		spin_lock(&new_fsn_mark->lock);
@@ -327,7 +327,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	}
 
 	rcu_read_lock();
-	f = lookup_fd_rcu(fd);
+	f = fcheck(fd);
 	rcu_read_unlock();
 
 	/* if (f != filp) means that we lost a race and another task/thread
@@ -365,7 +365,7 @@ out:
 
 	if (destroy)
 		fsnotify_detach_mark(fsn_mark);
-	fsnotify_group_unlock(dnotify_group);
+	mutex_unlock(&dnotify_group->mark_mutex);
 	if (destroy)
 		fsnotify_free_mark(fsn_mark);
 	fsnotify_put_mark(fsn_mark);
@@ -383,8 +383,7 @@ static int __init dnotify_init(void)
 					  SLAB_PANIC|SLAB_ACCOUNT);
 	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
 
-	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
-					     FSNOTIFY_GROUP_NOFS);
+	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
 		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index a2a15bc4df28..c3af99e94f1d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -14,33 +14,20 @@
 #include <linux/audit.h>
 #include <linux/sched/mm.h>
 #include <linux/statfs.h>
-#include <linux/stringhash.h>
 
 #include "fanotify.h"
 
-static bool fanotify_path_equal(const struct path *p1, const struct path *p2)
+static bool fanotify_path_equal(struct path *p1, struct path *p2)
 {
 	return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
 }
 
-static unsigned int fanotify_hash_path(const struct path *path)
-{
-	return hash_ptr(path->dentry, FANOTIFY_EVENT_HASH_BITS) ^
-		hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
-}
-
 static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
 				       __kernel_fsid_t *fsid2)
 {
 	return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
 }
 
-static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
-{
-	return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
-		hash_32(fsid->val[1], FANOTIFY_EVENT_HASH_BITS);
-}
-
 static bool fanotify_fh_equal(struct fanotify_fh *fh1,
 			      struct fanotify_fh *fh2)
 {
@@ -51,16 +38,6 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1,
 		!memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len);
 }
 
-static unsigned int fanotify_hash_fh(struct fanotify_fh *fh)
-{
-	long salt = (long)fh->type | (long)fh->len << 8;
-
-	/*
-	 * full_name_hash() works long by long, so it handles fh buf optimally.
-	 */
-	return full_name_hash((void *)salt, fanotify_fh_buf(fh), fh->len);
-}
-
 static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1,
 				     struct fanotify_fid_event *ffe2)
 {
@@ -76,10 +53,8 @@ static bool fanotify_info_equal(struct fanotify_info *info1,
 				struct fanotify_info *info2)
 {
 	if (info1->dir_fh_totlen != info2->dir_fh_totlen ||
-	    info1->dir2_fh_totlen != info2->dir2_fh_totlen ||
 	    info1->file_fh_totlen != info2->file_fh_totlen ||
-	    info1->name_len != info2->name_len ||
-	    info1->name2_len != info2->name2_len)
+	    info1->name_len != info2->name_len)
 		return false;
 
 	if (info1->dir_fh_totlen &&
@@ -87,24 +62,14 @@ static bool fanotify_info_equal(struct fanotify_info *info1,
 			       fanotify_info_dir_fh(info2)))
 		return false;
 
-	if (info1->dir2_fh_totlen &&
-	    !fanotify_fh_equal(fanotify_info_dir2_fh(info1),
-			       fanotify_info_dir2_fh(info2)))
-		return false;
-
 	if (info1->file_fh_totlen &&
 	    !fanotify_fh_equal(fanotify_info_file_fh(info1),
 			       fanotify_info_file_fh(info2)))
 		return false;
 
-	if (info1->name_len &&
-	    memcmp(fanotify_info_name(info1), fanotify_info_name(info2),
-		   info1->name_len))
-		return false;
-
-	return !info1->name2_len ||
-		!memcmp(fanotify_info_name2(info1), fanotify_info_name2(info2),
-			info1->name2_len);
+	return !info1->name_len ||
+		!memcmp(fanotify_info_name(info1), fanotify_info_name(info2),
+			info1->name_len);
 }
 
 static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
@@ -123,22 +88,16 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
 	return fanotify_info_equal(info1, info2);
 }
 
-static bool fanotify_error_event_equal(struct fanotify_error_event *fee1,
-				       struct fanotify_error_event *fee2)
+static bool fanotify_should_merge(struct fsnotify_event *old_fsn,
+				  struct fsnotify_event *new_fsn)
 {
-	/* Error events against the same file system are always merged. */
-	if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid))
-		return false;
+	struct fanotify_event *old, *new;
 
-	return true;
-}
+	pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
+	old = FANOTIFY_E(old_fsn);
+	new = FANOTIFY_E(new_fsn);
 
-static bool fanotify_should_merge(struct fanotify_event *old,
-				  struct fanotify_event *new)
-{
-	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
-
-	if (old->hash != new->hash ||
+	if (old_fsn->objectid != new_fsn->objectid ||
 	    old->type != new->type || old->pid != new->pid)
 		return false;
 
@@ -153,13 +112,6 @@ static bool fanotify_should_merge(struct fanotify_event *old,
 	if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR))
 		return false;
 
-	/*
-	 * FAN_RENAME event is reported with special info record types,
-	 * so we cannot merge it with other events.
-	 */
-	if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME))
-		return false;
-
 	switch (old->type) {
 	case FANOTIFY_EVENT_TYPE_PATH:
 		return fanotify_path_equal(fanotify_event_path(old),
@@ -170,9 +122,6 @@ static bool fanotify_should_merge(struct fanotify_event *old,
 	case FANOTIFY_EVENT_TYPE_FID_NAME:
 		return fanotify_name_event_equal(FANOTIFY_NE(old),
 						 FANOTIFY_NE(new));
-	case FANOTIFY_EVENT_TYPE_FS_ERROR:
-		return fanotify_error_event_equal(FANOTIFY_EE(old),
-						  FANOTIFY_EE(new));
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -184,16 +133,14 @@ static bool fanotify_should_merge(struct fanotify_event *old,
 #define FANOTIFY_MAX_MERGE_EVENTS 128
 
 /* and the list better be locked by something too! */
-static int fanotify_merge(struct fsnotify_group *group,
-			  struct fsnotify_event *event)
+static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
 {
-	struct fanotify_event *old, *new = FANOTIFY_E(event);
-	unsigned int bucket = fanotify_event_hash_bucket(group, new);
-	struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
+	struct fsnotify_event *test_event;
+	struct fanotify_event *new;
 	int i = 0;
 
-	pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
-		 group, event, bucket);
+	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+	new = FANOTIFY_E(event);
 
 	/*
 	 * Don't merge a permission event with any other event so that we know
@@ -203,15 +150,11 @@ static int fanotify_merge(struct fsnotify_group *group,
 	if (fanotify_is_perm_event(new->mask))
 		return 0;
 
-	hlist_for_each_entry(old, hlist, merge_list) {
+	list_for_each_entry_reverse(test_event, list, list) {
 		if (++i > FANOTIFY_MAX_MERGE_EVENTS)
 			break;
-		if (fanotify_should_merge(old, new)) {
-			old->mask |= new->mask;
-
-			if (fanotify_is_error_event(old->mask))
-				FANOTIFY_EE(old)->err_count++;
-
+		if (fanotify_should_merge(test_event, event)) {
+			FANOTIFY_E(test_event)->mask |= new->mask;
 			return 1;
 		}
 	}
@@ -247,11 +190,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
 			return ret;
 		}
 		/* Event not yet reported? Just remove it. */
-		if (event->state == FAN_EVENT_INIT) {
+		if (event->state == FAN_EVENT_INIT)
 			fsnotify_remove_queued_event(group, &event->fae.fse);
-			/* Permission events are not supposed to be hashed */
-			WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list));
-		}
 		/*
 		 * Event may be also answered in case signal delivery raced
 		 * with wakeup. In that case we have nothing to do besides
@@ -291,17 +231,15 @@ out:
  */
 static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 				     struct fsnotify_iter_info *iter_info,
-				     u32 *match_mask, u32 event_mask,
-				     const void *data, int data_type,
-				     struct inode *dir)
+				     u32 event_mask, const void *data,
+				     int data_type, struct inode *dir)
 {
-	__u32 marks_mask = 0, marks_ignore_mask = 0;
+	__u32 marks_mask = 0, marks_ignored_mask = 0;
 	__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
 				     FANOTIFY_EVENT_FLAGS;
 	const struct path *path = fsnotify_data_path(data, data_type);
 	unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
 	struct fsnotify_mark *mark;
-	bool ondir = event_mask & FAN_ONDIR;
 	int type;
 
 	pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
@@ -316,30 +254,37 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 			return 0;
 	} else if (!(fid_mode & FAN_REPORT_FID)) {
 		/* Do we have a directory inode to report? */
-		if (!dir && !ondir)
+		if (!dir && !(event_mask & FS_ISDIR))
 			return 0;
 	}
 
-	fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
-		/*
-		 * Apply ignore mask depending on event flags in ignore mask.
-		 */
-		marks_ignore_mask |=
-			fsnotify_effective_ignore_mask(mark, ondir, type);
+	fsnotify_foreach_obj_type(type) {
+		if (!fsnotify_iter_should_report_type(iter_info, type))
+			continue;
+		mark = iter_info->marks[type];
+
+		/* Apply ignore mask regardless of ISDIR and ON_CHILD flags */
+		marks_ignored_mask |= mark->ignored_mask;
 
 		/*
-		 * Send the event depending on event flags in mark mask.
+		 * If the event is on dir and this mark doesn't care about
+		 * events on dir, don't send it!
 		 */
-		if (!fsnotify_mask_applicable(mark->mask, ondir, type))
+		if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
+			continue;
+
+		/*
+		 * If the event is on a child and this mark is on a parent not
+		 * watching children, don't send it!
+		 */
+		if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
+		    !(mark->mask & FS_EVENT_ON_CHILD))
 			continue;
 
 		marks_mask |= mark->mask;
-
-		/* Record the mark types of this group that matched the event */
-		*match_mask |= 1U << type;
 	}
 
-	test_mask = event_mask & marks_mask & ~marks_ignore_mask;
+	test_mask = event_mask & marks_mask & ~marks_ignored_mask;
 
 	/*
 	 * For dirent modification events (create/delete/move) that do not carry
@@ -374,23 +319,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 static int fanotify_encode_fh_len(struct inode *inode)
 {
 	int dwords = 0;
-	int fh_len;
 
 	if (!inode)
 		return 0;
 
 	exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
-	fh_len = dwords << 2;
 
-	/*
-	 * struct fanotify_error_event might be preallocated and is
-	 * limited to MAX_HANDLE_SZ.  This should never happen, but
-	 * safeguard by forcing an invalid file handle.
-	 */
-	if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ))
-		return 0;
-
-	return fh_len;
+	return dwords << 2;
 }
 
 /*
@@ -400,8 +335,7 @@ static int fanotify_encode_fh_len(struct inode *inode)
  * Return 0 on failure to encode.
  */
 static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
-			      unsigned int fh_len, unsigned int *hash,
-			      gfp_t gfp)
+			      unsigned int fh_len, gfp_t gfp)
 {
 	int dwords, type = 0;
 	char *ext_buf = NULL;
@@ -411,21 +345,15 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
 	fh->type = FILEID_ROOT;
 	fh->len = 0;
 	fh->flags = 0;
-
-	/*
-	 * Invalid FHs are used by FAN_FS_ERROR for errors not
-	 * linked to any inode. The f_handle won't be reported
-	 * back to userspace.
-	 */
 	if (!inode)
-		goto out;
+		return 0;
 
 	/*
 	 * !gpf means preallocated variable size fh, but fh_len could
 	 * be zero in that case if encoding fh len failed.
 	 */
 	err = -ENOENT;
-	if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ)
+	if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4))
 		goto out_err;
 
 	/* No external buffer in a variable size allocated fh */
@@ -450,14 +378,6 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
 	fh->type = type;
 	fh->len = fh_len;
 
-out:
-	/*
-	 * Mix fh into event merge key.  Hash might be NULL in case of
-	 * unhashed FID events (i.e. FAN_FS_ERROR).
-	 */
-	if (hash)
-		*hash ^= fanotify_hash_fh(fh);
-
 	return FANOTIFY_FH_HDR_LEN + fh_len;
 
 out_err:
@@ -472,41 +392,17 @@ out_err:
 }
 
 /*
- * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for
- * some events and the fid of the parent for create/delete/move events.
- *
- * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported
- * also in create/delete/move events in addition to the fid of the parent
- * and the name of the child.
- */
-static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask)
-{
-	if (mask & ALL_FSNOTIFY_DIRENT_EVENTS)
-		return (fid_mode & FAN_REPORT_TARGET_FID);
-
-	return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR);
-}
-
-/*
- * The inode to use as identifier when reporting fid depends on the event
- * and the group flags.
- *
- * With the group flag FAN_REPORT_TARGET_FID, always report the child fid.
- *
- * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory
- * fid on dirent events and the child fid otherwise.
- *
+ * The inode to use as identifier when reporting fid depends on the event.
+ * Report the modified directory inode on dirent modification events.
+ * Report the "victim" inode otherwise.
  * For example:
- * FS_ATTRIB reports the child fid even if reported on a watched parent.
- * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID.
- *       and reports the created child fid with FAN_REPORT_TARGET_FID.
+ * FS_ATTRIB reports the child inode even if reported on a watched parent.
+ * FS_CREATE reports the modified dir inode and not the created inode.
  */
 static struct inode *fanotify_fid_inode(u32 event_mask, const void *data,
-					int data_type, struct inode *dir,
-					unsigned int fid_mode)
+					int data_type, struct inode *dir)
 {
-	if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) &&
-	    !(fid_mode & FAN_REPORT_TARGET_FID))
+	if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
 		return dir;
 
 	return fsnotify_data_inode(data, data_type);
@@ -528,14 +424,13 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data,
 	if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
 		return dir;
 
-	if (inode && S_ISDIR(inode->i_mode))
+	if (S_ISDIR(inode->i_mode))
 		return inode;
 
 	return dir;
 }
 
 static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
-							unsigned int *hash,
 							gfp_t gfp)
 {
 	struct fanotify_path_event *pevent;
@@ -546,7 +441,6 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
 
 	pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH;
 	pevent->path = *path;
-	*hash ^= fanotify_hash_path(path);
 	path_get(path);
 
 	return &pevent->fae;
@@ -572,7 +466,6 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
 
 static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
 						       __kernel_fsid_t *fsid,
-						       unsigned int *hash,
 						       gfp_t gfp)
 {
 	struct fanotify_fid_event *ffe;
@@ -583,153 +476,78 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
 
 	ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
 	ffe->fsid = *fsid;
-	*hash ^= fanotify_hash_fsid(fsid);
 	fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
-			   hash, gfp);
+			   gfp);
 
 	return &ffe->fae;
 }
 
-static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir,
+static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
 							__kernel_fsid_t *fsid,
-							const struct qstr *name,
+							const struct qstr *file_name,
 							struct inode *child,
-							struct dentry *moved,
-							unsigned int *hash,
 							gfp_t gfp)
 {
 	struct fanotify_name_event *fne;
 	struct fanotify_info *info;
 	struct fanotify_fh *dfh, *ffh;
-	struct inode *dir2 = moved ? d_inode(moved->d_parent) : NULL;
-	const struct qstr *name2 = moved ? &moved->d_name : NULL;
-	unsigned int dir_fh_len = fanotify_encode_fh_len(dir);
-	unsigned int dir2_fh_len = fanotify_encode_fh_len(dir2);
+	unsigned int dir_fh_len = fanotify_encode_fh_len(id);
 	unsigned int child_fh_len = fanotify_encode_fh_len(child);
-	unsigned long name_len = name ? name->len : 0;
-	unsigned long name2_len = name2 ? name2->len : 0;
-	unsigned int len, size;
+	unsigned int size;
 
-	/* Reserve terminating null byte even for empty name */
-	size = sizeof(*fne) + name_len + name2_len + 2;
-	if (dir_fh_len)
-		size += FANOTIFY_FH_HDR_LEN + dir_fh_len;
-	if (dir2_fh_len)
-		size += FANOTIFY_FH_HDR_LEN + dir2_fh_len;
+	size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len;
 	if (child_fh_len)
 		size += FANOTIFY_FH_HDR_LEN + child_fh_len;
+	if (file_name)
+		size += file_name->len + 1;
 	fne = kmalloc(size, gfp);
 	if (!fne)
 		return NULL;
 
 	fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME;
 	fne->fsid = *fsid;
-	*hash ^= fanotify_hash_fsid(fsid);
 	info = &fne->info;
 	fanotify_info_init(info);
-	if (dir_fh_len) {
-		dfh = fanotify_info_dir_fh(info);
-		len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0);
-		fanotify_info_set_dir_fh(info, len);
-	}
-	if (dir2_fh_len) {
-		dfh = fanotify_info_dir2_fh(info);
-		len = fanotify_encode_fh(dfh, dir2, dir2_fh_len, hash, 0);
-		fanotify_info_set_dir2_fh(info, len);
-	}
+	dfh = fanotify_info_dir_fh(info);
+	info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0);
 	if (child_fh_len) {
 		ffh = fanotify_info_file_fh(info);
-		len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0);
-		fanotify_info_set_file_fh(info, len);
-	}
-	if (name_len) {
-		fanotify_info_copy_name(info, name);
-		*hash ^= full_name_hash((void *)name_len, name->name, name_len);
-	}
-	if (name2_len) {
-		fanotify_info_copy_name2(info, name2);
-		*hash ^= full_name_hash((void *)name2_len, name2->name,
-					name2_len);
+		info->file_fh_totlen = fanotify_encode_fh(ffh, child, child_fh_len, 0);
 	}
+	if (file_name)
+		fanotify_info_copy_name(info, file_name);
 
-	pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
-		 __func__, size, dir_fh_len, child_fh_len,
+	pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
+		 __func__, id->i_ino, size, dir_fh_len, child_fh_len,
 		 info->name_len, info->name_len, fanotify_info_name(info));
 
-	if (dir2_fh_len) {
-		pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n",
-			 __func__, dir2_fh_len, info->name2_len,
-			 info->name2_len, fanotify_info_name2(info));
-	}
-
 	return &fne->fae;
 }
 
-static struct fanotify_event *fanotify_alloc_error_event(
-						struct fsnotify_group *group,
-						__kernel_fsid_t *fsid,
-						const void *data, int data_type,
-						unsigned int *hash)
-{
-	struct fs_error_report *report =
-			fsnotify_data_error_report(data, data_type);
-	struct inode *inode;
-	struct fanotify_error_event *fee;
-	int fh_len;
-
-	if (WARN_ON_ONCE(!report))
-		return NULL;
-
-	fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS);
-	if (!fee)
-		return NULL;
-
-	fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
-	fee->error = report->error;
-	fee->err_count = 1;
-	fee->fsid = *fsid;
-
-	inode = report->inode;
-	fh_len = fanotify_encode_fh_len(inode);
-
-	/* Bad fh_len. Fallback to using an invalid fh. Should never happen. */
-	if (!fh_len && inode)
-		inode = NULL;
-
-	fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0);
-
-	*hash ^= fanotify_hash_fsid(fsid);
-
-	return &fee->fae;
-}
-
-static struct fanotify_event *fanotify_alloc_event(
-				struct fsnotify_group *group,
-				u32 mask, const void *data, int data_type,
-				struct inode *dir, const struct qstr *file_name,
-				__kernel_fsid_t *fsid, u32 match_mask)
+static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+						   u32 mask, const void *data,
+						   int data_type, struct inode *dir,
+						   const struct qstr *file_name,
+						   __kernel_fsid_t *fsid)
 {
 	struct fanotify_event *event = NULL;
 	gfp_t gfp = GFP_KERNEL_ACCOUNT;
-	unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
-	struct inode *id = fanotify_fid_inode(mask, data, data_type, dir,
-					      fid_mode);
+	struct inode *id = fanotify_fid_inode(mask, data, data_type, dir);
 	struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
 	const struct path *path = fsnotify_data_path(data, data_type);
+	unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
 	struct mem_cgroup *old_memcg;
-	struct dentry *moved = NULL;
 	struct inode *child = NULL;
 	bool name_event = false;
-	unsigned int hash = 0;
-	bool ondir = mask & FAN_ONDIR;
-	struct pid *pid;
 
 	if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
 		/*
-		 * For certain events and group flags, report the child fid
+		 * With both flags FAN_REPORT_DIR_FID and FAN_REPORT_FID, we
+		 * report the child fid for events reported on a non-dir child
 		 * in addition to reporting the parent fid and maybe child name.
 		 */
-		if (fanotify_report_child_fid(fid_mode, mask) && id != dirid)
+		if ((fid_mode & FAN_REPORT_FID) &&
+		    id != dirid && !(mask & FAN_ONDIR))
 			child = id;
 
 		id = dirid;
@@ -750,41 +568,10 @@ static struct fanotify_event *fanotify_alloc_event(
 		if (!(fid_mode & FAN_REPORT_NAME)) {
 			name_event = !!child;
 			file_name = NULL;
-		} else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) {
+		} else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
+			   !(mask & FAN_ONDIR)) {
 			name_event = true;
 		}
-
-		/*
-		 * In the special case of FAN_RENAME event, use the match_mask
-		 * to determine if we need to report only the old parent+name,
-		 * only the new parent+name or both.
-		 * 'dirid' and 'file_name' are the old parent+name and
-		 * 'moved' has the new parent+name.
-		 */
-		if (mask & FAN_RENAME) {
-			bool report_old, report_new;
-
-			if (WARN_ON_ONCE(!match_mask))
-				return NULL;
-
-			/* Report both old and new parent+name if sb watching */
-			report_old = report_new =
-				match_mask & (1U << FSNOTIFY_ITER_TYPE_SB);
-			report_old |=
-				match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE);
-			report_new |=
-				match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2);
-
-			if (!report_old) {
-				/* Do not report old parent+name */
-				dirid = NULL;
-				file_name = NULL;
-			}
-			if (report_new) {
-				/* Report new parent+name */
-				moved = fsnotify_data_dentry(data, data_type);
-			}
-		}
 	}
 
 	/*
@@ -803,30 +590,28 @@ static struct fanotify_event *fanotify_alloc_event(
 
 	if (fanotify_is_perm_event(mask)) {
 		event = fanotify_alloc_perm_event(path, gfp);
-	} else if (fanotify_is_error_event(mask)) {
-		event = fanotify_alloc_error_event(group, fsid, data,
-						   data_type, &hash);
-	} else if (name_event && (file_name || moved || child)) {
-		event = fanotify_alloc_name_event(dirid, fsid, file_name, child,
-						  moved, &hash, gfp);
+	} else if (name_event && (file_name || child)) {
+		event = fanotify_alloc_name_event(id, fsid, file_name, child,
+						  gfp);
 	} else if (fid_mode) {
-		event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
+		event = fanotify_alloc_fid_event(id, fsid, gfp);
 	} else {
-		event = fanotify_alloc_path_event(path, &hash, gfp);
+		event = fanotify_alloc_path_event(path, gfp);
 	}
 
 	if (!event)
 		goto out;
 
+	/*
+	 * Use the victim inode instead of the watching inode as the id for
+	 * event queue, so event reported on parent is merged with event
+	 * reported on child when both directory and child watches exist.
+	 */
+	fanotify_init_event(event, (unsigned long)id, mask);
 	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
-		pid = get_pid(task_pid(current));
+		event->pid = get_pid(task_pid(current));
 	else
-		pid = get_pid(task_tgid(current));
-
-	/* Mix event info, FAN_ONDIR flag and pid into event merge key */
-	hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS);
-	fanotify_init_event(event, hash, mask);
-	event->pid = pid;
+		event->pid = get_pid(task_tgid(current));
 
 out:
 	set_active_memcg(old_memcg);
@@ -840,14 +625,16 @@ out:
  */
 static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
 {
-	struct fsnotify_mark *mark;
 	int type;
 	__kernel_fsid_t fsid = {};
 
-	fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+	fsnotify_foreach_obj_type(type) {
 		struct fsnotify_mark_connector *conn;
 
-		conn = READ_ONCE(mark->connector);
+		if (!fsnotify_iter_should_report_type(iter_info, type))
+			continue;
+
+		conn = READ_ONCE(iter_info->marks[type]->connector);
 		/* Mark is just getting destroyed or created? */
 		if (!conn)
 			continue;
@@ -864,27 +651,6 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
 	return fsid;
 }
 
-/*
- * Add an event to hash table for faster merge.
- */
-static void fanotify_insert_event(struct fsnotify_group *group,
-				  struct fsnotify_event *fsn_event)
-{
-	struct fanotify_event *event = FANOTIFY_E(fsn_event);
-	unsigned int bucket = fanotify_event_hash_bucket(group, event);
-	struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket];
-
-	assert_spin_locked(&group->notification_lock);
-
-	if (!fanotify_is_hashed_event(event->mask))
-		return;
-
-	pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
-		 group, event, bucket);
-
-	hlist_add_head(&event->merge_list, hlist);
-}
-
 static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 				 const void *data, int data_type,
 				 struct inode *dir,
@@ -895,7 +661,6 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	struct fanotify_event *event;
 	struct fsnotify_event *fsn_event;
 	__kernel_fsid_t fsid = {};
-	u32 match_mask = 0;
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
@@ -916,18 +681,15 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
 	BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
 	BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
-	BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
-	BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
 
-	mask = fanotify_group_event_mask(group, iter_info, &match_mask,
-					 mask, data, data_type, dir);
+	mask = fanotify_group_event_mask(group, iter_info, mask, data,
+					 data_type, dir);
 	if (!mask)
 		return 0;
 
-	pr_debug("%s: group=%p mask=%x report_mask=%x\n", __func__,
-		 group, mask, match_mask);
+	pr_debug("%s: group=%p mask=%x\n", __func__, group, mask);
 
 	if (fanotify_is_perm_event(mask)) {
 		/*
@@ -946,7 +708,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	}
 
 	event = fanotify_alloc_event(group, mask, data, data_type, dir,
-				     file_name, &fsid, match_mask);
+				     file_name, &fsid);
 	ret = -ENOMEM;
 	if (unlikely(!event)) {
 		/*
@@ -959,8 +721,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	}
 
 	fsn_event = &event->fse;
-	ret = fsnotify_insert_event(group, fsn_event, fanotify_merge,
-				    fanotify_insert_event);
+	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
 	if (ret) {
 		/* Permission events shouldn't be merged */
 		BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
@@ -981,13 +742,11 @@ finish:
 
 static void fanotify_free_group_priv(struct fsnotify_group *group)
 {
-	kfree(group->fanotify_data.merge_hash);
-	if (group->fanotify_data.ucounts)
-		dec_ucount(group->fanotify_data.ucounts,
-			   UCOUNT_FANOTIFY_GROUPS);
+	struct user_struct *user;
 
-	if (mempool_initialized(&group->fanotify_data.error_events_pool))
-		mempool_exit(&group->fanotify_data.error_events_pool);
+	user = group->fanotify_data.user;
+	atomic_dec(&user->fanotify_listeners);
+	free_uid(user);
 }
 
 static void fanotify_free_path_event(struct fanotify_event *event)
@@ -1016,16 +775,7 @@ static void fanotify_free_name_event(struct fanotify_event *event)
 	kfree(FANOTIFY_NE(event));
 }
 
-static void fanotify_free_error_event(struct fsnotify_group *group,
-				      struct fanotify_event *event)
-{
-	struct fanotify_error_event *fee = FANOTIFY_EE(event);
-
-	mempool_free(fee, &group->fanotify_data.error_events_pool);
-}
-
-static void fanotify_free_event(struct fsnotify_group *group,
-				struct fsnotify_event *fsn_event)
+static void fanotify_free_event(struct fsnotify_event *fsn_event)
 {
 	struct fanotify_event *event;
 
@@ -1047,21 +797,11 @@ static void fanotify_free_event(struct fsnotify_group *group,
 	case FANOTIFY_EVENT_TYPE_OVERFLOW:
 		kfree(event);
 		break;
-	case FANOTIFY_EVENT_TYPE_FS_ERROR:
-		fanotify_free_error_event(group, event);
-		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
 }
 
-static void fanotify_freeing_mark(struct fsnotify_mark *mark,
-				  struct fsnotify_group *group)
-{
-	if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
-		dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_MARKS);
-}
-
 static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
 	kmem_cache_free(fanotify_mark_cache, fsn_mark);
@@ -1071,6 +811,5 @@ const struct fsnotify_ops fanotify_fsnotify_ops = {
 	.handle_event = fanotify_handle_event,
 	.free_group_priv = fanotify_free_group_priv,
 	.free_event = fanotify_free_event,
-	.freeing_mark = fanotify_freeing_mark,
 	.free_mark = fanotify_free_mark,
 };
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 57f51a9a3015..896c819a1786 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -3,7 +3,6 @@
 #include <linux/path.h>
 #include <linux/slab.h>
 #include <linux/exportfs.h>
-#include <linux/hashtable.h>
 
 extern struct kmem_cache *fanotify_mark_cache;
 extern struct kmem_cache *fanotify_fid_event_cachep;
@@ -40,45 +39,15 @@ struct fanotify_fh {
 struct fanotify_info {
 	/* size of dir_fh/file_fh including fanotify_fh hdr size */
 	u8 dir_fh_totlen;
-	u8 dir2_fh_totlen;
 	u8 file_fh_totlen;
 	u8 name_len;
-	u8 name2_len;
-	u8 pad[3];
+	u8 pad;
 	unsigned char buf[];
 	/*
 	 * (struct fanotify_fh) dir_fh starts at buf[0]
-	 * (optional) dir2_fh starts at buf[dir_fh_totlen]
-	 * (optional) file_fh starts at buf[dir_fh_totlen + dir2_fh_totlen]
-	 * name starts at buf[dir_fh_totlen + dir2_fh_totlen + file_fh_totlen]
-	 * ...
+	 * (optional) file_fh starts at buf[dir_fh_totlen]
+	 * name starts at buf[dir_fh_totlen + file_fh_totlen]
 	 */
-#define FANOTIFY_DIR_FH_SIZE(info)	((info)->dir_fh_totlen)
-#define FANOTIFY_DIR2_FH_SIZE(info)	((info)->dir2_fh_totlen)
-#define FANOTIFY_FILE_FH_SIZE(info)	((info)->file_fh_totlen)
-#define FANOTIFY_NAME_SIZE(info)	((info)->name_len + 1)
-#define FANOTIFY_NAME2_SIZE(info)	((info)->name2_len + 1)
-
-#define FANOTIFY_DIR_FH_OFFSET(info)	0
-#define FANOTIFY_DIR2_FH_OFFSET(info) \
-	(FANOTIFY_DIR_FH_OFFSET(info) + FANOTIFY_DIR_FH_SIZE(info))
-#define FANOTIFY_FILE_FH_OFFSET(info) \
-	(FANOTIFY_DIR2_FH_OFFSET(info) + FANOTIFY_DIR2_FH_SIZE(info))
-#define FANOTIFY_NAME_OFFSET(info) \
-	(FANOTIFY_FILE_FH_OFFSET(info) + FANOTIFY_FILE_FH_SIZE(info))
-#define FANOTIFY_NAME2_OFFSET(info) \
-	(FANOTIFY_NAME_OFFSET(info) + FANOTIFY_NAME_SIZE(info))
-
-#define FANOTIFY_DIR_FH_BUF(info) \
-	((info)->buf + FANOTIFY_DIR_FH_OFFSET(info))
-#define FANOTIFY_DIR2_FH_BUF(info) \
-	((info)->buf + FANOTIFY_DIR2_FH_OFFSET(info))
-#define FANOTIFY_FILE_FH_BUF(info) \
-	((info)->buf + FANOTIFY_FILE_FH_OFFSET(info))
-#define FANOTIFY_NAME_BUF(info) \
-	((info)->buf + FANOTIFY_NAME_OFFSET(info))
-#define FANOTIFY_NAME2_BUF(info) \
-	((info)->buf + FANOTIFY_NAME2_OFFSET(info))
 } __aligned(4);
 
 static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh)
@@ -117,21 +86,7 @@ static inline struct fanotify_fh *fanotify_info_dir_fh(struct fanotify_info *inf
 {
 	BUILD_BUG_ON(offsetof(struct fanotify_info, buf) % 4);
 
-	return (struct fanotify_fh *)FANOTIFY_DIR_FH_BUF(info);
-}
-
-static inline int fanotify_info_dir2_fh_len(struct fanotify_info *info)
-{
-	if (!info->dir2_fh_totlen ||
-	    WARN_ON_ONCE(info->dir2_fh_totlen < FANOTIFY_FH_HDR_LEN))
-		return 0;
-
-	return info->dir2_fh_totlen - FANOTIFY_FH_HDR_LEN;
-}
-
-static inline struct fanotify_fh *fanotify_info_dir2_fh(struct fanotify_info *info)
-{
-	return (struct fanotify_fh *)FANOTIFY_DIR2_FH_BUF(info);
+	return (struct fanotify_fh *)info->buf;
 }
 
 static inline int fanotify_info_file_fh_len(struct fanotify_info *info)
@@ -145,90 +100,27 @@ static inline int fanotify_info_file_fh_len(struct fanotify_info *info)
 
 static inline struct fanotify_fh *fanotify_info_file_fh(struct fanotify_info *info)
 {
-	return (struct fanotify_fh *)FANOTIFY_FILE_FH_BUF(info);
+	return (struct fanotify_fh *)(info->buf + info->dir_fh_totlen);
 }
 
-static inline char *fanotify_info_name(struct fanotify_info *info)
+static inline const char *fanotify_info_name(struct fanotify_info *info)
 {
-	if (!info->name_len)
-		return NULL;
-
-	return FANOTIFY_NAME_BUF(info);
-}
-
-static inline char *fanotify_info_name2(struct fanotify_info *info)
-{
-	if (!info->name2_len)
-		return NULL;
-
-	return FANOTIFY_NAME2_BUF(info);
+	return info->buf + info->dir_fh_totlen + info->file_fh_totlen;
 }
 
 static inline void fanotify_info_init(struct fanotify_info *info)
 {
-	BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN + MAX_HANDLE_SZ > U8_MAX);
-	BUILD_BUG_ON(NAME_MAX > U8_MAX);
-
 	info->dir_fh_totlen = 0;
-	info->dir2_fh_totlen = 0;
 	info->file_fh_totlen = 0;
 	info->name_len = 0;
-	info->name2_len = 0;
-}
-
-/* These set/copy helpers MUST be called by order */
-static inline void fanotify_info_set_dir_fh(struct fanotify_info *info,
-					    unsigned int totlen)
-{
-	if (WARN_ON_ONCE(info->dir2_fh_totlen > 0) ||
-	    WARN_ON_ONCE(info->file_fh_totlen > 0) ||
-	    WARN_ON_ONCE(info->name_len > 0) ||
-	    WARN_ON_ONCE(info->name2_len > 0))
-		return;
-
-	info->dir_fh_totlen = totlen;
-}
-
-static inline void fanotify_info_set_dir2_fh(struct fanotify_info *info,
-					     unsigned int totlen)
-{
-	if (WARN_ON_ONCE(info->file_fh_totlen > 0) ||
-	    WARN_ON_ONCE(info->name_len > 0) ||
-	    WARN_ON_ONCE(info->name2_len > 0))
-		return;
-
-	info->dir2_fh_totlen = totlen;
-}
-
-static inline void fanotify_info_set_file_fh(struct fanotify_info *info,
-					     unsigned int totlen)
-{
-	if (WARN_ON_ONCE(info->name_len > 0) ||
-	    WARN_ON_ONCE(info->name2_len > 0))
-		return;
-
-	info->file_fh_totlen = totlen;
 }
 
 static inline void fanotify_info_copy_name(struct fanotify_info *info,
 					   const struct qstr *name)
 {
-	if (WARN_ON_ONCE(name->len > NAME_MAX) ||
-	    WARN_ON_ONCE(info->name2_len > 0))
-		return;
-
 	info->name_len = name->len;
-	strcpy(fanotify_info_name(info), name->name);
-}
-
-static inline void fanotify_info_copy_name2(struct fanotify_info *info,
-					    const struct qstr *name)
-{
-	if (WARN_ON_ONCE(name->len > NAME_MAX))
-		return;
-
-	info->name2_len = name->len;
-	strcpy(fanotify_info_name2(info), name->name);
+	strcpy(info->buf + info->dir_fh_totlen + info->file_fh_totlen,
+	       name->name);
 }
 
 /*
@@ -243,48 +135,29 @@ enum fanotify_event_type {
 	FANOTIFY_EVENT_TYPE_PATH,
 	FANOTIFY_EVENT_TYPE_PATH_PERM,
 	FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
-	FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
-	__FANOTIFY_EVENT_TYPE_NUM
 };
 
-#define FANOTIFY_EVENT_TYPE_BITS \
-	(ilog2(__FANOTIFY_EVENT_TYPE_NUM - 1) + 1)
-#define FANOTIFY_EVENT_HASH_BITS \
-	(32 - FANOTIFY_EVENT_TYPE_BITS)
-
 struct fanotify_event {
 	struct fsnotify_event fse;
-	struct hlist_node merge_list;	/* List for hashed merge */
 	u32 mask;
-	struct {
-		unsigned int type : FANOTIFY_EVENT_TYPE_BITS;
-		unsigned int hash : FANOTIFY_EVENT_HASH_BITS;
-	};
+	enum fanotify_event_type type;
 	struct pid *pid;
 };
 
 static inline void fanotify_init_event(struct fanotify_event *event,
-				       unsigned int hash, u32 mask)
+				       unsigned long id, u32 mask)
 {
-	fsnotify_init_event(&event->fse);
-	INIT_HLIST_NODE(&event->merge_list);
-	event->hash = hash;
+	fsnotify_init_event(&event->fse, id);
 	event->mask = mask;
 	event->pid = NULL;
 }
 
-#define FANOTIFY_INLINE_FH(name, size)					\
-struct {								\
-	struct fanotify_fh (name);					\
-	/* Space for object_fh.buf[] - access with fanotify_fh_buf() */	\
-	unsigned char _inline_fh_buf[(size)];				\
-}
-
 struct fanotify_fid_event {
 	struct fanotify_event fae;
 	__kernel_fsid_t fsid;
-
-	FANOTIFY_INLINE_FH(object_fh, FANOTIFY_INLINE_FH_LEN);
+	struct fanotify_fh object_fh;
+	/* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
+	unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
 };
 
 static inline struct fanotify_fid_event *
@@ -305,30 +178,12 @@ FANOTIFY_NE(struct fanotify_event *event)
 	return container_of(event, struct fanotify_name_event, fae);
 }
 
-struct fanotify_error_event {
-	struct fanotify_event fae;
-	s32 error; /* Error reported by the Filesystem. */
-	u32 err_count; /* Suppressed errors count */
-
-	__kernel_fsid_t fsid; /* FSID this error refers to. */
-
-	FANOTIFY_INLINE_FH(object_fh, MAX_HANDLE_SZ);
-};
-
-static inline struct fanotify_error_event *
-FANOTIFY_EE(struct fanotify_event *event)
-{
-	return container_of(event, struct fanotify_error_event, fae);
-}
-
 static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
 {
 	if (event->type == FANOTIFY_EVENT_TYPE_FID)
 		return &FANOTIFY_FE(event)->fsid;
 	else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
 		return &FANOTIFY_NE(event)->fsid;
-	else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
-		return &FANOTIFY_EE(event)->fsid;
 	else
 		return NULL;
 }
@@ -340,8 +195,6 @@ static inline struct fanotify_fh *fanotify_event_object_fh(
 		return &FANOTIFY_FE(event)->object_fh;
 	else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
 		return fanotify_info_file_fh(&FANOTIFY_NE(event)->info);
-	else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
-		return &FANOTIFY_EE(event)->object_fh;
 	else
 		return NULL;
 }
@@ -373,37 +226,6 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event)
 	return info ? fanotify_info_dir_fh_len(info) : 0;
 }
 
-static inline int fanotify_event_dir2_fh_len(struct fanotify_event *event)
-{
-	struct fanotify_info *info = fanotify_event_info(event);
-
-	return info ? fanotify_info_dir2_fh_len(info) : 0;
-}
-
-static inline bool fanotify_event_has_object_fh(struct fanotify_event *event)
-{
-	/* For error events, even zeroed fh are reported. */
-	if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
-		return true;
-	return fanotify_event_object_fh_len(event) > 0;
-}
-
-static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event)
-{
-	return fanotify_event_dir_fh_len(event) > 0;
-}
-
-static inline bool fanotify_event_has_dir2_fh(struct fanotify_event *event)
-{
-	return fanotify_event_dir2_fh_len(event) > 0;
-}
-
-static inline bool fanotify_event_has_any_dir_fh(struct fanotify_event *event)
-{
-	return fanotify_event_has_dir_fh(event) ||
-		fanotify_event_has_dir2_fh(event);
-}
-
 struct fanotify_path_event {
 	struct fanotify_event fae;
 	struct path path;
@@ -447,12 +269,13 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
 	return container_of(fse, struct fanotify_event, fse);
 }
 
-static inline bool fanotify_is_error_event(u32 mask)
+static inline bool fanotify_event_has_path(struct fanotify_event *event)
 {
-	return mask & FAN_FS_ERROR;
+	return event->type == FANOTIFY_EVENT_TYPE_PATH ||
+		event->type == FANOTIFY_EVENT_TYPE_PATH_PERM;
 }
 
-static inline const struct path *fanotify_event_path(struct fanotify_event *event)
+static inline struct path *fanotify_event_path(struct fanotify_event *event)
 {
 	if (event->type == FANOTIFY_EVENT_TYPE_PATH)
 		return &FANOTIFY_PE(event)->path;
@@ -461,40 +284,3 @@ static inline const struct path *fanotify_event_path(struct fanotify_event *even
 	else
 		return NULL;
 }
-
-/*
- * Use 128 size hash table to speed up events merge.
- */
-#define FANOTIFY_HTABLE_BITS	(7)
-#define FANOTIFY_HTABLE_SIZE	(1 << FANOTIFY_HTABLE_BITS)
-#define FANOTIFY_HTABLE_MASK	(FANOTIFY_HTABLE_SIZE - 1)
-
-/*
- * Permission events and overflow event do not get merged - don't hash them.
- */
-static inline bool fanotify_is_hashed_event(u32 mask)
-{
-	return !(fanotify_is_perm_event(mask) ||
-		 fsnotify_is_overflow_event(mask));
-}
-
-static inline unsigned int fanotify_event_hash_bucket(
-						struct fsnotify_group *group,
-						struct fanotify_event *event)
-{
-	return event->hash & FANOTIFY_HTABLE_MASK;
-}
-
-static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
-{
-	unsigned int mflags = 0;
-
-	if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
-		mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
-	if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
-		mflags |= FAN_MARK_EVICTABLE;
-	if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
-		mflags |= FAN_MARK_IGNORE;
-
-	return mflags;
-}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 5302313f28be..84de9f97bbc0 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/fanotify.h>
 #include <linux/fcntl.h>
-#include <linux/fdtable.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
@@ -28,62 +27,8 @@
 #include "fanotify.h"
 
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
-#define FANOTIFY_OLD_DEFAULT_MAX_MARKS	8192
-#define FANOTIFY_DEFAULT_MAX_GROUPS	128
-#define FANOTIFY_DEFAULT_FEE_POOL_SIZE	32
-
-/*
- * Legacy fanotify marks limits (8192) is per group and we introduced a tunable
- * limit of marks per user, similar to inotify.  Effectively, the legacy limit
- * of fanotify marks per user is <max marks per group> * <max groups per user>.
- * This default limit (1M) also happens to match the increased limit of inotify
- * max_user_watches since v5.10.
- */
-#define FANOTIFY_DEFAULT_MAX_USER_MARKS	\
-	(FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS)
-
-/*
- * Most of the memory cost of adding an inode mark is pinning the marked inode.
- * The size of the filesystem inode struct is not uniform across filesystems,
- * so double the size of a VFS inode is used as a conservative approximation.
- */
-#define INODE_MARK_COST	(2 * sizeof(struct inode))
-
-/* configurable via /proc/sys/fs/fanotify/ */
-static int fanotify_max_queued_events __read_mostly;
-
-#ifdef CONFIG_SYSCTL
-
-#include <linux/sysctl.h>
-
-struct ctl_table fanotify_table[] = {
-	{
-		.procname	= "max_user_groups",
-		.data	= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "max_user_marks",
-		.data	= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "max_queued_events",
-		.data		= &fanotify_max_queued_events,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
+#define FANOTIFY_DEFAULT_MAX_MARKS	8192
+#define FANOTIFY_DEFAULT_MAX_LISTENERS	128
 
 /*
  * All flags that may be specified in parameter event_f_flags of fanotify_init.
@@ -106,12 +51,8 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly;
 struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
 
 #define FANOTIFY_EVENT_ALIGN 4
-#define FANOTIFY_FID_INFO_HDR_LEN \
+#define FANOTIFY_INFO_HDR_LEN \
 	(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
-#define FANOTIFY_PIDFD_INFO_HDR_LEN \
-	sizeof(struct fanotify_event_info_pidfd)
-#define FANOTIFY_ERROR_INFO_LEN \
-	(sizeof(struct fanotify_event_info_error))
 
 static int fanotify_fid_info_len(int fh_len, int name_len)
 {
@@ -120,45 +61,21 @@ static int fanotify_fid_info_len(int fh_len, int name_len)
 	if (name_len)
 		info_len += name_len + 1;
 
-	return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len,
-		       FANOTIFY_EVENT_ALIGN);
+	return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN);
 }
 
-/* FAN_RENAME may have one or two dir+name info records */
-static int fanotify_dir_name_info_len(struct fanotify_event *event)
+static int fanotify_event_info_len(unsigned int fid_mode,
+				   struct fanotify_event *event)
 {
 	struct fanotify_info *info = fanotify_event_info(event);
 	int dir_fh_len = fanotify_event_dir_fh_len(event);
-	int dir2_fh_len = fanotify_event_dir2_fh_len(event);
+	int fh_len = fanotify_event_object_fh_len(event);
 	int info_len = 0;
-
-	if (dir_fh_len)
-		info_len += fanotify_fid_info_len(dir_fh_len,
-						  info->name_len);
-	if (dir2_fh_len)
-		info_len += fanotify_fid_info_len(dir2_fh_len,
-						  info->name2_len);
-
-	return info_len;
-}
-
-static size_t fanotify_event_len(unsigned int info_mode,
-				 struct fanotify_event *event)
-{
-	size_t event_len = FAN_EVENT_METADATA_LEN;
-	int fh_len;
 	int dot_len = 0;
 
-	if (!info_mode)
-		return event_len;
-
-	if (fanotify_is_error_event(event->mask))
-		event_len += FANOTIFY_ERROR_INFO_LEN;
-
-	if (fanotify_event_has_any_dir_fh(event)) {
-		event_len += fanotify_dir_name_info_len(event);
-	} else if ((info_mode & FAN_REPORT_NAME) &&
-		   (event->mask & FAN_ONDIR)) {
+	if (dir_fh_len) {
+		info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
+	} else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) {
 		/*
 		 * With group flag FAN_REPORT_NAME, if name was not recorded in
 		 * event on a directory, we will report the name ".".
@@ -166,32 +83,10 @@ static size_t fanotify_event_len(unsigned int info_mode,
 		dot_len = 1;
 	}
 
-	if (info_mode & FAN_REPORT_PIDFD)
-		event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
+	if (fh_len)
+		info_len += fanotify_fid_info_len(fh_len, dot_len);
 
-	if (fanotify_event_has_object_fh(event)) {
-		fh_len = fanotify_event_object_fh_len(event);
-		event_len += fanotify_fid_info_len(fh_len, dot_len);
-	}
-
-	return event_len;
-}
-
-/*
- * Remove an hashed event from merge hash table.
- */
-static void fanotify_unhash_event(struct fsnotify_group *group,
-				  struct fanotify_event *event)
-{
-	assert_spin_locked(&group->notification_lock);
-
-	pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
-		 group, event, fanotify_event_hash_bucket(group, event));
-
-	if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list)))
-		return;
-
-	hlist_del_init(&event->merge_list);
+	return info_len;
 }
 
 /*
@@ -203,41 +98,34 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
 static struct fanotify_event *get_one_event(struct fsnotify_group *group,
 					    size_t count)
 {
-	size_t event_size;
+	size_t event_size = FAN_EVENT_METADATA_LEN;
 	struct fanotify_event *event = NULL;
-	struct fsnotify_event *fsn_event;
-	unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
+	unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
 
 	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
 
 	spin_lock(&group->notification_lock);
-	fsn_event = fsnotify_peek_first_event(group);
-	if (!fsn_event)
+	if (fsnotify_notify_queue_is_empty(group))
 		goto out;
 
-	event = FANOTIFY_E(fsn_event);
-	event_size = fanotify_event_len(info_mode, event);
+	if (fid_mode) {
+		event_size += fanotify_event_info_len(fid_mode,
+			FANOTIFY_E(fsnotify_peek_first_event(group)));
+	}
 
 	if (event_size > count) {
 		event = ERR_PTR(-EINVAL);
 		goto out;
 	}
-
-	/*
-	 * Held the notification_lock the whole time, so this is the
-	 * same event we peeked above.
-	 */
-	fsnotify_remove_first_event(group);
+	event = FANOTIFY_E(fsnotify_remove_first_event(group));
 	if (fanotify_is_perm_event(event->mask))
 		FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
-	if (fanotify_is_hashed_event(event->mask))
-		fanotify_unhash_event(group, event);
 out:
 	spin_unlock(&group->notification_lock);
 	return event;
 }
 
-static int create_fd(struct fsnotify_group *group, const struct path *path,
+static int create_fd(struct fsnotify_group *group, struct path *path,
 		     struct file **file)
 {
 	int client_fd;
@@ -252,7 +140,7 @@ static int create_fd(struct fsnotify_group *group, const struct path *path,
 	 * originally opened O_WRONLY.
 	 */
 	new_file = dentry_open(path,
-			       group->fanotify_data.f_flags | __FMODE_NONOTIFY,
+			       group->fanotify_data.f_flags | FMODE_NONOTIFY,
 			       current_cred());
 	if (IS_ERR(new_file)) {
 		/*
@@ -337,31 +225,9 @@ static int process_access_response(struct fsnotify_group *group,
 	return -ENOENT;
 }
 
-static size_t copy_error_info_to_user(struct fanotify_event *event,
-				      char __user *buf, int count)
-{
-	struct fanotify_event_info_error info = { };
-	struct fanotify_error_event *fee = FANOTIFY_EE(event);
-
-	info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
-	info.hdr.len = FANOTIFY_ERROR_INFO_LEN;
-
-	if (WARN_ON(count < info.hdr.len))
-		return -EFAULT;
-
-	info.error = fee->error;
-	info.error_count = fee->err_count;
-
-	if (copy_to_user(buf, &info, sizeof(info)))
-		return -EFAULT;
-
-	return info.hdr.len;
-}
-
-static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
-				 int info_type, const char *name,
-				 size_t name_len,
-				 char __user *buf, size_t count)
+static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+			     int info_type, const char *name, size_t name_len,
+			     char __user *buf, size_t count)
 {
 	struct fanotify_event_info_fid info = { };
 	struct file_handle handle = { };
@@ -373,6 +239,9 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
 	pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n",
 		 __func__, fh_len, name_len, info_len, count);
 
+	if (!fh_len)
+		return 0;
+
 	if (WARN_ON_ONCE(len < sizeof(info) || len > count))
 		return -EFAULT;
 
@@ -387,8 +256,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
 			return -EFAULT;
 		break;
 	case FAN_EVENT_INFO_TYPE_DFID_NAME:
-	case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME:
-	case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME:
 		if (WARN_ON_ONCE(!name || !name_len))
 			return -EFAULT;
 		break;
@@ -409,11 +276,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
 
 	handle.handle_type = fh->type;
 	handle.handle_bytes = fh_len;
-
-	/* Mangle handle_type for bad file_handle */
-	if (!fh_len)
-		handle.handle_type = FILEID_INVALID;
-
 	if (copy_to_user(buf, &handle, sizeof(handle)))
 		return -EFAULT;
 
@@ -458,79 +320,68 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
 	return info_len;
 }
 
-static int copy_pidfd_info_to_user(int pidfd,
-				   char __user *buf,
-				   size_t count)
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+				  struct fanotify_event *event,
+				  char __user *buf, size_t count)
 {
-	struct fanotify_event_info_pidfd info = { };
-	size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
+	struct fanotify_event_metadata metadata;
+	struct path *path = fanotify_event_path(event);
+	struct fanotify_info *info = fanotify_event_info(event);
+	unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+	struct file *f = NULL;
+	int ret, fd = FAN_NOFD;
+	int info_type = 0;
 
-	if (WARN_ON_ONCE(info_len > count))
-		return -EFAULT;
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD;
-	info.hdr.len = info_len;
-	info.pidfd = pidfd;
+	metadata.event_len = FAN_EVENT_METADATA_LEN +
+				fanotify_event_info_len(fid_mode, event);
+	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
+	metadata.vers = FANOTIFY_METADATA_VERSION;
+	metadata.reserved = 0;
+	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
+	metadata.pid = pid_vnr(event->pid);
 
-	if (copy_to_user(buf, &info, info_len))
-		return -EFAULT;
-
-	return info_len;
-}
-
-static int copy_info_records_to_user(struct fanotify_event *event,
-				     struct fanotify_info *info,
-				     unsigned int info_mode, int pidfd,
-				     char __user *buf, size_t count)
-{
-	int ret, total_bytes = 0, info_type = 0;
-	unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS;
-	unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
+	if (path && path->mnt && path->dentry) {
+		fd = create_fd(group, path, &f);
+		if (fd < 0)
+			return fd;
+	}
+	metadata.fd = fd;
 
+	ret = -EFAULT;
 	/*
-	 * Event info records order is as follows:
-	 * 1. dir fid + name
-	 * 2. (optional) new dir fid + new name
-	 * 3. (optional) child fid
+	 * Sanity check copy size in case get_one_event() and
+	 * event_len sizes ever get out of sync.
 	 */
-	if (fanotify_event_has_dir_fh(event)) {
+	if (WARN_ON_ONCE(metadata.event_len > count))
+		goto out_close_fd;
+
+	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
+		goto out_close_fd;
+
+	buf += FAN_EVENT_METADATA_LEN;
+	count -= FAN_EVENT_METADATA_LEN;
+
+	if (fanotify_is_perm_event(event->mask))
+		FANOTIFY_PERM(event)->fd = fd;
+
+	/* Event info records order is: dir fid + name, child fid */
+	if (fanotify_event_dir_fh_len(event)) {
 		info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
 					     FAN_EVENT_INFO_TYPE_DFID;
-
-		/* FAN_RENAME uses special info types */
-		if (event->mask & FAN_RENAME)
-			info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME;
-
-		ret = copy_fid_info_to_user(fanotify_event_fsid(event),
-					    fanotify_info_dir_fh(info),
-					    info_type,
-					    fanotify_info_name(info),
-					    info->name_len, buf, count);
+		ret = copy_info_to_user(fanotify_event_fsid(event),
+					fanotify_info_dir_fh(info),
+					info_type, fanotify_info_name(info),
+					info->name_len, buf, count);
 		if (ret < 0)
-			return ret;
+			goto out_close_fd;
 
 		buf += ret;
 		count -= ret;
-		total_bytes += ret;
 	}
 
-	/* New dir fid+name may be reported in addition to old dir fid+name */
-	if (fanotify_event_has_dir2_fh(event)) {
-		info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME;
-		ret = copy_fid_info_to_user(fanotify_event_fsid(event),
-					    fanotify_info_dir2_fh(info),
-					    info_type,
-					    fanotify_info_name2(info),
-					    info->name2_len, buf, count);
-		if (ret < 0)
-			return ret;
-
-		buf += ret;
-		count -= ret;
-		total_bytes += ret;
-	}
-
-	if (fanotify_event_has_object_fh(event)) {
+	if (fanotify_event_object_fh_len(event)) {
 		const char *dot = NULL;
 		int dot_len = 0;
 
@@ -544,8 +395,8 @@ static int copy_info_records_to_user(struct fanotify_event *event,
 			   (event->mask & FAN_ONDIR)) {
 			/*
 			 * With group flag FAN_REPORT_NAME, if name was not
-			 * recorded in an event on a directory, report the name
-			 * "." with info type DFID_NAME.
+			 * recorded in an event on a directory, report the
+			 * name "." with info type DFID_NAME.
 			 */
 			info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
 			dot = ".";
@@ -568,132 +419,14 @@ static int copy_info_records_to_user(struct fanotify_event *event,
 			info_type = FAN_EVENT_INFO_TYPE_FID;
 		}
 
-		ret = copy_fid_info_to_user(fanotify_event_fsid(event),
-					    fanotify_event_object_fh(event),
-					    info_type, dot, dot_len,
-					    buf, count);
-		if (ret < 0)
-			return ret;
-
-		buf += ret;
-		count -= ret;
-		total_bytes += ret;
-	}
-
-	if (pidfd_mode) {
-		ret = copy_pidfd_info_to_user(pidfd, buf, count);
-		if (ret < 0)
-			return ret;
-
-		buf += ret;
-		count -= ret;
-		total_bytes += ret;
-	}
-
-	if (fanotify_is_error_event(event->mask)) {
-		ret = copy_error_info_to_user(event, buf, count);
-		if (ret < 0)
-			return ret;
-		buf += ret;
-		count -= ret;
-		total_bytes += ret;
-	}
-
-	return total_bytes;
-}
-
-static ssize_t copy_event_to_user(struct fsnotify_group *group,
-				  struct fanotify_event *event,
-				  char __user *buf, size_t count)
-{
-	struct fanotify_event_metadata metadata;
-	const struct path *path = fanotify_event_path(event);
-	struct fanotify_info *info = fanotify_event_info(event);
-	unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
-	unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
-	struct file *f = NULL;
-	int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
-
-	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
-
-	metadata.event_len = fanotify_event_len(info_mode, event);
-	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
-	metadata.vers = FANOTIFY_METADATA_VERSION;
-	metadata.reserved = 0;
-	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
-	metadata.pid = pid_vnr(event->pid);
-	/*
-	 * For an unprivileged listener, event->pid can be used to identify the
-	 * events generated by the listener process itself, without disclosing
-	 * the pids of other processes.
-	 */
-	if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
-	    task_tgid(current) != event->pid)
-		metadata.pid = 0;
-
-	/*
-	 * For now, fid mode is required for an unprivileged listener and
-	 * fid mode does not report fd in events.  Keep this check anyway
-	 * for safety in case fid mode requirement is relaxed in the future
-	 * to allow unprivileged listener to get events with no fd and no fid.
-	 */
-	if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
-	    path && path->mnt && path->dentry) {
-		fd = create_fd(group, path, &f);
-		if (fd < 0)
-			return fd;
-	}
-	metadata.fd = fd;
-
-	if (pidfd_mode) {
-		/*
-		 * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual
-		 * exclusion is ever lifted. At the time of incoporating pidfd
-		 * support within fanotify, the pidfd API only supported the
-		 * creation of pidfds for thread-group leaders.
-		 */
-		WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID));
-
-		/*
-		 * The PIDTYPE_TGID check for an event->pid is performed
-		 * preemptively in an attempt to catch out cases where the event
-		 * listener reads events after the event generating process has
-		 * already terminated. Report FAN_NOPIDFD to the event listener
-		 * in those cases, with all other pidfd creation errors being
-		 * reported as FAN_EPIDFD.
-		 */
-		if (metadata.pid == 0 ||
-		    !pid_has_task(event->pid, PIDTYPE_TGID)) {
-			pidfd = FAN_NOPIDFD;
-		} else {
-			pidfd = pidfd_create(event->pid, 0);
-			if (pidfd < 0)
-				pidfd = FAN_EPIDFD;
-		}
-	}
-
-	ret = -EFAULT;
-	/*
-	 * Sanity check copy size in case get_one_event() and
-	 * event_len sizes ever get out of sync.
-	 */
-	if (WARN_ON_ONCE(metadata.event_len > count))
-		goto out_close_fd;
-
-	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
-		goto out_close_fd;
-
-	buf += FAN_EVENT_METADATA_LEN;
-	count -= FAN_EVENT_METADATA_LEN;
-
-	if (fanotify_is_perm_event(event->mask))
-		FANOTIFY_PERM(event)->fd = fd;
-
-	if (info_mode) {
-		ret = copy_info_records_to_user(event, info, info_mode, pidfd,
-						buf, count);
+		ret = copy_info_to_user(fanotify_event_fsid(event),
+					fanotify_event_object_fh(event),
+					info_type, dot, dot_len, buf, count);
 		if (ret < 0)
 			goto out_close_fd;
+
+		buf += ret;
+		count -= ret;
 	}
 
 	if (f)
@@ -706,10 +439,6 @@ out_close_fd:
 		put_unused_fd(fd);
 		fput(f);
 	}
-
-	if (pidfd >= 0)
-		close_fd(pidfd);
-
 	return ret;
 }
 
@@ -844,7 +573,6 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
-	struct fsnotify_event *fsn_event;
 
 	/*
 	 * Stop new events from arriving in the notification queue. since
@@ -873,12 +601,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 * dequeue them and set the response. They will be freed once the
 	 * response is consumed and fanotify_get_response() returns.
 	 */
-	while ((fsn_event = fsnotify_remove_first_event(group))) {
-		struct fanotify_event *event = FANOTIFY_E(fsn_event);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		struct fanotify_event *event;
 
+		event = FANOTIFY_E(fsnotify_remove_first_event(group));
 		if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
 			spin_unlock(&group->notification_lock);
-			fsnotify_destroy_event(group, fsn_event);
+			fsnotify_destroy_event(group, &event->fse);
 		} else {
 			finish_permission_event(group, FANOTIFY_PERM(event),
 						FAN_ALLOW);
@@ -973,7 +702,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
 	}
 
 	/* you can only watch an inode if you have read permissions on it */
-	ret = path_permission(path, MAY_READ);
+	ret = inode_permission(path->dentry->d_inode, MAY_READ);
 	if (ret) {
 		path_put(path);
 		goto out;
@@ -991,28 +720,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
 					    __u32 mask, unsigned int flags,
 					    __u32 umask, int *destroy)
 {
-	__u32 oldmask, newmask;
+	__u32 oldmask = 0;
 
 	/* umask bits cannot be removed by user */
 	mask &= ~umask;
 	spin_lock(&fsn_mark->lock);
-	oldmask = fsnotify_calc_mask(fsn_mark);
-	if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) {
+	if (!(flags & FAN_MARK_IGNORED_MASK)) {
+		oldmask = fsn_mark->mask;
 		fsn_mark->mask &= ~mask;
 	} else {
-		fsn_mark->ignore_mask &= ~mask;
+		fsn_mark->ignored_mask &= ~mask;
 	}
-	newmask = fsnotify_calc_mask(fsn_mark);
 	/*
 	 * We need to keep the mark around even if remaining mask cannot
 	 * result in any events (e.g. mask == FAN_ONDIR) to support incremenal
 	 * changes to the mask.
 	 * Destroy mark when only umask bits remain.
 	 */
-	*destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask);
+	*destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
 	spin_unlock(&fsn_mark->lock);
 
-	return oldmask & ~newmask;
+	return mask & oldmask;
 }
 
 static int fanotify_remove_mark(struct fsnotify_group *group,
@@ -1023,10 +751,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
 	__u32 removed;
 	int destroy_mark;
 
-	fsnotify_group_lock(group);
+	mutex_lock(&group->mark_mutex);
 	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsnotify_group_unlock(group);
+		mutex_unlock(&group->mark_mutex);
 		return -ENOENT;
 	}
 
@@ -1036,7 +764,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
 		fsnotify_recalc_mask(fsn_mark->connector);
 	if (destroy_mark)
 		fsnotify_detach_mark(fsn_mark);
-	fsnotify_group_unlock(group);
+	mutex_unlock(&group->mark_mutex);
 	if (destroy_mark)
 		fsnotify_free_mark(fsn_mark);
 
@@ -1069,199 +797,76 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 				    flags, umask);
 }
 
-static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
-				       unsigned int fan_flags)
+static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+				       __u32 mask,
+				       unsigned int flags)
 {
-	bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
-	unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS;
-	bool recalc = false;
-
-	/*
-	 * When using FAN_MARK_IGNORE for the first time, mark starts using
-	 * independent event flags in ignore mask.  After that, trying to
-	 * update the ignore mask with the old FAN_MARK_IGNORED_MASK API
-	 * will result in EEXIST error.
-	 */
-	if (ignore == FAN_MARK_IGNORE)
-		fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;
-
-	/*
-	 * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
-	 * the removal of the FS_MODIFY bit in calculated mask if it was set
-	 * because of an ignore mask that is now going to survive FS_MODIFY.
-	 */
-	if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
-	    !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
-		fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
-		if (!(fsn_mark->mask & FS_MODIFY))
-			recalc = true;
-	}
-
-	if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE ||
-	    want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
-		return recalc;
-
-	/*
-	 * NO_IREF may be removed from a mark, but not added.
-	 * When removed, fsnotify_recalc_mask() will take the inode ref.
-	 */
-	WARN_ON_ONCE(!want_iref);
-	fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF;
-
-	return true;
-}
-
-static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
-				      __u32 mask, unsigned int fan_flags)
-{
-	bool recalc;
+	__u32 oldmask = -1;
 
 	spin_lock(&fsn_mark->lock);
-	if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS))
+	if (!(flags & FAN_MARK_IGNORED_MASK)) {
+		oldmask = fsn_mark->mask;
 		fsn_mark->mask |= mask;
-	else
-		fsn_mark->ignore_mask |= mask;
-
-	recalc = fsnotify_calc_mask(fsn_mark) &
-		~fsnotify_conn_mask(fsn_mark->connector);
-
-	recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags);
+	} else {
+		fsn_mark->ignored_mask |= mask;
+		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
+			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
+	}
 	spin_unlock(&fsn_mark->lock);
 
-	return recalc;
+	return mask & ~oldmask;
 }
 
 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 						   fsnotify_connp_t *connp,
-						   unsigned int obj_type,
-						   unsigned int fan_flags,
+						   unsigned int type,
 						   __kernel_fsid_t *fsid)
 {
-	struct ucounts *ucounts = group->fanotify_data.ucounts;
 	struct fsnotify_mark *mark;
 	int ret;
 
-	/*
-	 * Enforce per user marks limits per user in all containing user ns.
-	 * A group with FAN_UNLIMITED_MARKS does not contribute to mark count
-	 * in the limited groups account.
-	 */
-	if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) &&
-	    !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS))
+	if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
 		return ERR_PTR(-ENOSPC);
 
 	mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
-	if (!mark) {
-		ret = -ENOMEM;
-		goto out_dec_ucounts;
-	}
+	if (!mark)
+		return ERR_PTR(-ENOMEM);
 
 	fsnotify_init_mark(mark, group);
-	if (fan_flags & FAN_MARK_EVICTABLE)
-		mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF;
-
-	ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid);
+	ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
 	if (ret) {
 		fsnotify_put_mark(mark);
-		goto out_dec_ucounts;
+		return ERR_PTR(ret);
 	}
 
 	return mark;
-
-out_dec_ucounts:
-	if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS))
-		dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS);
-	return ERR_PTR(ret);
 }
 
-static int fanotify_group_init_error_pool(struct fsnotify_group *group)
-{
-	if (mempool_initialized(&group->fanotify_data.error_events_pool))
-		return 0;
-
-	return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool,
-					 FANOTIFY_DEFAULT_FEE_POOL_SIZE,
-					 sizeof(struct fanotify_error_event));
-}
-
-static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
-					      unsigned int fan_flags)
-{
-	/*
-	 * Non evictable mark cannot be downgraded to evictable mark.
-	 */
-	if (fan_flags & FAN_MARK_EVICTABLE &&
-	    !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
-		return -EEXIST;
-
-	/*
-	 * New ignore mask semantics cannot be downgraded to old semantics.
-	 */
-	if (fan_flags & FAN_MARK_IGNORED_MASK &&
-	    fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
-		return -EEXIST;
-
-	/*
-	 * An ignore mask that survives modify could never be downgraded to not
-	 * survive modify.  With new FAN_MARK_IGNORE semantics we make that rule
-	 * explicit and return an error when trying to update the ignore mask
-	 * without the original FAN_MARK_IGNORED_SURV_MODIFY value.
-	 */
-	if (fan_flags & FAN_MARK_IGNORE &&
-	    !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
-	    fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
-		return -EEXIST;
-
-	return 0;
-}
 
 static int fanotify_add_mark(struct fsnotify_group *group,
-			     fsnotify_connp_t *connp, unsigned int obj_type,
-			     __u32 mask, unsigned int fan_flags,
+			     fsnotify_connp_t *connp, unsigned int type,
+			     __u32 mask, unsigned int flags,
 			     __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *fsn_mark;
-	bool recalc;
-	int ret = 0;
+	__u32 added;
 
-	fsnotify_group_lock(group);
+	mutex_lock(&group->mark_mutex);
 	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
-						 fan_flags, fsid);
+		fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
 		if (IS_ERR(fsn_mark)) {
-			fsnotify_group_unlock(group);
+			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
 		}
 	}
-
-	/*
-	 * Check if requested mark flags conflict with an existing mark flags.
-	 */
-	ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
-	if (ret)
-		goto out;
-
-	/*
-	 * Error events are pre-allocated per group, only if strictly
-	 * needed (i.e. FAN_FS_ERROR was requested).
-	 */
-	if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) &&
-	    (mask & FAN_FS_ERROR)) {
-		ret = fanotify_group_init_error_pool(group);
-		if (ret)
-			goto out;
-	}
-
-	recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags);
-	if (recalc)
+	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
+	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
 		fsnotify_recalc_mask(fsn_mark->connector);
-
-out:
-	fsnotify_group_unlock(group);
+	mutex_unlock(&group->mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
-	return ret;
+	return 0;
 }
 
 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
@@ -1288,10 +893,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 
 	/*
 	 * If some other task has this inode open for write we should not add
-	 * an ignore mask, unless that ignore mask is supposed to survive
+	 * an ignored mark, unless that ignored mark is supposed to survive
 	 * modification changes anyway.
 	 */
-	if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
+	if ((flags & FAN_MARK_IGNORED_MASK) &&
 	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
 	    inode_is_open_for_write(inode))
 		return 0;
@@ -1314,49 +919,20 @@ static struct fsnotify_event *fanotify_alloc_overflow_event(void)
 	return &oevent->fse;
 }
 
-static struct hlist_head *fanotify_alloc_merge_hash(void)
-{
-	struct hlist_head *hash;
-
-	hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS,
-		       GFP_KERNEL_ACCOUNT);
-	if (!hash)
-		return NULL;
-
-	__hash_init(hash, FANOTIFY_HTABLE_SIZE);
-
-	return hash;
-}
-
 /* fanotify syscalls */
 SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 {
 	struct fsnotify_group *group;
 	int f_flags, fd;
+	struct user_struct *user;
 	unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
 	unsigned int class = flags & FANOTIFY_CLASS_BITS;
-	unsigned int internal_flags = 0;
 
 	pr_debug("%s: flags=%x event_f_flags=%x\n",
 		 __func__, flags, event_f_flags);
 
-	if (!capable(CAP_SYS_ADMIN)) {
-		/*
-		 * An unprivileged user can setup an fanotify group with
-		 * limited functionality - an unprivileged group is limited to
-		 * notification events with file handles and it cannot use
-		 * unlimited queue/marks.
-		 */
-		if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
-			return -EPERM;
-
-		/*
-		 * Setting the internal flag FANOTIFY_UNPRIV on the group
-		 * prevents setting mount/filesystem marks on this group and
-		 * prevents reporting pid and open fd in events.
-		 */
-		internal_flags |= FANOTIFY_UNPRIV;
-	}
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
 
 #ifdef CONFIG_AUDITSYSCALL
 	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
@@ -1365,14 +941,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 #endif
 		return -EINVAL;
 
-	/*
-	 * A pidfd can only be returned for a thread-group leader; thus
-	 * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually
-	 * exclusive.
-	 */
-	if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
-		return -EINVAL;
-
 	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
 		return -EINVAL;
 
@@ -1395,46 +963,30 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID))
 		return -EINVAL;
 
-	/*
-	 * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID
-	 * and is used as an indication to report both dir and child fid on all
-	 * dirent events.
-	 */
-	if ((fid_mode & FAN_REPORT_TARGET_FID) &&
-	    (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
-		return -EINVAL;
+	user = get_current_user();
+	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
+		free_uid(user);
+		return -EMFILE;
+	}
 
-	f_flags = O_RDWR | __FMODE_NONOTIFY;
+	f_flags = O_RDWR | FMODE_NONOTIFY;
 	if (flags & FAN_CLOEXEC)
 		f_flags |= O_CLOEXEC;
 	if (flags & FAN_NONBLOCK)
 		f_flags |= O_NONBLOCK;
 
 	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
-	group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
-				     FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS);
+	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
 	if (IS_ERR(group)) {
+		free_uid(user);
 		return PTR_ERR(group);
 	}
 
-	/* Enforce groups limits per user in all containing user ns */
-	group->fanotify_data.ucounts = inc_ucount(current_user_ns(),
-						  current_euid(),
-						  UCOUNT_FANOTIFY_GROUPS);
-	if (!group->fanotify_data.ucounts) {
-		fd = -EMFILE;
-		goto out_destroy_group;
-	}
-
-	group->fanotify_data.flags = flags | internal_flags;
+	group->fanotify_data.user = user;
+	group->fanotify_data.flags = flags;
+	atomic_inc(&user->fanotify_listeners);
 	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
-	group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
-	if (!group->fanotify_data.merge_hash) {
-		fd = -ENOMEM;
-		goto out_destroy_group;
-	}
-
 	group->overflow_event = fanotify_alloc_overflow_event();
 	if (unlikely(!group->overflow_event)) {
 		fd = -ENOMEM;
@@ -1467,13 +1019,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 			goto out_destroy_group;
 		group->max_events = UINT_MAX;
 	} else {
-		group->max_events = fanotify_max_queued_events;
+		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
 	}
 
 	if (flags & FAN_UNLIMITED_MARKS) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
 			goto out_destroy_group;
+		group->fanotify_data.max_marks = UINT_MAX;
+	} else {
+		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
 	}
 
 	if (flags & FAN_ENABLE_AUDIT) {
@@ -1493,15 +1048,16 @@ out_destroy_group:
 	return fd;
 }
 
-static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
+/* Check if filesystem can encode a unique fid */
+static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
 {
 	__kernel_fsid_t root_fsid;
 	int err;
 
 	/*
-	 * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
+	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
 	 */
-	err = vfs_get_fsid(dentry, fsid);
+	err = vfs_get_fsid(path->dentry, fsid);
 	if (err)
 		return err;
 
@@ -1509,10 +1065,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
 		return -ENODEV;
 
 	/*
-	 * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
+	 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
 	 * which uses a different fsid than sb root.
 	 */
-	err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid);
+	err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
 	if (err)
 		return err;
 
@@ -1520,12 +1076,6 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
 	    root_fsid.val[1] != fsid->val[1])
 		return -EXDEV;
 
-	return 0;
-}
-
-/* Check if filesystem can encode a unique fid */
-static int fanotify_test_fid(struct dentry *dentry)
-{
 	/*
 	 * We need to make sure that the file system supports at least
 	 * encoding a file handle so user can use name_to_handle_at() to
@@ -1533,22 +1083,17 @@ static int fanotify_test_fid(struct dentry *dentry)
 	 * objects. However, name_to_handle_at() requires that the
 	 * filesystem also supports decoding file handles.
 	 */
-	if (!dentry->d_sb->s_export_op ||
-	    !dentry->d_sb->s_export_op->fh_to_dentry)
+	if (!path->dentry->d_sb->s_export_op ||
+	    !path->dentry->d_sb->s_export_op->fh_to_dentry)
 		return -EOPNOTSUPP;
 
 	return 0;
 }
 
-static int fanotify_events_supported(struct fsnotify_group *group,
-				     const struct path *path, __u64 mask,
+static int fanotify_events_supported(struct path *path, __u64 mask,
 				     unsigned int flags)
 {
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
-	/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
-	bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
-				 (mask & FAN_RENAME) ||
-				 (flags & FAN_MARK_IGNORE);
 
 	/*
 	 * Some filesystems such as 'proc' acquire unusual locks when opening
@@ -1576,15 +1121,6 @@ static int fanotify_events_supported(struct fsnotify_group *group,
 	    path->mnt->mnt_sb->s_flags & SB_NOUSER)
 		return -EINVAL;
 
-	/*
-	 * We shouldn't have allowed setting dirent events and the directory
-	 * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode,
-	 * but because we always allowed it, error only when using new APIs.
-	 */
-	if (strict_dir_events && mark_type == FAN_MARK_INODE &&
-	    !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
-		return -ENOTDIR;
-
 	return 0;
 }
 
@@ -1599,8 +1135,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	__kernel_fsid_t __fsid, *fsid = NULL;
 	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
-	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
-	unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
+	bool ignored = flags & FAN_MARK_IGNORED_MASK;
 	unsigned int obj_type, fid_mode;
 	u32 umask = 0;
 	int ret;
@@ -1609,7 +1144,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		 __func__, fanotify_fd, flags, dfd, pathname, mask);
 
 	/* we only use the lower 32 bits as of right now. */
-	if (upper_32_bits(mask))
+	if (mask & ((__u64)0xffffffff << 32))
 		return -EINVAL;
 
 	if (flags & ~FANOTIFY_MARK_FLAGS)
@@ -1629,7 +1164,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		return -EINVAL;
 	}
 
-	switch (mark_cmd) {
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
 	case FAN_MARK_ADD:
 	case FAN_MARK_REMOVE:
 		if (!mask)
@@ -1649,19 +1184,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	if (mask & ~valid_mask)
 		return -EINVAL;
 
-
-	/* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */
-	if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK))
-		return -EINVAL;
-
-	/*
-	 * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
-	 * FAN_MARK_IGNORED_MASK.
-	 */
-	if (ignore == FAN_MARK_IGNORED_MASK) {
+	/* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
+	if (ignored)
 		mask &= ~FANOTIFY_EVENT_FLAGS;
-		umask = FANOTIFY_EVENT_FLAGS;
-	}
 
 	f = fdget(fanotify_fd);
 	if (unlikely(!f.file))
@@ -1673,17 +1198,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		goto fput_and_out;
 	group = f.file->private_data;
 
-	/*
-	 * An unprivileged user is not allowed to setup mount nor filesystem
-	 * marks.  This also includes setting up such marks by a group that
-	 * was initialized by an unprivileged user.
-	 */
-	ret = -EPERM;
-	if ((!capable(CAP_SYS_ADMIN) ||
-	     FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
-	    mark_type != FAN_MARK_INODE)
-		goto fput_and_out;
-
 	/*
 	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF.  These are not
 	 * allowed to set permissions events.
@@ -1693,39 +1207,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	    group->priority == FS_PRIO_0)
 		goto fput_and_out;
 
-	if (mask & FAN_FS_ERROR &&
-	    mark_type != FAN_MARK_FILESYSTEM)
-		goto fput_and_out;
-
 	/*
-	 * Evictable is only relevant for inode marks, because only inode object
-	 * can be evicted on memory pressure.
-	 */
-	if (flags & FAN_MARK_EVICTABLE &&
-	     mark_type != FAN_MARK_INODE)
-		goto fput_and_out;
-
-	/*
-	 * Events that do not carry enough information to report
-	 * event->fd require a group that supports reporting fid.  Those
-	 * events are not supported on a mount mark, because they do not
-	 * carry enough information (i.e. path) to be filtered by mount
-	 * point.
+	 * Events with data type inode do not carry enough information to report
+	 * event->fd, so we do not allow setting a mask for inode events unless
+	 * group supports reporting fid.
+	 * inode events are not supported on a mount mark, because they do not
+	 * carry enough information (i.e. path) to be filtered by mount point.
 	 */
 	fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
-	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
+	if (mask & FANOTIFY_INODE_EVENTS &&
 	    (!fid_mode || mark_type == FAN_MARK_MOUNT))
 		goto fput_and_out;
 
-	/*
-	 * FAN_RENAME uses special info type records to report the old and
-	 * new parent+name.  Reporting only old and new parent id is less
-	 * useful and was not implemented.
-	 */
-	if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
-		goto fput_and_out;
-
-	if (mark_cmd == FAN_MARK_FLUSH) {
+	if (flags & FAN_MARK_FLUSH) {
 		ret = 0;
 		if (mark_type == FAN_MARK_MOUNT)
 			fsnotify_clear_vfsmount_marks_by_group(group);
@@ -1741,18 +1235,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	if (ret)
 		goto fput_and_out;
 
-	if (mark_cmd == FAN_MARK_ADD) {
-		ret = fanotify_events_supported(group, &path, mask, flags);
+	if (flags & FAN_MARK_ADD) {
+		ret = fanotify_events_supported(&path, mask, flags);
 		if (ret)
 			goto path_put_and_out;
 	}
 
 	if (fid_mode) {
-		ret = fanotify_test_fsid(path.dentry, &__fsid);
-		if (ret)
-			goto path_put_and_out;
-
-		ret = fanotify_test_fid(path.dentry);
+		ret = fanotify_test_fid(&path, &__fsid);
 		if (ret)
 			goto path_put_and_out;
 
@@ -1765,13 +1255,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	else
 		mnt = path.mnt;
 
-	ret = mnt ? -EINVAL : -EISDIR;
-	/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
-	if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
-	    (mnt || S_ISDIR(inode->i_mode)) &&
-	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY))
-		goto path_put_and_out;
-
 	/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
 	if (mnt || !S_ISDIR(inode->i_mode)) {
 		mask &= ~FAN_EVENT_ON_CHILD;
@@ -1781,12 +1264,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		 * events with parent/name info for non-directory.
 		 */
 		if ((fid_mode & FAN_REPORT_DIR_FID) &&
-		    (flags & FAN_MARK_ADD) && !ignore)
+		    (flags & FAN_MARK_ADD) && !ignored)
 			mask |= FAN_EVENT_ON_CHILD;
 	}
 
 	/* create/update an inode mark */
-	switch (mark_cmd) {
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
 		if (mark_type == FAN_MARK_MOUNT)
 			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
@@ -1847,24 +1330,8 @@ SYSCALL32_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-	struct sysinfo si;
-	int max_marks;
-
-	si_meminfo(&si);
-	/*
-	 * Allow up to 1% of addressable memory to be accounted for per user
-	 * marks limited to the range [8192, 1048576]. mount and sb marks are
-	 * a lot cheaper than inode marks, but there is no reason for a user
-	 * to have many of those, so calculate by the cost of inode marks.
-	 */
-	max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
-		    INODE_MARK_COST;
-	max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS,
-				     FANOTIFY_DEFAULT_MAX_USER_MARKS);
-
-	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
 
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
 					 SLAB_PANIC|SLAB_ACCOUNT);
@@ -1877,11 +1344,6 @@ static int __init fanotify_user_setup(void)
 			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
 	}
 
-	fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
-	init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
-					FANOTIFY_DEFAULT_MAX_GROUPS;
-	init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
-
 	return 0;
 }
 device_initcall(fanotify_user_setup);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 55081ae3a6ec..765b50aeadd2 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -14,7 +14,6 @@
 #include <linux/exportfs.h>
 
 #include "inotify/inotify.h"
-#include "fanotify/fanotify.h"
 #include "fdinfo.h"
 #include "fsnotify.h"
 
@@ -29,13 +28,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f,
 	struct fsnotify_group *group = f->private_data;
 	struct fsnotify_mark *mark;
 
-	fsnotify_group_lock(group);
+	mutex_lock(&group->mark_mutex);
 	list_for_each_entry(mark, &group->marks_list, g_list) {
 		show(m, mark);
 		if (seq_has_overflowed(m))
 			break;
 	}
-	fsnotify_group_unlock(group);
+	mutex_unlock(&group->mark_mutex);
 }
 
 #if defined(CONFIG_EXPORTFS)
@@ -104,16 +103,19 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f)
 
 static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 {
-	unsigned int mflags = fanotify_mark_user_flags(mark);
+	unsigned int mflags = 0;
 	struct inode *inode;
 
+	if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+		mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
+
 	if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) {
 		inode = igrab(fsnotify_conn_inode(mark->connector));
 		if (!inode)
 			return;
 		seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
 			   inode->i_ino, inode->i_sb->s_dev,
-			   mflags, mark->mask, mark->ignore_mask);
+			   mflags, mark->mask, mark->ignored_mask);
 		show_mark_fhandle(m, inode);
 		seq_putc(m, '\n');
 		iput(inode);
@@ -121,12 +123,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		struct mount *mnt = fsnotify_conn_mount(mark->connector);
 
 		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
-			   mnt->mnt_id, mflags, mark->mask, mark->ignore_mask);
+			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
 	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
 		struct super_block *sb = fsnotify_conn_sb(mark->connector);
 
 		seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
-			   sb->s_dev, mflags, mark->mask, mark->ignore_mask);
+			   sb->s_dev, mflags, mark->mask, mark->ignored_mask);
 	}
 }
 
@@ -135,8 +137,7 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
 	struct fsnotify_group *group = f->private_data;
 
 	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
-		   group->fanotify_data.flags & FANOTIFY_INIT_FLAGS,
-		   group->fanotify_data.f_flags);
+		   group->fanotify_data.flags, group->fanotify_data.f_flags);
 
 	show_fdinfo(m, f, fanotify_fdinfo);
 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 7974e91ffe13..30d422b8c0fc 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -70,7 +70,8 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&sb->s_inode_list_lock);
 
-		iput(iput_inode);
+		if (iput_inode)
+			iput(iput_inode);
 
 		/* for each watch, send FS_UNMOUNT and then remove it */
 		fsnotify_inode(inode, FS_UNMOUNT);
@@ -84,23 +85,24 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
 	}
 	spin_unlock(&sb->s_inode_list_lock);
 
-	iput(iput_inode);
+	if (iput_inode)
+		iput(iput_inode);
+	/* Wait for outstanding inode references from connectors */
+	wait_var_event(&sb->s_fsnotify_inode_refs,
+		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
 }
 
 void fsnotify_sb_delete(struct super_block *sb)
 {
 	fsnotify_unmount_inodes(sb);
 	fsnotify_clear_marks_by_sb(sb);
-	/* Wait for outstanding object references from connectors */
-	wait_var_event(&sb->s_fsnotify_connectors,
-		       !atomic_long_read(&sb->s_fsnotify_connectors));
 }
 
 /*
  * Given an inode, first check if we care what happens to our children.  Inotify
  * and dnotify both tell their parents about events.  If we care about any event
  * on a child we run all of our children and set a dentry flag saying that the
- * parent cares.  Thus when an event happens on a child it can quickly tell
+ * parent cares.  Thus when an event happens on a child it can quickly tell
  * if there is a need to find a parent and send the event to the parent.
  */
 void __fsnotify_update_child_dentry_flags(struct inode *inode)
@@ -250,10 +252,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group,
 	if (WARN_ON_ONCE(!ops->handle_inode_event))
 		return 0;
 
-	if (WARN_ON_ONCE(!inode && !dir))
-		return 0;
-
-	if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) &&
+	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
 	    path && d_unlinked(path->dentry))
 		return 0;
 
@@ -277,28 +276,23 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
 	    WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
 		return 0;
 
-	/*
-	 * For FS_RENAME, 'dir' is old dir and 'data' is new dentry.
-	 * The only ->handle_inode_event() backend that supports FS_RENAME is
-	 * dnotify, where it means file was renamed within same parent.
-	 */
-	if (mask & FS_RENAME) {
-		struct dentry *moved = fsnotify_data_dentry(data, data_type);
-
-		if (dir != moved->d_parent->d_inode)
+	if (parent_mark) {
+		/*
+		 * parent_mark indicates that the parent inode is watching
+		 * children and interested in this event, which is an event
+		 * possible on child. But is *this mark* watching children and
+		 * interested in this event?
+		 */
+		if (parent_mark->mask & FS_EVENT_ON_CHILD) {
+			ret = fsnotify_handle_inode_event(group, parent_mark, mask,
+							  data, data_type, dir, name, 0);
+			if (ret)
+				return ret;
+		}
+		if (!inode_mark)
 			return 0;
 	}
 
-	if (parent_mark) {
-		ret = fsnotify_handle_inode_event(group, parent_mark, mask,
-						  data, data_type, dir, name, 0);
-		if (ret)
-			return ret;
-	}
-
-	if (!inode_mark)
-		return 0;
-
 	if (mask & FS_EVENT_ON_CHILD) {
 		/*
 		 * Some events can be sent on both parent dir and child marks
@@ -324,36 +318,42 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
 	struct fsnotify_group *group = NULL;
 	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
 	__u32 marks_mask = 0;
-	__u32 marks_ignore_mask = 0;
-	bool is_dir = mask & FS_ISDIR;
+	__u32 marks_ignored_mask = 0;
 	struct fsnotify_mark *mark;
 	int type;
 
-	if (!iter_info->report_mask)
+	if (WARN_ON(!iter_info->report_mask))
 		return 0;
 
 	/* clear ignored on inode modification */
 	if (mask & FS_MODIFY) {
-		fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
-			if (!(mark->flags &
-			      FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-				mark->ignore_mask = 0;
+		fsnotify_foreach_obj_type(type) {
+			if (!fsnotify_iter_should_report_type(iter_info, type))
+				continue;
+			mark = iter_info->marks[type];
+			if (mark &&
+			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+				mark->ignored_mask = 0;
 		}
 	}
 
-	/* Are any of the group marks interested in this event? */
-	fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
-		group = mark->group;
-		marks_mask |= mark->mask;
-		marks_ignore_mask |=
-			fsnotify_effective_ignore_mask(mark, is_dir, type);
+	fsnotify_foreach_obj_type(type) {
+		if (!fsnotify_iter_should_report_type(iter_info, type))
+			continue;
+		mark = iter_info->marks[type];
+		/* does the object mark tell us to do something? */
+		if (mark) {
+			group = mark->group;
+			marks_mask |= mark->mask;
+			marks_ignored_mask |= mark->ignored_mask;
+		}
 	}
 
-	pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
-		 __func__, group, mask, marks_mask, marks_ignore_mask,
+	pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
+		 __func__, group, mask, marks_mask, marks_ignored_mask,
 		 data, data_type, dir, cookie);
 
-	if (!(test_mask & marks_mask & ~marks_ignore_mask))
+	if (!(test_mask & marks_mask & ~marks_ignored_mask))
 		return 0;
 
 	if (group->ops->handle_event) {
@@ -390,11 +390,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
 
 /*
  * iter_info is a multi head priority queue of marks.
- * Pick a subset of marks from queue heads, all with the same group
- * and set the report_mask to a subset of the selected marks.
- * Returns false if there are no more groups to iterate.
+ * Pick a subset of marks from queue heads, all with the
+ * same group and set the report_mask for selected subset.
+ * Returns the report_mask of the selected subset.
  */
-static bool fsnotify_iter_select_report_types(
+static unsigned int fsnotify_iter_select_report_types(
 		struct fsnotify_iter_info *iter_info)
 {
 	struct fsnotify_group *max_prio_group = NULL;
@@ -402,7 +402,7 @@ static bool fsnotify_iter_select_report_types(
 	int type;
 
 	/* Choose max prio group among groups of all queue heads */
-	fsnotify_foreach_iter_type(type) {
+	fsnotify_foreach_obj_type(type) {
 		mark = iter_info->marks[type];
 		if (mark &&
 		    fsnotify_compare_groups(max_prio_group, mark->group) > 0)
@@ -410,49 +410,30 @@ static bool fsnotify_iter_select_report_types(
 	}
 
 	if (!max_prio_group)
-		return false;
+		return 0;
 
 	/* Set the report mask for marks from same group as max prio group */
-	iter_info->current_group = max_prio_group;
 	iter_info->report_mask = 0;
-	fsnotify_foreach_iter_type(type) {
+	fsnotify_foreach_obj_type(type) {
 		mark = iter_info->marks[type];
-		if (mark && mark->group == iter_info->current_group) {
-			/*
-			 * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode
-			 * is watching children and interested in this event,
-			 * which is an event possible on child.
-			 * But is *this mark* watching children?
-			 */
-			if (type == FSNOTIFY_ITER_TYPE_PARENT &&
-			    !(mark->mask & FS_EVENT_ON_CHILD) &&
-			    !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD))
-				continue;
-
+		if (mark &&
+		    fsnotify_compare_groups(max_prio_group, mark->group) == 0)
 			fsnotify_iter_set_report_type(iter_info, type);
-		}
 	}
 
-	return true;
+	return iter_info->report_mask;
 }
 
 /*
- * Pop from iter_info multi head queue, the marks that belong to the group of
+ * Pop from iter_info multi head queue, the marks that were iterated in the
  * current iteration step.
  */
 static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
 {
-	struct fsnotify_mark *mark;
 	int type;
 
-	/*
-	 * We cannot use fsnotify_foreach_iter_mark_type() here because we
-	 * may need to advance a mark of type X that belongs to current_group
-	 * but was not selected for reporting.
-	 */
-	fsnotify_foreach_iter_type(type) {
-		mark = iter_info->marks[type];
-		if (mark && mark->group == iter_info->current_group)
+	fsnotify_foreach_obj_type(type) {
+		if (fsnotify_iter_should_report_type(iter_info, type))
 			iter_info->marks[type] =
 				fsnotify_next_mark(iter_info->marks[type]);
 	}
@@ -474,20 +455,18 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
  *		@file_name is relative to
  * @file_name:	optional file name associated with event
  * @inode:	optional inode associated with event -
- *		If @dir and @inode are both non-NULL, event may be
- *		reported to both.
+ *		either @dir or @inode must be non-NULL.
+ *		if both are non-NULL event may be reported to both.
  * @cookie:	inotify rename cookie
  */
 int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	     const struct qstr *file_name, struct inode *inode, u32 cookie)
 {
 	const struct path *path = fsnotify_data_path(data, data_type);
-	struct super_block *sb = fsnotify_data_sb(data, data_type);
 	struct fsnotify_iter_info iter_info = {};
+	struct super_block *sb;
 	struct mount *mnt = NULL;
-	struct inode *inode2 = NULL;
-	struct dentry *moved;
-	int inode2_type;
+	struct inode *parent = NULL;
 	int ret = 0;
 	__u32 test_mask, marks_mask;
 
@@ -497,20 +476,14 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	if (!inode) {
 		/* Dirent event - report on TYPE_INODE to dir */
 		inode = dir;
-		/* For FS_RENAME, inode is old_dir and inode2 is new_dir */
-		if (mask & FS_RENAME) {
-			moved = fsnotify_data_dentry(data, data_type);
-			inode2 = moved->d_parent->d_inode;
-			inode2_type = FSNOTIFY_ITER_TYPE_INODE2;
-		}
 	} else if (mask & FS_EVENT_ON_CHILD) {
 		/*
 		 * Event on child - report on TYPE_PARENT to dir if it is
 		 * watching children and on TYPE_INODE to child.
 		 */
-		inode2 = dir;
-		inode2_type = FSNOTIFY_ITER_TYPE_PARENT;
+		parent = dir;
 	}
+	sb = inode->i_sb;
 
 	/*
 	 * Optimization: srcu_read_lock() has a memory barrier which can
@@ -522,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	if (!sb->s_fsnotify_marks &&
 	    (!mnt || !mnt->mnt_fsnotify_marks) &&
 	    (!inode || !inode->i_fsnotify_marks) &&
-	    (!inode2 || !inode2->i_fsnotify_marks))
+	    (!parent || !parent->i_fsnotify_marks))
 		return 0;
 
 	marks_mask = sb->s_fsnotify_mask;
@@ -530,35 +503,33 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 		marks_mask |= mnt->mnt_fsnotify_mask;
 	if (inode)
 		marks_mask |= inode->i_fsnotify_mask;
-	if (inode2)
-		marks_mask |= inode2->i_fsnotify_mask;
+	if (parent)
+		marks_mask |= parent->i_fsnotify_mask;
 
 
 	/*
-	 * If this is a modify event we may need to clear some ignore masks.
-	 * In that case, the object with ignore masks will have the FS_MODIFY
-	 * event in its mask.
-	 * Otherwise, return if none of the marks care about this type of event.
+	 * If this is a modify event we may need to clear the ignored masks;
+	 * otherwise return if none of the marks care about this type of event.
 	 */
 	test_mask = (mask & ALL_FSNOTIFY_EVENTS);
-	if (!(test_mask & marks_mask))
+	if (!(mask & FS_MODIFY) && !(test_mask & marks_mask))
 		return 0;
 
 	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
 
-	iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
+	iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
 		fsnotify_first_mark(&sb->s_fsnotify_marks);
 	if (mnt) {
-		iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
+		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
 			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
 	}
 	if (inode) {
-		iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] =
+		iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
 			fsnotify_first_mark(&inode->i_fsnotify_marks);
 	}
-	if (inode2) {
-		iter_info.marks[inode2_type] =
-			fsnotify_first_mark(&inode2->i_fsnotify_marks);
+	if (parent) {
+		iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
+			fsnotify_first_mark(&parent->i_fsnotify_marks);
 	}
 
 	/*
@@ -587,7 +558,7 @@ static __init int fsnotify_init(void)
 {
 	int ret;
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
 
 	ret = init_srcu_struct(&fsnotify_mark_srcu);
 	if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index fde74eb333cc..ff2063ec6b0f 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -27,21 +27,6 @@ static inline struct super_block *fsnotify_conn_sb(
 	return container_of(conn->obj, struct super_block, s_fsnotify_marks);
 }
 
-static inline struct super_block *fsnotify_connector_sb(
-				struct fsnotify_mark_connector *conn)
-{
-	switch (conn->type) {
-	case FSNOTIFY_OBJ_TYPE_INODE:
-		return fsnotify_conn_inode(conn)->i_sb;
-	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
-		return fsnotify_conn_mount(conn)->mnt.mnt_sb;
-	case FSNOTIFY_OBJ_TYPE_SB:
-		return fsnotify_conn_sb(conn);
-	default:
-		return NULL;
-	}
-}
-
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
@@ -76,6 +61,10 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
  */
 extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
 
+/* allocate and destroy an event holder to attach events to notification/access queues */
+extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
+extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+
 extern struct kmem_cache *fsnotify_mark_connector_cachep;
 
 #endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 1de6631a3925..a4a4b1c64d32 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -58,7 +58,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
 	fsnotify_group_stop_queueing(group);
 
 	/* Clear all marks for this group and queue them for destruction */
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_ANY);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK);
 
 	/*
 	 * Some marks can still be pinned when waiting for response from
@@ -88,7 +88,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
 	 * that deliberately ignores overflow events.
 	 */
 	if (group->overflow_event)
-		group->ops->free_event(group, group->overflow_event);
+		group->ops->free_event(group->overflow_event);
 
 	fsnotify_put_group(group);
 }
@@ -111,19 +111,20 @@ void fsnotify_put_group(struct fsnotify_group *group)
 }
 EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
-static struct fsnotify_group *__fsnotify_alloc_group(
-				const struct fsnotify_ops *ops,
-				int flags, gfp_t gfp)
+/*
+ * Create a new fsnotify_group and hold a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 {
-	static struct lock_class_key nofs_marks_lock;
 	struct fsnotify_group *group;
 
-	group = kzalloc(sizeof(struct fsnotify_group), gfp);
+	group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
 	if (!group)
 		return ERR_PTR(-ENOMEM);
 
 	/* set to 0 when there a no external references to this group */
 	refcount_set(&group->refcnt, 1);
+	atomic_set(&group->num_marks, 0);
 	atomic_set(&group->user_waits, 0);
 
 	spin_lock_init(&group->notification_lock);
@@ -135,32 +136,9 @@ static struct fsnotify_group *__fsnotify_alloc_group(
 	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
-	group->flags = flags;
-	/*
-	 * For most backends, eviction of inode with a mark is not expected,
-	 * because marks hold a refcount on the inode against eviction.
-	 *
-	 * Use a different lockdep class for groups that support evictable
-	 * inode marks, because with evictable marks, mark_mutex is NOT
-	 * fs-reclaim safe - the mutex is taken when evicting inodes.
-	 */
-	if (flags & FSNOTIFY_GROUP_NOFS)
-		lockdep_set_class(&group->mark_mutex, &nofs_marks_lock);
 
 	return group;
 }
-
-/*
- * Create a new fsnotify_group and hold a reference for the group returned.
- */
-struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops,
-					    int flags)
-{
-	gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT :
-						    GFP_KERNEL;
-
-	return __fsnotify_alloc_group(ops, flags, gfp);
-}
 EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 7d5df7a21539..8f00151eb731 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -27,18 +27,11 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
  * userspace.  There is at least one bit (FS_EVENT_ON_CHILD) which is
  * used only internally to the kernel.
  */
-#define INOTIFY_USER_MASK (IN_ALL_EVENTS)
+#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)
 
 static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
 {
-	__u32 mask = fsn_mark->mask & INOTIFY_USER_MASK;
-
-	if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK)
-		mask |= IN_EXCL_UNLINK;
-	if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
-		mask |= IN_ONESHOT;
-
-	return mask;
+	return fsn_mark->mask & INOTIFY_USER_MASK;
 }
 
 extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 993375f0db67..66991c7fef9e 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -46,10 +46,9 @@ static bool event_compare(struct fsnotify_event *old_fsn,
 	return false;
 }
 
-static int inotify_merge(struct fsnotify_group *group,
-			 struct fsnotify_event *event)
+static int inotify_merge(struct list_head *list,
+			  struct fsnotify_event *event)
 {
-	struct list_head *list = &group->notification_list;
 	struct fsnotify_event *last_event;
 
 	last_event = list_entry(list->prev, struct fsnotify_event, list);
@@ -115,7 +114,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
 		mask &= ~IN_ISDIR;
 
 	fsn_event = &event->fse;
-	fsnotify_init_event(fsn_event);
+	fsnotify_init_event(fsn_event, 0);
 	event->mask = mask;
 	event->wd = wd;
 	event->sync_cookie = cookie;
@@ -129,7 +128,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
 		fsnotify_destroy_event(group, fsn_event);
 	}
 
-	if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
+	if (inode_mark->mask & IN_ONESHOT)
 		fsnotify_destroy_mark(inode_mark, group);
 
 	return 0;
@@ -184,8 +183,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
 		dec_inotify_instances(group->inotify_data.ucounts);
 }
 
-static void inotify_free_event(struct fsnotify_group *group,
-			       struct fsnotify_event *fsn_event)
+static void inotify_free_event(struct fsnotify_event *fsn_event)
 {
 	kfree(INOTIFY_E(fsn_event));
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 67a9f3941f9b..9ea915e9d2a1 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -37,15 +37,6 @@
 
 #include <asm/ioctls.h>
 
-/*
- * An inotify watch requires allocating an inotify_inode_mark structure as
- * well as pinning the watched inode. Doubling the size of a VFS inode
- * should be more than enough to cover the additional filesystem inode
- * size increase.
- */
-#define INOTIFY_WATCH_COST	(sizeof(struct inotify_inode_mark) + \
-				 2 * sizeof(struct inode))
-
 /* configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_queued_events __read_mostly;
 
@@ -89,10 +80,10 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
 	__u32 mask;
 
 	/*
-	 * Everything should receive events when the inode is unmounted.
-	 * All directories care about children.
+	 * Everything should accept their own ignored and should receive events
+	 * when the inode is unmounted.  All directories care about children.
 	 */
-	mask = (FS_UNMOUNT);
+	mask = (FS_IN_IGNORED | FS_UNMOUNT);
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_EVENT_ON_CHILD;
 
@@ -102,28 +93,13 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
 	return mask;
 }
 
-#define INOTIFY_MARK_FLAGS \
-	(FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT)
-
-static inline unsigned int inotify_arg_to_flags(u32 arg)
-{
-	unsigned int flags = 0;
-
-	if (arg & IN_EXCL_UNLINK)
-		flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK;
-	if (arg & IN_ONESHOT)
-		flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT;
-
-	return flags;
-}
-
 static inline u32 inotify_mask_to_arg(__u32 mask)
 {
 	return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
 		       IN_Q_OVERFLOW);
 }
 
-/* inotify userspace file descriptor functions */
+/* inotify userspace file descriptor functions */
 static __poll_t inotify_poll(struct file *file, poll_table *wait)
 {
 	struct fsnotify_group *group = file->private_data;
@@ -161,10 +137,11 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 	size_t event_size = sizeof(struct inotify_event);
 	struct fsnotify_event *event;
 
-	event = fsnotify_peek_first_event(group);
-	if (!event)
+	if (fsnotify_notify_queue_is_empty(group))
 		return NULL;
 
+	event = fsnotify_peek_first_event(group);
+
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
 	event_size += round_event_name_len(event);
@@ -366,7 +343,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path,
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
-	error = path_permission(path, MAY_READ);
+	error = inode_permission(path->dentry->d_inode, MAY_READ);
 	if (error) {
 		path_put(path);
 		return error;
@@ -528,10 +505,13 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	struct fsnotify_mark *fsn_mark;
 	struct inotify_inode_mark *i_mark;
 	__u32 old_mask, new_mask;
-	int replace = !(arg & IN_MASK_ADD);
+	__u32 mask;
+	int add = (arg & IN_MASK_ADD);
 	int create = (arg & IN_MASK_CREATE);
 	int ret;
 
+	mask = inotify_arg_to_mask(inode, arg);
+
 	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 	if (!fsn_mark)
 		return -ENOENT;
@@ -544,12 +524,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 
 	spin_lock(&fsn_mark->lock);
 	old_mask = fsn_mark->mask;
-	if (replace) {
-		fsn_mark->mask = 0;
-		fsn_mark->flags &= ~INOTIFY_MARK_FLAGS;
-	}
-	fsn_mark->mask |= inotify_arg_to_mask(inode, arg);
-	fsn_mark->flags |= inotify_arg_to_flags(arg);
+	if (add)
+		fsn_mark->mask |= mask;
+	else
+		fsn_mark->mask = mask;
 	new_mask = fsn_mark->mask;
 	spin_unlock(&fsn_mark->lock);
 
@@ -580,17 +558,19 @@ static int inotify_new_watch(struct fsnotify_group *group,
 			     u32 arg)
 {
 	struct inotify_inode_mark *tmp_i_mark;
+	__u32 mask;
 	int ret;
 	struct idr *idr = &group->inotify_data.idr;
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
 
+	mask = inotify_arg_to_mask(inode, arg);
+
 	tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
 	if (unlikely(!tmp_i_mark))
 		return -ENOMEM;
 
 	fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
-	tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg);
-	tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg);
+	tmp_i_mark->fsn_mark.mask = mask;
 	tmp_i_mark->wd = -1;
 
 	ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
@@ -627,13 +607,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
 {
 	int ret = 0;
 
-	fsnotify_group_lock(group);
+	mutex_lock(&group->mark_mutex);
 	/* try to update and existing watch with the new arg */
 	ret = inotify_update_existing_watch(group, inode, arg);
 	/* no mark present, try to add a new one */
 	if (ret == -ENOENT)
 		ret = inotify_new_watch(group, inode, arg);
-	fsnotify_group_unlock(group);
+	mutex_unlock(&group->mark_mutex);
 
 	return ret;
 }
@@ -643,18 +623,17 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 	struct fsnotify_group *group;
 	struct inotify_event_info *oevent;
 
-	group = fsnotify_alloc_group(&inotify_fsnotify_ops,
-				     FSNOTIFY_GROUP_USER);
+	group = fsnotify_alloc_group(&inotify_fsnotify_ops);
 	if (IS_ERR(group))
 		return group;
 
-	oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT);
+	oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL);
 	if (unlikely(!oevent)) {
 		fsnotify_destroy_group(group);
 		return ERR_PTR(-ENOMEM);
 	}
 	group->overflow_event = &oevent->fse;
-	fsnotify_init_event(group->overflow_event);
+	fsnotify_init_event(group->overflow_event, 0);
 	oevent->mask = FS_Q_OVERFLOW;
 	oevent->wd = -1;
 	oevent->sync_cookie = 0;
@@ -830,18 +809,6 @@ out:
  */
 static int __init inotify_user_setup(void)
 {
-	unsigned long watches_max;
-	struct sysinfo si;
-
-	si_meminfo(&si);
-	/*
-	 * Allow up to 1% of addressable memory to be allocated for inotify
-	 * watches (per user) limited to the range [8192, 1048576].
-	 */
-	watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
-			INOTIFY_WATCH_COST;
-	watches_max = clamp(watches_max, 8192UL, 1048576UL);
-
 	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
 	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
@@ -857,7 +824,9 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
 	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
 	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
+	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
 	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
 
@@ -866,7 +835,7 @@ static int __init inotify_user_setup(void)
 
 	inotify_max_queued_events = 16384;
 	init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
-	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
+	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
 
 	return 0;
 }
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index c74ef947447d..5b44be5f93dd 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -116,64 +116,20 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
 	return *fsnotify_conn_mask_p(conn);
 }
 
-static void fsnotify_get_inode_ref(struct inode *inode)
-{
-	ihold(inode);
-	atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
-}
-
-/*
- * Grab or drop inode reference for the connector if needed.
- *
- * When it's time to drop the reference, we only clear the HAS_IREF flag and
- * return the inode object. fsnotify_drop_object() will be resonsible for doing
- * iput() outside of spinlocks. This happens when last mark that wanted iref is
- * detached.
- */
-static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
-					  bool want_iref)
-{
-	bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
-	struct inode *inode = NULL;
-
-	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE ||
-	    want_iref == has_iref)
-		return NULL;
-
-	if (want_iref) {
-		/* Pin inode if any mark wants inode refcount held */
-		fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
-		conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;
-	} else {
-		/* Unpin inode after detach of last mark that wanted iref */
-		inode = fsnotify_conn_inode(conn);
-		conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
-	}
-
-	return inode;
-}
-
-static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
+static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
-	bool want_iref = false;
 	struct fsnotify_mark *mark;
 
 	assert_spin_locked(&conn->lock);
 	/* We can get detached connector here when inode is getting unlinked. */
 	if (!fsnotify_valid_obj_type(conn->type))
-		return NULL;
+		return;
 	hlist_for_each_entry(mark, &conn->list, obj_list) {
-		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
-			continue;
-		new_mask |= fsnotify_calc_mask(mark);
-		if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
-		    !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
-			want_iref = true;
+		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
+			new_mask |= mark->mask;
 	}
 	*fsnotify_conn_mask_p(conn) = new_mask;
-
-	return fsnotify_update_iref(conn, want_iref);
 }
 
 /*
@@ -213,31 +169,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
 	}
 }
 
-static void fsnotify_put_inode_ref(struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-
-	iput(inode);
-	if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
-		wake_up_var(&sb->s_fsnotify_connectors);
-}
-
-static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
-{
-	struct super_block *sb = fsnotify_connector_sb(conn);
-
-	if (sb)
-		atomic_long_inc(&sb->s_fsnotify_connectors);
-}
-
-static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
-{
-	struct super_block *sb = fsnotify_connector_sb(conn);
-
-	if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
-		wake_up_var(&sb->s_fsnotify_connectors);
-}
-
 static void *fsnotify_detach_connector_from_object(
 					struct fsnotify_mark_connector *conn,
 					unsigned int *type)
@@ -251,17 +182,13 @@ static void *fsnotify_detach_connector_from_object(
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
 		inode = fsnotify_conn_inode(conn);
 		inode->i_fsnotify_mask = 0;
-
-		/* Unpin inode when detaching from connector */
-		if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
-			inode = NULL;
+		atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
 		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
 		fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
 	}
 
-	fsnotify_put_sb_connectors(conn);
 	rcu_assign_pointer(*(conn->obj), NULL);
 	conn->obj = NULL;
 	conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
@@ -282,12 +209,19 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
 /* Drop object reference originally held by a connector */
 static void fsnotify_drop_object(unsigned int type, void *objp)
 {
+	struct inode *inode;
+	struct super_block *sb;
+
 	if (!objp)
 		return;
 	/* Currently only inode references are passed to be dropped */
 	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
 		return;
-	fsnotify_put_inode_ref(objp);
+	inode = objp;
+	sb = inode->i_sb;
+	iput(inode);
+	if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+		wake_up_var(&sb->s_fsnotify_inode_refs);
 }
 
 void fsnotify_put_mark(struct fsnotify_mark *mark)
@@ -316,8 +250,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 		objp = fsnotify_detach_connector_from_object(conn, &type);
 		free_conn = true;
 	} else {
-		objp = __fsnotify_recalc_mask(conn);
-		type = conn->type;
+		__fsnotify_recalc_mask(conn);
 	}
 	WRITE_ONCE(mark->connector, NULL);
 	spin_unlock(&conn->lock);
@@ -396,7 +329,7 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
 {
 	int type;
 
-	fsnotify_foreach_iter_type(type) {
+	fsnotify_foreach_obj_type(type) {
 		/* This can fail if mark is being removed */
 		if (!fsnotify_get_mark_safe(iter_info->marks[type])) {
 			__release(&fsnotify_mark_srcu);
@@ -425,7 +358,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
 	int type;
 
 	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
-	fsnotify_foreach_iter_type(type)
+	fsnotify_foreach_obj_type(type)
 		fsnotify_put_mark_wake(iter_info->marks[type]);
 }
 
@@ -441,7 +374,9 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
  */
 void fsnotify_detach_mark(struct fsnotify_mark *mark)
 {
-	fsnotify_group_assert_locked(mark->group);
+	struct fsnotify_group *group = mark->group;
+
+	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
 	WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
 		     refcount_read(&mark->refcnt) < 1 +
 			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
@@ -456,6 +391,8 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
 	list_del_init(&mark->g_list);
 	spin_unlock(&mark->lock);
 
+	atomic_dec(&group->num_marks);
+
 	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
 	fsnotify_put_mark(mark);
 }
@@ -493,9 +430,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
 void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 			   struct fsnotify_group *group)
 {
-	fsnotify_group_lock(group);
+	mutex_lock(&group->mark_mutex);
 	fsnotify_detach_mark(mark);
-	fsnotify_group_unlock(group);
+	mutex_unlock(&group->mark_mutex);
 	fsnotify_free_mark(mark);
 }
 EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
@@ -537,9 +474,10 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 }
 
 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
-					       unsigned int obj_type,
+					       unsigned int type,
 					       __kernel_fsid_t *fsid)
 {
+	struct inode *inode = NULL;
 	struct fsnotify_mark_connector *conn;
 
 	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
@@ -547,8 +485,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		return -ENOMEM;
 	spin_lock_init(&conn->lock);
 	INIT_HLIST_HEAD(&conn->list);
-	conn->flags = 0;
-	conn->type = obj_type;
+	conn->type = type;
 	conn->obj = connp;
 	/* Cache fsid of filesystem containing the object */
 	if (fsid) {
@@ -558,15 +495,16 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		conn->fsid.val[0] = conn->fsid.val[1] = 0;
 		conn->flags = 0;
 	}
-	fsnotify_get_sb_connectors(conn);
-
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
+		inode = igrab(fsnotify_conn_inode(conn));
 	/*
 	 * cmpxchg() provides the barrier so that readers of *connp can see
 	 * only initialized structure
 	 */
 	if (cmpxchg(connp, NULL, conn)) {
 		/* Someone else created list structure for us */
-		fsnotify_put_sb_connectors(conn);
+		if (inode)
+			iput(inode);
 		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
 	}
 
@@ -607,16 +545,15 @@ out:
  * priority, highest number first, and then by the group's location in memory.
  */
 static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
-				  fsnotify_connp_t *connp,
-				  unsigned int obj_type,
-				  int add_flags, __kernel_fsid_t *fsid)
+				  fsnotify_connp_t *connp, unsigned int type,
+				  int allow_dups, __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
 	int cmp;
 	int err = 0;
 
-	if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
+	if (WARN_ON(!fsnotify_valid_obj_type(type)))
 		return -EINVAL;
 
 	/* Backend is expected to check for zero fsid (e.g. tmpfs) */
@@ -628,8 +565,7 @@ restart:
 	conn = fsnotify_grab_connector(connp);
 	if (!conn) {
 		spin_unlock(&mark->lock);
-		err = fsnotify_attach_connector_to_object(connp, obj_type,
-							  fsid);
+		err = fsnotify_attach_connector_to_object(connp, type, fsid);
 		if (err)
 			return err;
 		goto restart;
@@ -668,7 +604,7 @@ restart:
 
 		if ((lmark->group == mark->group) &&
 		    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
-		    !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
+		    !allow_dups) {
 			err = -EEXIST;
 			goto out_err;
 		}
@@ -702,13 +638,13 @@ out_err:
  * event types should be delivered to which group.
  */
 int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
-			     fsnotify_connp_t *connp, unsigned int obj_type,
-			     int add_flags, __kernel_fsid_t *fsid)
+			     fsnotify_connp_t *connp, unsigned int type,
+			     int allow_dups, __kernel_fsid_t *fsid)
 {
 	struct fsnotify_group *group = mark->group;
 	int ret = 0;
 
-	fsnotify_group_assert_locked(group);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
 	/*
 	 * LOCKING ORDER!!!!
@@ -720,14 +656,16 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
 
 	list_add(&mark->g_list, &group->marks_list);
+	atomic_inc(&group->num_marks);
 	fsnotify_get_mark(mark); /* for g_list */
 	spin_unlock(&mark->lock);
 
-	ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid);
+	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid);
 	if (ret)
 		goto err;
 
-	fsnotify_recalc_mask(mark->connector);
+	if (mark->mask)
+		fsnotify_recalc_mask(mark->connector);
 
 	return ret;
 err:
@@ -736,21 +674,21 @@ err:
 			 FSNOTIFY_MARK_FLAG_ATTACHED);
 	list_del_init(&mark->g_list);
 	spin_unlock(&mark->lock);
+	atomic_dec(&group->num_marks);
 
 	fsnotify_put_mark(mark);
 	return ret;
 }
 
 int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
-		      unsigned int obj_type, int add_flags,
-		      __kernel_fsid_t *fsid)
+		      unsigned int type, int allow_dups, __kernel_fsid_t *fsid)
 {
 	int ret;
 	struct fsnotify_group *group = mark->group;
 
-	fsnotify_group_lock(group);
-	ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
-	fsnotify_group_unlock(group);
+	mutex_lock(&group->mark_mutex);
+	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid);
+	mutex_unlock(&group->mark_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fsnotify_add_mark);
@@ -784,14 +722,14 @@ EXPORT_SYMBOL_GPL(fsnotify_find_mark);
 
 /* Clear any marks in a group with given type mask */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
-				   unsigned int obj_type)
+				   unsigned int type_mask)
 {
 	struct fsnotify_mark *lmark, *mark;
 	LIST_HEAD(to_free);
 	struct list_head *head = &to_free;
 
 	/* Skip selection step if we want to clear all marks. */
-	if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) {
+	if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) {
 		head = &group->marks_list;
 		goto clear;
 	}
@@ -804,24 +742,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
 	 * move marks to free to to_free list in one go and then free marks in
 	 * to_free list one by one.
 	 */
-	fsnotify_group_lock(group);
+	mutex_lock(&group->mark_mutex);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
-		if (mark->connector->type == obj_type)
+		if ((1U << mark->connector->type) & type_mask)
 			list_move(&mark->g_list, &to_free);
 	}
-	fsnotify_group_unlock(group);
+	mutex_unlock(&group->mark_mutex);
 
 clear:
 	while (1) {
-		fsnotify_group_lock(group);
+		mutex_lock(&group->mark_mutex);
 		if (list_empty(head)) {
-			fsnotify_group_unlock(group);
+			mutex_unlock(&group->mark_mutex);
 			break;
 		}
 		mark = list_first_entry(head, struct fsnotify_mark, g_list);
 		fsnotify_get_mark(mark);
 		fsnotify_detach_mark(mark);
-		fsnotify_group_unlock(group);
+		mutex_unlock(&group->mark_mutex);
 		fsnotify_free_mark(mark);
 		fsnotify_put_mark(mark);
 	}
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 9022ae650cf8..75d79d6d3ef0 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -47,6 +47,13 @@ u32 fsnotify_get_cookie(void)
 }
 EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
 
+/* return true if the notify queue is empty, false otherwise */
+bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
+{
+	assert_spin_locked(&group->notification_lock);
+	return list_empty(&group->notification_list) ? true : false;
+}
+
 void fsnotify_destroy_event(struct fsnotify_group *group,
 			    struct fsnotify_event *event)
 {
@@ -64,26 +71,20 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
 		WARN_ON(!list_empty(&event->list));
 		spin_unlock(&group->notification_lock);
 	}
-	group->ops->free_event(group, event);
+	group->ops->free_event(event);
 }
 
 /*
- * Try to add an event to the notification queue.
- * The group can later pull this event off the queue to deal with.
- * The group can use the @merge hook to merge the event with a queued event.
- * The group can use the @insert hook to insert the event into hash table.
- * The function returns:
- * 0 if the event was added to a queue
- * 1 if the event was merged with some other queued event
+ * Add an event to the group notification queue.  The group can later pull this
+ * event off the queue to deal with.  The function returns 0 if the event was
+ * added to the queue, 1 if the event was merged with some other queued event,
  * 2 if the event was not queued - either the queue of events has overflown
- *   or the group is shutting down.
+ * or the group is shutting down.
  */
-int fsnotify_insert_event(struct fsnotify_group *group,
-			  struct fsnotify_event *event,
-			  int (*merge)(struct fsnotify_group *,
-				       struct fsnotify_event *),
-			  void (*insert)(struct fsnotify_group *,
-					 struct fsnotify_event *))
+int fsnotify_add_event(struct fsnotify_group *group,
+		       struct fsnotify_event *event,
+		       int (*merge)(struct list_head *,
+				    struct fsnotify_event *))
 {
 	int ret = 0;
 	struct list_head *list = &group->notification_list;
@@ -110,7 +111,7 @@ int fsnotify_insert_event(struct fsnotify_group *group,
 	}
 
 	if (!list_empty(list) && merge) {
-		ret = merge(group, event);
+		ret = merge(list, event);
 		if (ret) {
 			spin_unlock(&group->notification_lock);
 			return ret;
@@ -120,8 +121,6 @@ int fsnotify_insert_event(struct fsnotify_group *group,
 queue:
 	group->q_len++;
 	list_add_tail(&event->list, list);
-	if (insert)
-		insert(group, event);
 	spin_unlock(&group->notification_lock);
 
 	wake_up(&group->notification_waitq);
@@ -141,39 +140,36 @@ void fsnotify_remove_queued_event(struct fsnotify_group *group,
 	group->q_len--;
 }
 
-/*
- * Return the first event on the notification list without removing it.
- * Returns NULL if the list is empty.
- */
-struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
-{
-	assert_spin_locked(&group->notification_lock);
-
-	if (fsnotify_notify_queue_is_empty(group))
-		return NULL;
-
-	return list_first_entry(&group->notification_list,
-				struct fsnotify_event, list);
-}
-
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
  */
 struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
 {
-	struct fsnotify_event *event = fsnotify_peek_first_event(group);
+	struct fsnotify_event *event;
 
-	if (!event)
-		return NULL;
+	assert_spin_locked(&group->notification_lock);
 
-	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+	pr_debug("%s: group=%p\n", __func__, group);
 
+	event = list_first_entry(&group->notification_list,
+				 struct fsnotify_event, list);
 	fsnotify_remove_queued_event(group, event);
-
 	return event;
 }
 
+/*
+ * This will not remove the event, that must be done with
+ * fsnotify_remove_first_event()
+ */
+struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
+{
+	assert_spin_locked(&group->notification_lock);
+
+	return list_first_entry(&group->notification_list,
+				struct fsnotify_event, list);
+}
+
 /*
  * Called when a group is being torn down to clean up any outstanding
  * event notifications.
diff --git a/fs/open.c b/fs/open.c
index 3ad0c6c8f5e7..965230a0710c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -493,7 +493,7 @@ retry:
 	if (error)
 		goto out;
 
-	error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -522,7 +522,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 	if (!d_can_lookup(f.file->f_path.dentry))
 		goto out_putf;
 
-	error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR);
 	if (!error)
 		set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
@@ -541,7 +541,7 @@ retry:
 	if (error)
 		goto out;
 
-	error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
 	if (error)
 		goto dput_and_out;
 
@@ -965,47 +965,6 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
-/**
- * dentry_create - Create and open a file
- * @path: path to create
- * @flags: O_ flags
- * @mode: mode bits for new file
- * @cred: credentials to use
- *
- * Caller must hold the parent directory's lock, and have prepared
- * a negative dentry, placed in @path->dentry, for the new file.
- *
- * Caller sets @path->mnt to the vfsmount of the filesystem where
- * the new file is to be created. The parent directory and the
- * negative dentry must reside on the same filesystem instance.
- *
- * On success, returns a "struct file *". Otherwise a ERR_PTR
- * is returned.
- */
-struct file *dentry_create(const struct path *path, int flags, umode_t mode,
-			   const struct cred *cred)
-{
-	struct file *f;
-	int error;
-
-	validate_creds(cred);
-	f = alloc_empty_file(flags, cred);
-	if (IS_ERR(f))
-		return f;
-
-	error = vfs_create(d_inode(path->dentry->d_parent),
-			   path->dentry, mode, true);
-	if (!error)
-		error = vfs_open(path, f);
-
-	if (unlikely(error)) {
-		fput(f);
-		return ERR_PTR(error);
-	}
-	return f;
-}
-EXPORT_SYMBOL(dentry_create);
-
 struct file *open_with_fake_path(const struct path *path, int flags,
 				struct inode *inode, const struct cred *cred)
 {
@@ -1382,7 +1341,7 @@ EXPORT_SYMBOL(filp_close);
  */
 SYSCALL_DEFINE1(close, unsigned int, fd)
 {
-	int retval = close_fd(fd);
+	int retval = __close_fd(current->files, fd);
 
 	/* can't restart close syscall because file table entry was cleared */
 	if (unlikely(retval == -ERESTARTSYS ||
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 520a6bdaf429..a9f9923a725d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -214,16 +214,9 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
 				unsigned int flags)
 {
 	int err;
-	struct renamedata rd = {
-		.old_dir 	= olddir,
-		.old_dentry 	= olddentry,
-		.new_dir 	= newdir,
-		.new_dentry 	= newdentry,
-		.flags 		= flags,
-	};
 
 	pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
-	err = vfs_rename(&rd);
+	err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
 	if (err) {
 		pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
 			 olddentry, newdentry, err);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 35b92009c1cc..6b634c0a9b6e 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -29,13 +29,14 @@ static int seq_show(struct seq_file *m, void *v)
 	if (!task)
 		return -ENOENT;
 
-	task_lock(task);
-	files = task->files;
+	files = get_files_struct(task);
+	put_task_struct(task);
+
 	if (files) {
 		unsigned int fd = proc_fd(m->private);
 
 		spin_lock(&files->file_lock);
-		file = files_lookup_fd_locked(files, fd);
+		file = fcheck_files(files, fd);
 		if (file) {
 			struct fdtable *fdt = files_fdtable(files);
 
@@ -47,9 +48,8 @@ static int seq_show(struct seq_file *m, void *v)
 			ret = 0;
 		}
 		spin_unlock(&files->file_lock);
+		put_files_struct(files);
 	}
-	task_unlock(task);
-	put_task_struct(task);
 
 	if (ret)
 		return ret;
@@ -59,7 +59,6 @@ static int seq_show(struct seq_file *m, void *v)
 		   real_mount(file->f_path.mnt)->mnt_id,
 		   file_inode(file)->i_ino);
 
-	/* show_fd_locks() never deferences files so a stale value is safe */
 	show_fd_locks(m, file, files);
 	if (seq_has_overflowed(m))
 		goto out;
@@ -108,13 +107,18 @@ static const struct file_operations proc_fdinfo_file_operations = {
 
 static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
 {
+	struct files_struct *files = get_files_struct(task);
 	struct file *file;
 
+	if (!files)
+		return false;
+
 	rcu_read_lock();
-	file = task_lookup_fd_rcu(task, fd);
+	file = fcheck_files(files, fd);
 	if (file)
 		*mode = file->f_mode;
 	rcu_read_unlock();
+	put_files_struct(files);
 	return !!file;
 }
 
@@ -166,22 +170,29 @@ static const struct dentry_operations tid_fd_dentry_operations = {
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
+	struct files_struct *files = NULL;
 	struct task_struct *task;
 	int ret = -ENOENT;
 
 	task = get_proc_task(d_inode(dentry));
 	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+
+	if (files) {
 		unsigned int fd = proc_fd(d_inode(dentry));
 		struct file *fd_file;
 
-		fd_file = fget_task(task, fd);
+		spin_lock(&files->file_lock);
+		fd_file = fcheck_files(files, fd);
 		if (fd_file) {
 			*path = fd_file->f_path;
 			path_get(&fd_file->f_path);
 			ret = 0;
-			fput(fd_file);
 		}
-		put_task_struct(task);
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
 	}
 
 	return ret;
@@ -242,6 +253,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 			      instantiate_t instantiate)
 {
 	struct task_struct *p = get_proc_task(file_inode(file));
+	struct files_struct *files;
 	unsigned int fd;
 
 	if (!p)
@@ -249,18 +261,22 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 
 	if (!dir_emit_dots(file, ctx))
 		goto out;
+	files = get_files_struct(p);
+	if (!files)
+		goto out;
 
 	rcu_read_lock();
-	for (fd = ctx->pos - 2;; fd++) {
+	for (fd = ctx->pos - 2;
+	     fd < files_fdtable(files)->max_fds;
+	     fd++, ctx->pos++) {
 		struct file *f;
 		struct fd_data data;
 		char name[10 + 1];
 		unsigned int len;
 
-		f = task_lookup_next_fd_rcu(p, &fd);
-		ctx->pos = fd + 2LL;
+		f = fcheck_files(files, fd);
 		if (!f)
-			break;
+			continue;
 		data.mode = f->f_mode;
 		rcu_read_unlock();
 		data.fd = fd;
@@ -269,11 +285,13 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 		if (!proc_fill_cache(file, ctx,
 				     name, len, instantiate, p,
 				     &data))
-			goto out;
+			goto out_fd_loop;
 		cond_resched();
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
+out_fd_loop:
+	put_files_struct(files);
 out:
 	put_task_struct(p);
 	return 0;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 25f7c915f22b..e283a62701b8 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -181,7 +181,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	long old_block, new_block;
 	int result;
 
-	if (file_permission(filp, MAY_READ) != 0) {
+	if (inode_permission(inode, MAY_READ) != 0) {
 		udf_debug("no permission to access inode %lu\n", inode->i_ino);
 		return -EPERM;
 	}
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index dfe8acc32df6..dbabea77efc0 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -369,7 +369,7 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
 	 * has verity enabled, and to stabilize the data being hashed.
 	 */
 
-	err = file_permission(filp, MAY_WRITE);
+	err = inode_permission(inode, MAY_WRITE);
 	if (err)
 		return err;
 
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b87c3b85a166..0aad774beaec 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -26,7 +26,7 @@ struct dnotify_struct {
 			    FS_MODIFY | FS_MODIFY_CHILD |\
 			    FS_ACCESS | FS_ACCESS_CHILD |\
 			    FS_ATTRIB | FS_ATTRIB_CHILD |\
-			    FS_CREATE | FS_RENAME |\
+			    FS_CREATE | FS_DN_RENAME |\
 			    FS_MOVED_FROM | FS_MOVED_TO)
 
 extern int dir_notify_enable;
diff --git a/include/linux/errno.h b/include/linux/errno.h
index 8b0c754bab02..d73f597a2484 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -31,6 +31,5 @@
 #define EJUKEBOX	528	/* Request initiated, but will not complete before timeout */
 #define EIOCBQUEUED	529	/* iocb queued, will get completion event */
 #define ERECALLCONFLICT	530	/* conflict with recalled state */
-#define ENOGRACE	531	/* NFS file lock reclaim refused */
 
 #endif
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 218fc5c54e90..3ceb72b67a7a 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -213,27 +213,12 @@ struct export_operations {
 			  bool write, u32 *device_generation);
 	int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
 			     int nr_iomaps, struct iattr *iattr);
-	u64 (*fetch_iversion)(struct inode *);
-#define	EXPORT_OP_NOWCC			(0x1) /* don't collect v3 wcc data */
-#define	EXPORT_OP_NOSUBTREECHK		(0x2) /* no subtree checking */
-#define	EXPORT_OP_CLOSE_BEFORE_UNLINK	(0x4) /* close files before unlink */
-#define EXPORT_OP_REMOTE_FS		(0x8) /* Filesystem is remote */
-#define EXPORT_OP_NOATOMIC_ATTR		(0x10) /* Filesystem cannot supply
-						  atomic attribute updates
-						*/
-#define EXPORT_OP_FLUSH_ON_CLOSE	(0x20) /* fs flushes file data on close */
-	unsigned long	flags;
 };
 
 extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
 				    int *max_len, struct inode *parent);
 extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
 	int *max_len, int connectable);
-extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt,
-					     struct fid *fid, int fh_len,
-					     int fileid_type,
-					     int (*acceptable)(void *, struct dentry *),
-					     void *context);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
 	int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *),
 	void *context);
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 558844c8d259..3e9c56ee651f 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -2,11 +2,8 @@
 #ifndef _LINUX_FANOTIFY_H
 #define _LINUX_FANOTIFY_H
 
-#include <linux/sysctl.h>
 #include <uapi/linux/fanotify.h>
 
-extern struct ctl_table fanotify_table[]; /* for sysctl */
-
 #define FAN_GROUP_FLAG(group, flag) \
 	((group)->fanotify_data.flags & (flag))
 
@@ -18,62 +15,27 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
  * these constant, the programs may break if re-compiled with new uapi headers
  * and then run on an old kernel.
  */
-
-/* Group classes where permission events are allowed */
-#define FANOTIFY_PERM_CLASSES	(FAN_CLASS_CONTENT | \
+#define FANOTIFY_CLASS_BITS	(FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
 				 FAN_CLASS_PRE_CONTENT)
 
-#define FANOTIFY_CLASS_BITS	(FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES)
+#define FANOTIFY_FID_BITS	(FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
 
-#define FANOTIFY_FID_BITS	(FAN_REPORT_DFID_NAME_TARGET)
-
-#define FANOTIFY_INFO_MODES	(FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
-
-/*
- * fanotify_init() flags that require CAP_SYS_ADMIN.
- * We do not allow unprivileged groups to request permission events.
- * We do not allow unprivileged groups to get other process pid in events.
- * We do not allow unprivileged groups to use unlimited resources.
- */
-#define FANOTIFY_ADMIN_INIT_FLAGS	(FANOTIFY_PERM_CLASSES | \
-					 FAN_REPORT_TID | \
-					 FAN_REPORT_PIDFD | \
-					 FAN_UNLIMITED_QUEUE | \
-					 FAN_UNLIMITED_MARKS)
-
-/*
- * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN.
- * FAN_CLASS_NOTIF is the only class we allow for unprivileged group.
- * We do not allow unprivileged groups to get file descriptors in events,
- * so one of the flags for reporting file handles is required.
- */
-#define FANOTIFY_USER_INIT_FLAGS	(FAN_CLASS_NOTIF | \
-					 FANOTIFY_FID_BITS | \
-					 FAN_CLOEXEC | FAN_NONBLOCK)
-
-#define FANOTIFY_INIT_FLAGS	(FANOTIFY_ADMIN_INIT_FLAGS | \
-				 FANOTIFY_USER_INIT_FLAGS)
-
-/* Internal group flags */
-#define FANOTIFY_UNPRIV		0x80000000
-#define FANOTIFY_INTERNAL_GROUP_FLAGS	(FANOTIFY_UNPRIV)
+#define FANOTIFY_INIT_FLAGS	(FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \
+				 FAN_REPORT_TID | \
+				 FAN_CLOEXEC | FAN_NONBLOCK | \
+				 FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
 
 #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
 				 FAN_MARK_FILESYSTEM)
 
-#define FANOTIFY_MARK_CMD_BITS	(FAN_MARK_ADD | FAN_MARK_REMOVE | \
-				 FAN_MARK_FLUSH)
-
-#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \
-				   FAN_MARK_IGNORE)
-
 #define FANOTIFY_MARK_FLAGS	(FANOTIFY_MARK_TYPE_BITS | \
-				 FANOTIFY_MARK_CMD_BITS | \
-				 FANOTIFY_MARK_IGNORE_BITS | \
+				 FAN_MARK_ADD | \
+				 FAN_MARK_REMOVE | \
 				 FAN_MARK_DONT_FOLLOW | \
 				 FAN_MARK_ONLYDIR | \
+				 FAN_MARK_IGNORED_MASK | \
 				 FAN_MARK_IGNORED_SURV_MODIFY | \
-				 FAN_MARK_EVICTABLE)
+				 FAN_MARK_FLUSH)
 
 /*
  * Events that can be reported with data type FSNOTIFY_EVENT_PATH.
@@ -87,23 +49,15 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
  * Directory entry modification events - reported only to directory
  * where entry is modified and not to a watching parent.
  */
-#define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE | \
-				 FAN_RENAME)
-
-/* Events that can be reported with event->fd */
-#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
+#define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE)
 
 /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */
 #define FANOTIFY_INODE_EVENTS	(FANOTIFY_DIRENT_EVENTS | \
 				 FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
 
-/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
-#define FANOTIFY_ERROR_EVENTS	(FAN_FS_ERROR)
-
 /* Events that user can request to be notified on */
 #define FANOTIFY_EVENTS		(FANOTIFY_PATH_EVENTS | \
-				 FANOTIFY_INODE_EVENTS | \
-				 FANOTIFY_ERROR_EVENTS)
+				 FANOTIFY_INODE_EVENTS)
 
 /* Events that require a permission response from user */
 #define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM | \
@@ -117,10 +71,6 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
 					 FANOTIFY_PERM_EVENTS | \
 					 FAN_Q_OVERFLOW | FAN_ONDIR)
 
-/* Events and flags relevant only for directories */
-#define FANOTIFY_DIRONLY_EVENT_BITS	(FANOTIFY_DIRENT_EVENTS | \
-					 FAN_EVENT_ON_CHILD | FAN_ONDIR)
-
 #define ALL_FANOTIFY_EVENT_BITS		(FANOTIFY_OUTGOING_EVENTS | \
 					 FANOTIFY_EVENT_FLAGS)
 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 4ed3589f9294..f1a99d3e5570 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -80,7 +80,7 @@ struct dentry;
 /*
  * The caller must ensure that fd table isn't shared or hold rcu or file lock
  */
-static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
+static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
 {
 	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
 
@@ -91,40 +91,37 @@ static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsig
 	return NULL;
 }
 
-static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
+static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
 {
-	RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock),
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
+			   !lockdep_is_held(&files->file_lock),
 			   "suspicious rcu_dereference_check() usage");
-	return files_lookup_fd_raw(files, fd);
+	return __fcheck_files(files, fd);
 }
 
-static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd)
-{
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
-			   "suspicious rcu_dereference_check() usage");
-	return files_lookup_fd_raw(files, fd);
-}
-
-static inline struct file *lookup_fd_rcu(unsigned int fd)
-{
-	return files_lookup_fd_rcu(current->files, fd);
-}
-
-struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd);
-struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd);
+/*
+ * Check whether the specified fd has an open file.
+ */
+#define fcheck(fd)	fcheck_files(current->files, fd)
 
 struct task_struct;
 
 struct files_struct *get_files_struct(struct task_struct *);
 void put_files_struct(struct files_struct *fs);
-int unshare_files(void);
+void reset_files_struct(struct files_struct *);
+int unshare_files(struct files_struct **);
 struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
 void do_close_on_exec(struct files_struct *);
 int iterate_fd(struct files_struct *, unsigned,
 		int (*)(const void *, struct file *, unsigned),
 		const void *);
 
-extern int close_fd(unsigned int fd);
+extern int __alloc_fd(struct files_struct *files,
+		      unsigned start, unsigned end, unsigned flags);
+extern void __fd_install(struct files_struct *files,
+		      unsigned int fd, struct file *file);
+extern int __close_fd(struct files_struct *files,
+		      unsigned int fd);
 extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
 extern int close_fd_get_file(unsigned int fd, struct file **res);
 extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ec6de06ead4c..c61553833fb9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1020,7 +1020,6 @@ static inline struct file *get_file(struct file *f)
 #define FL_UNLOCK_PENDING	512 /* Lease is being broken */
 #define FL_OFDLCK	1024	/* lock is "owned" by struct file */
 #define FL_LAYOUT	2048	/* outstanding pNFS layout */
-#define FL_RECLAIM	4096	/* reclaiming from a reboot server */
 
 #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
 
@@ -1044,7 +1043,6 @@ struct file_lock_operations {
 };
 
 struct lock_manager_operations {
-	void *lm_mod_owner;
 	fl_owner_t (*lm_get_owner)(fl_owner_t);
 	void (*lm_put_owner)(fl_owner_t);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
@@ -1053,8 +1051,6 @@ struct lock_manager_operations {
 	int (*lm_change)(struct file_lock *, int, struct list_head *);
 	void (*lm_setup)(struct file_lock *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
-	bool (*lm_lock_expirable)(struct file_lock *cfl);
-	void (*lm_expire_lock)(void);
 
 	ANDROID_KABI_RESERVE(1);
 	ANDROID_KABI_RESERVE(2);
@@ -1200,15 +1196,6 @@ extern void lease_unregister_notifier(struct notifier_block *);
 struct files_struct;
 extern void show_fd_locks(struct seq_file *f,
 			 struct file *filp, struct files_struct *files);
-extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
-			fl_owner_t owner);
-
-static inline struct file_lock_context *
-locks_inode_context(const struct inode *inode)
-{
-	return smp_load_acquire(&inode->i_flctx);
-}
-
 #else /* !CONFIG_FILE_LOCKING */
 static inline int fcntl_getlk(struct file *file, unsigned int cmd,
 			      struct flock __user *user)
@@ -1349,18 +1336,6 @@ static inline int lease_modify(struct file_lock *fl, int arg,
 struct files_struct;
 static inline void show_fd_locks(struct seq_file *f,
 			struct file *filp, struct files_struct *files) {}
-static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
-			fl_owner_t owner)
-{
-	return false;
-}
-
-static inline struct file_lock_context *
-locks_inode_context(const struct inode *inode)
-{
-	return NULL;
-}
-
 #endif /* !CONFIG_FILE_LOCKING */
 
 static inline struct inode *file_inode(const struct file *f)
@@ -1580,11 +1555,8 @@ struct super_block {
 	/* Number of inodes with nlink == 0 but still referenced */
 	atomic_long_t s_remove_count;
 
-	/*
-	 * Number of inode/mount/sb objects that are being watched, note that
-	 * inodes objects are currently double-accounted.
-	 */
-	atomic_long_t s_fsnotify_connectors;
+	/* Pending fsnotify inode refs */
+	atomic_long_t s_fsnotify_inode_refs;
 
 	/* Being remounted read-only */
 	int s_readonly_remount;
@@ -1856,17 +1828,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *);
 extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
 extern int vfs_rmdir(struct inode *, struct dentry *);
 extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
-
-struct renamedata {
-	struct inode *old_dir;
-	struct dentry *old_dentry;
-	struct inode *new_dir;
-	struct dentry *new_dentry;
-	struct inode **delegated_inode;
-	unsigned int flags;
-} __randomize_layout;
-
-int vfs_rename(struct renamedata *);
+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
 
 static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
 {
@@ -2700,8 +2662,6 @@ extern struct file *filp_open_block(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *dentry_create(const struct path *path, int flags,
-				  umode_t mode, const struct cred *cred);
 extern struct file * open_with_fake_path(const struct path *, int,
 					 struct inode*, const struct cred *);
 static inline struct file *file_clone_open(struct file *file)
@@ -2932,14 +2892,6 @@ static inline int bmap(struct inode *inode,  sector_t *block)
 extern int notify_change(struct dentry *, struct iattr *, struct inode **);
 extern int inode_permission(struct inode *, int);
 extern int generic_permission(struct inode *, int);
-static inline int file_permission(struct file *file, int mask)
-{
-	return inode_permission(file_inode(file), mask);
-}
-static inline int path_permission(const struct path *path, int mask)
-{
-	return inode_permission(d_inode(path->dentry), mask);
-}
 extern int __check_sticky(struct inode *dir, struct inode *inode);
 
 static inline bool execute_ok(struct inode *inode)
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bb8467cd11ae..79add91eaa04 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -26,27 +26,21 @@
  * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only
  * the child is interested and not the parent.
  */
-static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
-				struct inode *dir, const struct qstr *name,
-				u32 cookie)
+static inline void fsnotify_name(struct inode *dir, __u32 mask,
+				 struct inode *child,
+				 const struct qstr *name, u32 cookie)
 {
-	if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
-		return 0;
-
-	return fsnotify(mask, data, data_type, dir, name, NULL, cookie);
+	fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie);
 }
 
 static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
 				   __u32 mask)
 {
-	fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0);
+	fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0);
 }
 
 static inline void fsnotify_inode(struct inode *inode, __u32 mask)
 {
-	if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
-		return;
-
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_ISDIR;
 
@@ -59,9 +53,6 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
 {
 	struct inode *inode = d_inode(dentry);
 
-	if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
-		return 0;
-
 	if (S_ISDIR(inode->i_mode)) {
 		mask |= FS_ISDIR;
 
@@ -86,7 +77,7 @@ notify_child:
  */
 static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
 {
-	fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
+	fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE);
 }
 
 static inline int fsnotify_file(struct file *file, __u32 mask)
@@ -144,23 +135,18 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = FS_MOVED_FROM;
 	__u32 new_dir_mask = FS_MOVED_TO;
-	__u32 rename_mask = FS_RENAME;
 	const struct qstr *new_name = &moved->d_name;
 
+	if (old_dir == new_dir)
+		old_dir_mask |= FS_DN_RENAME;
+
 	if (isdir) {
 		old_dir_mask |= FS_ISDIR;
 		new_dir_mask |= FS_ISDIR;
-		rename_mask |= FS_ISDIR;
 	}
 
-	/* Event with information about both old and new parent+name */
-	fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY,
-		      old_dir, old_name, 0);
-
-	fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE,
-		      old_dir, old_name, fs_cookie);
-	fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE,
-		      new_dir, new_name, fs_cookie);
+	fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie);
+	fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie);
 
 	if (target)
 		fsnotify_link_count(target);
@@ -195,22 +181,16 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 
 /*
  * fsnotify_create - 'name' was linked in
- *
- * Caller must make sure that dentry->d_name is stable.
- * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
- * ->d_inode later
  */
-static inline void fsnotify_create(struct inode *dir, struct dentry *dentry)
+static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
-	audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
+	audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify_dirent(dir, dentry, FS_CREATE);
+	fsnotify_dirent(inode, dentry, FS_CREATE);
 }
 
 /*
  * fsnotify_link - new hardlink in 'inode' directory
- *
- * Caller must make sure that new_dentry->d_name is stable.
  * Note: We have to pass also the linked inode ptr as some filesystems leave
  *   new_dentry->d_inode NULL and instantiate inode pointer later
  */
@@ -220,8 +200,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
 	fsnotify_link_count(inode);
 	audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE,
-		      dir, &new_dentry->d_name, 0);
+	fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0);
 }
 
 /*
@@ -240,8 +219,7 @@ static inline void fsnotify_delete(struct inode *dir, struct inode *inode,
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_ISDIR;
 
-	fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name,
-		      0);
+	fsnotify_name(dir, mask, inode, &dentry->d_name, 0);
 }
 
 /**
@@ -276,16 +254,12 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
 
 /*
  * fsnotify_mkdir - directory 'name' was created
- *
- * Caller must make sure that dentry->d_name is stable.
- * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
- * ->d_inode later
  */
-static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry)
+static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 {
-	audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
+	audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR);
+	fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR);
 }
 
 /*
@@ -379,17 +353,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		fsnotify_dentry(dentry, mask);
 }
 
-static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
-				    int error)
-{
-	struct fs_error_report report = {
-		.error = error,
-		.inode = inode,
-		.sb = sb,
-	};
-
-	return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR,
-			NULL, NULL, NULL, 0);
-}
-
 #endif	/* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d7d96c806bff..a2e42d3cd87c 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -19,8 +19,6 @@
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
 #include <linux/refcount.h>
-#include <linux/mempool.h>
-#include <linux/sched/mm.h>
 
 /*
  * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -44,18 +42,13 @@
 
 #define FS_UNMOUNT		0x00002000	/* inode on umount fs */
 #define FS_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
-#define FS_ERROR		0x00008000	/* Filesystem Error (fanotify) */
-
-/*
- * FS_IN_IGNORED overloads FS_ERROR.  It is only used internally by inotify
- * which does not support FS_ERROR.
- */
 #define FS_IN_IGNORED		0x00008000	/* last inotify event here */
 
 #define FS_OPEN_PERM		0x00010000	/* open event in an permission hook */
 #define FS_ACCESS_PERM		0x00020000	/* access event in a permissions hook */
 #define FS_OPEN_EXEC_PERM	0x00040000	/* open/exec event in a permission hook */
 
+#define FS_EXCL_UNLINK		0x04000000	/* do not send events if object is unlinked */
 /*
  * Set on inode mark that cares about things that happen to its children.
  * Always set for dnotify and inotify.
@@ -63,9 +56,10 @@
  */
 #define FS_EVENT_ON_CHILD	0x08000000
 
-#define FS_RENAME		0x10000000	/* File was renamed */
+#define FS_DN_RENAME		0x10000000	/* file renamed */
 #define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
 #define FS_ISDIR		0x40000000	/* event occurred against dir */
+#define FS_IN_ONESHOT		0x80000000	/* only send event once */
 
 #define FS_MOVE			(FS_MOVED_FROM | FS_MOVED_TO)
 
@@ -75,7 +69,7 @@
  * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event
  * when a directory entry inside a child subdir changes.
  */
-#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
+#define ALL_FSNOTIFY_DIRENT_EVENTS	(FS_CREATE | FS_DELETE | FS_MOVE)
 
 #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \
 				  FS_OPEN_EXEC_PERM)
@@ -100,12 +94,12 @@
 /* Events that can be reported to backends */
 #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
 			     FS_EVENTS_POSS_ON_CHILD | \
-			     FS_DELETE_SELF | FS_MOVE_SELF | \
-			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
-			     FS_ERROR)
+			     FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \
+			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED)
 
 /* Extra flags that may be reported with event or control handling of events */
-#define ALL_FSNOTIFY_FLAGS  (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
+#define ALL_FSNOTIFY_FLAGS  (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
+			     FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
 
 #define ALL_FSNOTIFY_BITS   (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS)
 
@@ -142,7 +136,6 @@ struct mem_cgroup;
  * @dir:	optional directory associated with event -
  *		if @file_name is not NULL, this is the directory that
  *		@file_name is relative to.
- *		Either @inode or @dir must be non-NULL.
  * @file_name:	optional file name associated with event
  * @cookie:	inotify rename cookie
  *
@@ -162,7 +155,7 @@ struct fsnotify_ops {
 			    const struct qstr *file_name, u32 cookie);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
-	void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event);
+	void (*free_event)(struct fsnotify_event *event);
 	/* called on final put+free to free memory */
 	void (*free_mark)(struct fsnotify_mark *mark);
 };
@@ -174,6 +167,7 @@ struct fsnotify_ops {
  */
 struct fsnotify_event {
 	struct list_head list;
+	unsigned long objectid;	/* identifier for queue merges */
 };
 
 /*
@@ -211,14 +205,11 @@ struct fsnotify_group {
 	unsigned int priority;
 	bool shutdown;		/* group is being shut down, don't queue more events */
 
-#define FSNOTIFY_GROUP_USER	0x01 /* user allocated group */
-#define FSNOTIFY_GROUP_DUPS	0x02 /* allow multiple marks per object */
-#define FSNOTIFY_GROUP_NOFS	0x04 /* group lock is not direct reclaim safe */
-	int flags;
-	unsigned int owner_flags;	/* stored flags of mark_mutex owner */
-
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
 	struct mutex mark_mutex;	/* protect marks_list */
+	atomic_t num_marks;		/* 1 for each mark and 1 for not being
+					 * past the point of no return when freeing
+					 * a group */
 	atomic_t user_waits;		/* Number of tasks waiting for user
 					 * response */
 	struct list_head marks_list;	/* all inode marks for this group */
@@ -243,58 +234,23 @@ struct fsnotify_group {
 #endif
 #ifdef CONFIG_FANOTIFY
 		struct fanotify_group_private_data {
-			/* Hash table of events for merge */
-			struct hlist_head *merge_hash;
 			/* allows a group to block waiting for a userspace response */
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
 			int flags;           /* flags from fanotify_init() */
 			int f_flags; /* event_f_flags from fanotify_init() */
-			struct ucounts *ucounts;
-			mempool_t error_events_pool;
+			unsigned int max_marks;
+			struct user_struct *user;
 		} fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 	};
 };
 
-/*
- * These helpers are used to prevent deadlock when reclaiming inodes with
- * evictable marks of the same group that is allocating a new mark.
- */
-static inline void fsnotify_group_lock(struct fsnotify_group *group)
-{
-	mutex_lock(&group->mark_mutex);
-	if (group->flags & FSNOTIFY_GROUP_NOFS)
-		group->owner_flags = memalloc_nofs_save();
-}
-
-static inline void fsnotify_group_unlock(struct fsnotify_group *group)
-{
-	if (group->flags & FSNOTIFY_GROUP_NOFS)
-		memalloc_nofs_restore(group->owner_flags);
-	mutex_unlock(&group->mark_mutex);
-}
-
-static inline void fsnotify_group_assert_locked(struct fsnotify_group *group)
-{
-	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
-	if (group->flags & FSNOTIFY_GROUP_NOFS)
-		WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
-}
-
 /* When calling fsnotify tell it if the data is a path or inode */
 enum fsnotify_data_type {
 	FSNOTIFY_EVENT_NONE,
 	FSNOTIFY_EVENT_PATH,
 	FSNOTIFY_EVENT_INODE,
-	FSNOTIFY_EVENT_DENTRY,
-	FSNOTIFY_EVENT_ERROR,
-};
-
-struct fs_error_report {
-	int error;
-	struct inode *inode;
-	struct super_block *sb;
 };
 
 static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
@@ -302,25 +258,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
 	switch (data_type) {
 	case FSNOTIFY_EVENT_INODE:
 		return (struct inode *)data;
-	case FSNOTIFY_EVENT_DENTRY:
-		return d_inode(data);
 	case FSNOTIFY_EVENT_PATH:
 		return d_inode(((const struct path *)data)->dentry);
-	case FSNOTIFY_EVENT_ERROR:
-		return ((struct fs_error_report *)data)->inode;
-	default:
-		return NULL;
-	}
-}
-
-static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type)
-{
-	switch (data_type) {
-	case FSNOTIFY_EVENT_DENTRY:
-		/* Non const is needed for dget() */
-		return (struct dentry *)data;
-	case FSNOTIFY_EVENT_PATH:
-		return ((const struct path *)data)->dentry;
 	default:
 		return NULL;
 	}
@@ -337,110 +276,58 @@ static inline const struct path *fsnotify_data_path(const void *data,
 	}
 }
 
-static inline struct super_block *fsnotify_data_sb(const void *data,
-						   int data_type)
-{
-	switch (data_type) {
-	case FSNOTIFY_EVENT_INODE:
-		return ((struct inode *)data)->i_sb;
-	case FSNOTIFY_EVENT_DENTRY:
-		return ((struct dentry *)data)->d_sb;
-	case FSNOTIFY_EVENT_PATH:
-		return ((const struct path *)data)->dentry->d_sb;
-	case FSNOTIFY_EVENT_ERROR:
-		return ((struct fs_error_report *) data)->sb;
-	default:
-		return NULL;
-	}
-}
-
-static inline struct fs_error_report *fsnotify_data_error_report(
-							const void *data,
-							int data_type)
-{
-	switch (data_type) {
-	case FSNOTIFY_EVENT_ERROR:
-		return (struct fs_error_report *) data;
-	default:
-		return NULL;
-	}
-}
-
-/*
- * Index to merged marks iterator array that correlates to a type of watch.
- * The type of watched object can be deduced from the iterator type, but not
- * the other way around, because an event can match different watched objects
- * of the same object type.
- * For example, both parent and child are watching an object of type inode.
- */
-enum fsnotify_iter_type {
-	FSNOTIFY_ITER_TYPE_INODE,
-	FSNOTIFY_ITER_TYPE_VFSMOUNT,
-	FSNOTIFY_ITER_TYPE_SB,
-	FSNOTIFY_ITER_TYPE_PARENT,
-	FSNOTIFY_ITER_TYPE_INODE2,
-	FSNOTIFY_ITER_TYPE_COUNT
-};
-
-/* The type of object that a mark is attached to */
 enum fsnotify_obj_type {
-	FSNOTIFY_OBJ_TYPE_ANY = -1,
 	FSNOTIFY_OBJ_TYPE_INODE,
+	FSNOTIFY_OBJ_TYPE_PARENT,
 	FSNOTIFY_OBJ_TYPE_VFSMOUNT,
 	FSNOTIFY_OBJ_TYPE_SB,
 	FSNOTIFY_OBJ_TYPE_COUNT,
 	FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
 };
 
-static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
+#define FSNOTIFY_OBJ_TYPE_INODE_FL	(1U << FSNOTIFY_OBJ_TYPE_INODE)
+#define FSNOTIFY_OBJ_TYPE_PARENT_FL	(1U << FSNOTIFY_OBJ_TYPE_PARENT)
+#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL	(1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+#define FSNOTIFY_OBJ_TYPE_SB_FL		(1U << FSNOTIFY_OBJ_TYPE_SB)
+#define FSNOTIFY_OBJ_ALL_TYPES_MASK	((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
+
+static inline bool fsnotify_valid_obj_type(unsigned int type)
 {
-	return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT);
+	return (type < FSNOTIFY_OBJ_TYPE_COUNT);
 }
 
 struct fsnotify_iter_info {
-	struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT];
-	struct fsnotify_group *current_group;
+	struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT];
 	unsigned int report_mask;
 	int srcu_idx;
 };
 
 static inline bool fsnotify_iter_should_report_type(
-		struct fsnotify_iter_info *iter_info, int iter_type)
+		struct fsnotify_iter_info *iter_info, int type)
 {
-	return (iter_info->report_mask & (1U << iter_type));
+	return (iter_info->report_mask & (1U << type));
 }
 
 static inline void fsnotify_iter_set_report_type(
-		struct fsnotify_iter_info *iter_info, int iter_type)
+		struct fsnotify_iter_info *iter_info, int type)
 {
-	iter_info->report_mask |= (1U << iter_type);
+	iter_info->report_mask |= (1U << type);
 }
 
-static inline struct fsnotify_mark *fsnotify_iter_mark(
-		struct fsnotify_iter_info *iter_info, int iter_type)
+static inline void fsnotify_iter_set_report_type_mark(
+		struct fsnotify_iter_info *iter_info, int type,
+		struct fsnotify_mark *mark)
 {
-	if (fsnotify_iter_should_report_type(iter_info, iter_type))
-		return iter_info->marks[iter_type];
-	return NULL;
-}
-
-static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type,
-				     struct fsnotify_mark **markp)
-{
-	while (type < FSNOTIFY_ITER_TYPE_COUNT) {
-		*markp = fsnotify_iter_mark(iter, type);
-		if (*markp)
-			break;
-		type++;
-	}
-	return type;
+	iter_info->marks[type] = mark;
+	iter_info->report_mask |= (1U << type);
 }
 
 #define FSNOTIFY_ITER_FUNCS(name, NAME) \
 static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
 		struct fsnotify_iter_info *iter_info) \
 { \
-	return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \
+	return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \
+		iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \
 }
 
 FSNOTIFY_ITER_FUNCS(inode, INODE)
@@ -448,13 +335,8 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT)
 FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
 FSNOTIFY_ITER_FUNCS(sb, SB)
 
-#define fsnotify_foreach_iter_type(type) \
-	for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++)
-#define fsnotify_foreach_iter_mark_type(iter, mark, type) \
-	for (type = 0; \
-	     type = fsnotify_iter_step(iter, type, &mark), \
-	     type < FSNOTIFY_ITER_TYPE_COUNT; \
-	     type++)
+#define fsnotify_foreach_obj_type(type) \
+	for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++)
 
 /*
  * fsnotify_connp_t is what we embed in objects which connector can be attached
@@ -473,7 +355,6 @@ struct fsnotify_mark_connector {
 	spinlock_t lock;
 	unsigned short type;	/* Type of object [lock] */
 #define FSNOTIFY_CONN_FLAG_HAS_FSID	0x01
-#define FSNOTIFY_CONN_FLAG_HAS_IREF	0x02
 	unsigned short flags;	/* flags [lock] */
 	__kernel_fsid_t fsid;	/* fsid of filesystem containing object */
 	union {
@@ -518,18 +399,11 @@ struct fsnotify_mark {
 	struct hlist_node obj_list;
 	/* Head of list of marks for an object [mark ref] */
 	struct fsnotify_mark_connector *connector;
-	/* Events types and flags to ignore [mark->lock, group->mark_mutex] */
-	__u32 ignore_mask;
-	/* General fsnotify mark flags */
-#define FSNOTIFY_MARK_FLAG_ALIVE		0x0001
-#define FSNOTIFY_MARK_FLAG_ATTACHED		0x0002
-	/* inotify mark flags */
-#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK		0x0010
-#define FSNOTIFY_MARK_FLAG_IN_ONESHOT		0x0020
-	/* fanotify mark flags */
-#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY	0x0100
-#define FSNOTIFY_MARK_FLAG_NO_IREF		0x0200
-#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS	0x0400
+	/* Events types to ignore [mark->lock, group->mark_mutex] */
+	__u32 ignored_mask;
+#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY	0x01
+#define FSNOTIFY_MARK_FLAG_ALIVE		0x02
+#define FSNOTIFY_MARK_FLAG_ATTACHED		0x04
 	unsigned int flags;		/* flags [mark->lock] */
 };
 
@@ -595,9 +469,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry)
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
 /* create a new group */
-extern struct fsnotify_group *fsnotify_alloc_group(
-				const struct fsnotify_ops *ops,
-				int flags);
+extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
 /* get reference to a group */
 extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
@@ -612,39 +484,17 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
 extern void fsnotify_destroy_event(struct fsnotify_group *group,
 				   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
-extern int fsnotify_insert_event(struct fsnotify_group *group,
-				 struct fsnotify_event *event,
-				 int (*merge)(struct fsnotify_group *,
-					      struct fsnotify_event *),
-				 void (*insert)(struct fsnotify_group *,
-						struct fsnotify_event *));
-
-static inline int fsnotify_add_event(struct fsnotify_group *group,
-				     struct fsnotify_event *event,
-				     int (*merge)(struct fsnotify_group *,
-						  struct fsnotify_event *))
-{
-	return fsnotify_insert_event(group, event, merge, NULL);
-}
-
+extern int fsnotify_add_event(struct fsnotify_group *group,
+			      struct fsnotify_event *event,
+			      int (*merge)(struct list_head *,
+					   struct fsnotify_event *));
 /* Queue overflow event to a notification group */
 static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
 {
 	fsnotify_add_event(group, group->overflow_event, NULL);
 }
 
-static inline bool fsnotify_is_overflow_event(u32 mask)
-{
-	return mask & FS_Q_OVERFLOW;
-}
-
-static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
-{
-	assert_spin_locked(&group->notification_lock);
-
-	return list_empty(&group->notification_list);
-}
-
+/* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
 extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
@@ -656,101 +506,6 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
 
 /* functions used to manipulate the marks attached to inodes */
 
-/*
- * Canonical "ignore mask" including event flags.
- *
- * Note the subtle semantic difference from the legacy ->ignored_mask.
- * ->ignored_mask traditionally only meant which events should be ignored,
- * while ->ignore_mask also includes flags regarding the type of objects on
- * which events should be ignored.
- */
-static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark)
-{
-	__u32 ignore_mask = mark->ignore_mask;
-
-	/* The event flags in ignore mask take effect */
-	if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
-		return ignore_mask;
-
-	/*
-	 * Legacy behavior:
-	 * - Always ignore events on dir
-	 * - Ignore events on child if parent is watching children
-	 */
-	ignore_mask |= FS_ISDIR;
-	ignore_mask &= ~FS_EVENT_ON_CHILD;
-	ignore_mask |= mark->mask & FS_EVENT_ON_CHILD;
-
-	return ignore_mask;
-}
-
-/* Legacy ignored_mask - only event types to ignore */
-static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark)
-{
-	return mark->ignore_mask & ALL_FSNOTIFY_EVENTS;
-}
-
-/*
- * Check if mask (or ignore mask) should be applied depending if victim is a
- * directory and whether it is reported to a watching parent.
- */
-static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir,
-					    int iter_type)
-{
-	/* Should mask be applied to a directory? */
-	if (is_dir && !(mask & FS_ISDIR))
-		return false;
-
-	/* Should mask be applied to a child? */
-	if (iter_type == FSNOTIFY_ITER_TYPE_PARENT &&
-	    !(mask & FS_EVENT_ON_CHILD))
-		return false;
-
-	return true;
-}
-
-/*
- * Effective ignore mask taking into account if event victim is a
- * directory and whether it is reported to a watching parent.
- */
-static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark,
-						   bool is_dir, int iter_type)
-{
-	__u32 ignore_mask = fsnotify_ignored_events(mark);
-
-	if (!ignore_mask)
-		return 0;
-
-	/* For non-dir and non-child, no need to consult the event flags */
-	if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT)
-		return ignore_mask;
-
-	ignore_mask = fsnotify_ignore_mask(mark);
-	if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type))
-		return 0;
-
-	return ignore_mask & ALL_FSNOTIFY_EVENTS;
-}
-
-/* Get mask for calculating object interest taking ignore mask into account */
-static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
-{
-	__u32 mask = mark->mask;
-
-	if (!fsnotify_ignored_events(mark))
-		return mask;
-
-	/* Interest in FS_MODIFY may be needed for clearing ignore mask */
-	if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-		mask |= FS_MODIFY;
-
-	/*
-	 * If mark is interested in ignoring events on children, the object must
-	 * show interest in those events for fsnotify_parent() to notice it.
-	 */
-	return mask | mark->ignore_mask;
-}
-
 /* Get mask of events for a list of marks */
 extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn);
 /* Calculate mask of events for a list of marks */
@@ -765,27 +520,27 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
 				  __kernel_fsid_t *fsid);
 /* attach the mark to the object */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark,
-			     fsnotify_connp_t *connp, unsigned int obj_type,
-			     int add_flags, __kernel_fsid_t *fsid);
+			     fsnotify_connp_t *connp, unsigned int type,
+			     int allow_dups, __kernel_fsid_t *fsid);
 extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 				    fsnotify_connp_t *connp,
-				    unsigned int obj_type, int add_flags,
+				    unsigned int type, int allow_dups,
 				    __kernel_fsid_t *fsid);
 
 /* attach the mark to the inode */
 static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 					  struct inode *inode,
-					  int add_flags)
+					  int allow_dups)
 {
 	return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL);
+				 FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL);
 }
 static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
 						 struct inode *inode,
-						 int add_flags)
+						 int allow_dups)
 {
 	return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
-					FSNOTIFY_OBJ_TYPE_INODE, add_flags,
+					FSNOTIFY_OBJ_TYPE_INODE, allow_dups,
 					NULL);
 }
 
@@ -798,32 +553,33 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
 extern void fsnotify_free_mark(struct fsnotify_mark *mark);
 /* Wait until all marks queued for destruction are destroyed */
 extern void fsnotify_wait_marks_destroyed(void);
-/* Clear all of the marks of a group attached to a given object type */
-extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
-					  unsigned int obj_type);
+/* run all the marks in a group, and clear all of the marks attached to given object type */
+extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
 {
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL);
 }
 /* run all the marks in a group, and clear all of the inode marks */
 static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
 {
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL);
 }
 /* run all the marks in a group, and clear all of the sn marks */
 static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group)
 {
-	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB);
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL);
 }
 extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
 extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
 
-static inline void fsnotify_init_event(struct fsnotify_event *event)
+static inline void fsnotify_init_event(struct fsnotify_event *event,
+				       unsigned long objectid)
 {
 	INIT_LIST_HEAD(&event->list);
+	event->objectid = objectid;
 }
 
 #else
diff --git a/include/linux/iversion.h b/include/linux/iversion.h
index 3bfebde5a1a6..2917ef990d43 100644
--- a/include/linux/iversion.h
+++ b/include/linux/iversion.h
@@ -328,19 +328,6 @@ inode_query_iversion(struct inode *inode)
 	return cur >> I_VERSION_QUERIED_SHIFT;
 }
 
-/*
- * For filesystems without any sort of change attribute, the best we can
- * do is fake one up from the ctime:
- */
-static inline u64 time_to_chattr(struct timespec64 *t)
-{
-	u64 chattr = t->tv_sec;
-
-	chattr <<= 32;
-	chattr += t->tv_nsec;
-	return chattr;
-}
-
 /**
  * inode_eq_iversion_raw - check whether the raw i_version counter has changed
  * @inode: inode to check
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 465060acc981..481273f0c72d 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -71,14 +71,15 @@ static inline void *dereference_symbol_descriptor(void *ptr)
 	return ptr;
 }
 
-int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
-				      unsigned long),
-			    void *data);
-
 #ifdef CONFIG_KALLSYMS
 /* Lookup the address for a symbol. Returns 0 if not found. */
 unsigned long kallsyms_lookup_name(const char *name);
 
+/* Call a function on each kallsyms symbol in the core kernel */
+int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
+				      unsigned long),
+			    void *data);
+
 extern int kallsyms_lookup_size_offset(unsigned long addr,
 				  unsigned long *symbolsize,
 				  unsigned long *offset);
@@ -107,6 +108,14 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
 	return 0;
 }
 
+static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+						    struct module *,
+						    unsigned long),
+					  void *data)
+{
+	return 0;
+}
+
 static inline int kallsyms_lookup_size_offset(unsigned long addr,
 					      unsigned long *symbolsize,
 					      unsigned long *offset)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 9dae77a97a03..2484ed97e72f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -68,7 +68,6 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_exit(long result) __noreturn;
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 3bc9f7410e21..0520c0cd73f4 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -27,8 +27,7 @@ struct rpc_task;
 struct nlmsvc_binding {
 	__be32			(*fopen)(struct svc_rqst *,
 						struct nfs_fh *,
-						struct file **,
-						int mode);
+						struct file **);
 	void			(*fclose)(struct file *);
 };
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 70ce419e2709..666f5f310a04 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -10,8 +10,6 @@
 #ifndef LINUX_LOCKD_LOCKD_H
 #define LINUX_LOCKD_LOCKD_H
 
-/* XXX: a lot of this should really be under fs/lockd. */
-
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <net/ipv6.h>
@@ -156,8 +154,7 @@ struct nlm_rqst {
 struct nlm_file {
 	struct hlist_node	f_list;		/* linked list */
 	struct nfs_fh		f_handle;	/* NFS file handle */
-	struct file *		f_file[2];	/* VFS file pointers,
-						   indexed by O_ flags */
+	struct file *		f_file;		/* VFS file pointer */
 	struct nlm_share *	f_shares;	/* DOS shares */
 	struct list_head	f_blocks;	/* blocked locks */
 	unsigned int		f_locks;	/* guesstimate # of locks */
@@ -270,7 +267,6 @@ typedef int	  (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
 /*
  * Server-side lock handling
  */
-int		  lock_to_openmode(struct file_lock *);
 __be32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
 			      struct nlm_host *, struct nlm_lock *, int,
 			      struct nlm_cookie *, int);
@@ -290,9 +286,8 @@ void		  nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
  * File handling for the server personality
  */
 __be32		  nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
-					struct nlm_lock *);
+					struct nfs_fh *);
 void		  nlm_release_file(struct nlm_file *);
-void		  nlmsvc_put_lockowner(struct nlm_lockowner *);
 void		  nlmsvc_release_lockowner(struct nlm_lock *);
 void		  nlmsvc_mark_resources(struct net *);
 void		  nlmsvc_free_host_resources(struct nlm_host *);
@@ -304,15 +299,9 @@ void		  nlmsvc_invalidate_all(void);
 int           nlmsvc_unlock_all_by_sb(struct super_block *sb);
 int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
-static inline struct file *nlmsvc_file_file(struct nlm_file *file)
-{
-	return file->f_file[O_RDONLY] ?
-	       file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
-}
-
 static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 {
-	return locks_inode(nlmsvc_file_file(file));
+	return locks_inode(file->f_file);
 }
 
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 67e4a2c5500b..7ab9f264313f 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -41,8 +41,6 @@ struct nlm_lock {
 	struct nfs_fh		fh;
 	struct xdr_netobj	oh;
 	u32			svid;
-	u64			lock_start;
-	u64			lock_len;
 	struct file_lock	fl;
 };
 
@@ -98,19 +96,24 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-bool	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
+/*
+int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+int	nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+int	nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+ */
 
 #endif /* LOCKD_XDR_H */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 72831e35dca3..e709fe5924f2 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -22,22 +22,27 @@
 #define	nlm4_fbig		cpu_to_be32(NLM_FBIG)
 #define	nlm4_failed		cpu_to_be32(NLM_FAILED)
 
-void	nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
-bool	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-bool	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
+int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
+/*
+int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+int	nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+int	nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *);
+ */
 extern const struct rpc_version nlm_version4;
 
 #endif /* LOCKD_XDR4_H */
diff --git a/include/linux/module.h b/include/linux/module.h
index de07fbf3a125..4cd6d889d5ba 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -599,7 +599,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod)
 	return within_module_init(addr, mod) || within_module_core(addr, mod);
 }
 
-/* Search for module by name: must be in a RCU-sched critical section. */
+/* Search for module by name: must hold module_mutex. */
 struct module *find_module(const char *name);
 
 struct symsearch {
@@ -621,9 +621,13 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 /* Look for this name: can be of form module:name. */
 unsigned long module_kallsyms_lookup_name(const char *name);
 
-extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
+int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+					     struct module *, unsigned long),
+				   void *data);
+
+extern void __noreturn __module_put_and_exit(struct module *mod,
 			long code);
-#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code)
+#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
 
 #ifdef CONFIG_MODULE_UNLOAD
 int module_refcount(struct module *mod);
@@ -804,6 +808,14 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name)
 	return 0;
 }
 
+static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+							   struct module *,
+							   unsigned long),
+						 void *data)
+{
+	return 0;
+}
+
 static inline int register_module_notifier(struct notifier_block *nb)
 {
 	/* no events will happen anyway, so this can always succeed */
@@ -815,7 +827,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb)
 	return 0;
 }
 
-#define module_put_and_kthread_exit(code) kthread_exit(code)
+#define module_put_and_exit(code) do_exit(code)
 
 static inline void print_modules(void)
 {
@@ -892,8 +904,4 @@ static inline bool module_sig_ok(struct module *module)
 }
 #endif	/* CONFIG_MODULE_SIG */
 
-int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
-					     struct module *, unsigned long),
-				   void *data);
-
 #endif /* _LINUX_MODULE_H */
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index b06375e88e58..0dc7ad38a0da 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -36,6 +36,14 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
 	memcpy(target->data, source->data, source->size);
 }
 
+
+/*
+ * This is really a general kernel constant, but since nothing like
+ * this is defined in the kernel headers, I have to do it here.
+ */
+#define NFS_OFFSET_MAX		((__s64)((~(__u64)0) >> 1))
+
+
 enum nfs3_stable_how {
 	NFS_UNSTABLE = 0,
 	NFS_DATA_SYNC = 1,
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index ea88d0f462c9..9dc7eeac924f 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -385,6 +385,13 @@ enum lock_type4 {
 	NFS4_WRITEW_LT = 4
 };
 
+enum change_attr_type4 {
+	NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+	NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+	NFS4_CHANGE_TYPE_IS_UNDEFINED = 4
+};
 
 /* Mandatory Attributes */
 #define FATTR4_WORD0_SUPPORTED_ATTRS    (1UL << 0)
@@ -452,6 +459,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE	(1UL << 15)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 #define FATTR4_WORD2_MODE_UMASK		(1UL << 17)
 #define FATTR4_WORD2_XATTR_SUPPORT	(1UL << 18)
@@ -717,17 +725,4 @@ enum nfs4_setxattr_options {
 	SETXATTR4_CREATE	= 1,
 	SETXATTR4_REPLACE	= 2,
 };
-
-enum {
-	RCA4_TYPE_MASK_RDATA_DLG	= 0,
-	RCA4_TYPE_MASK_WDATA_DLG	= 1,
-	RCA4_TYPE_MASK_DIR_DLG		= 2,
-	RCA4_TYPE_MASK_FILE_LAYOUT	= 3,
-	RCA4_TYPE_MASK_BLK_LAYOUT	= 4,
-	RCA4_TYPE_MASK_OBJ_LAYOUT_MIN	= 8,
-	RCA4_TYPE_MASK_OBJ_LAYOUT_MAX	= 9,
-	RCA4_TYPE_MASK_OTHER_LAYOUT_MIN	= 12,
-	RCA4_TYPE_MASK_OTHER_LAYOUT_MAX	= 15,
-};
-
 #endif
diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
index 22265b1ff080..f5ba0fbff72f 100644
--- a/include/linux/nfs_ssc.h
+++ b/include/linux/nfs_ssc.h
@@ -8,7 +8,6 @@
  */
 
 #include <linux/nfs_fs.h>
-#include <linux/sunrpc/svc.h>
 
 extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
 
@@ -55,19 +54,6 @@ static inline void nfs42_ssc_close(struct file *filep)
 }
 #endif
 
-struct nfsd4_ssc_umount_item {
-	struct list_head nsui_list;
-	bool nsui_busy;
-	/*
-	 * nsui_refcnt inited to 2, 1 on list and 1 for consumer. Entry
-	 * is removed when refcnt drops to 1 and nsui_expire expires.
-	 */
-	refcount_t nsui_refcnt;
-	unsigned long nsui_expire;
-	struct vfsmount *nsui_vfsmount;
-	char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1];
-};
-
 /*
  * NFS_FS
  */
diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
index 8e76a79cdc6a..103d44695323 100644
--- a/include/linux/nfsacl.h
+++ b/include/linux/nfsacl.h
@@ -38,11 +38,5 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
 extern int
 nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
 	      struct posix_acl **pacl);
-extern bool
-nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt,
-		      struct posix_acl **pacl);
-extern bool
-nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode,
-		      struct posix_acl *acl, int encode_entries, int typeflag);
 
 #endif  /* __LINUX_NFSACL_H */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index af308e15f174..fa10acb8d6a4 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -78,7 +78,6 @@ struct file;
 
 extern struct pid *pidfd_pid(const struct file *file);
 struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
-int pidfd_create(struct pid *pid, unsigned int flags);
 
 static inline struct pid *get_pid(struct pid *pid)
 {
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 52eabe6797ee..6d63a5260130 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -15,6 +15,9 @@ struct user_struct {
 	refcount_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
+#ifdef CONFIG_FANOTIFY
+	atomic_t fanotify_listeners;
+#endif
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
 #endif
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 938c2bf29db8..43f854487539 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -10,6 +10,9 @@
 
 #define RPC_VERSION 2
 
+/* size of an XDR encoding unit in bytes, i.e. 32bit */
+#define XDR_UNIT	(4)
+
 /* spec defines authentication flavor as an unsigned 32 bit integer */
 typedef u32	rpc_authflavor_t;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1cf7a7799cc0..386628b36bc7 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -19,7 +19,6 @@
 #include <linux/sunrpc/svcauth.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
-#include <linux/pagevec.h>
 
 /* statistics for svc_pool structures */
 struct svc_pool_stats {
@@ -52,6 +51,25 @@ struct svc_pool {
 	unsigned long		sp_flags;
 } ____cacheline_aligned_in_smp;
 
+struct svc_serv;
+
+struct svc_serv_ops {
+	/* Callback to use when last thread exits. */
+	void		(*svo_shutdown)(struct svc_serv *, struct net *);
+
+	/* function for service threads to run */
+	int		(*svo_function)(void *);
+
+	/* queue up a transport for servicing */
+	void		(*svo_enqueue_xprt)(struct svc_xprt *);
+
+	/* set up thread (or whatever) execution context */
+	int		(*svo_setup)(struct svc_serv *, struct svc_pool *, int);
+
+	/* optional module to count when adding threads (pooled svcs only) */
+	struct module	*svo_module;
+};
+
 /*
  * RPC service.
  *
@@ -66,7 +84,6 @@ struct svc_serv {
 	struct svc_program *	sv_program;	/* RPC program */
 	struct svc_stat *	sv_stats;	/* RPC statistics */
 	spinlock_t		sv_lock;
-	struct kref		sv_refcnt;
 	unsigned int		sv_nrthreads;	/* # of server threads */
 	unsigned int		sv_maxconn;	/* max connections allowed or
 						 * '0' causing max to be based
@@ -84,8 +101,7 @@ struct svc_serv {
 
 	unsigned int		sv_nrpools;	/* number of thread pools */
 	struct svc_pool *	sv_pools;	/* array of thread pools */
-	int			(*sv_threadfn)(void *data);
-
+	const struct svc_serv_ops *sv_ops;	/* server operations */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct list_head	sv_cb_list;	/* queue for callback requests
 						 * that arrive over the same
@@ -97,30 +113,15 @@ struct svc_serv {
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 
-/**
- * svc_get() - increment reference count on a SUNRPC serv
- * @serv:  the svc_serv to have count incremented
- *
- * Returns: the svc_serv that was passed in.
+/*
+ * We use sv_nrthreads as a reference count.  svc_destroy() drops
+ * this refcount, so we need to bump it up around operations that
+ * change the number of threads.  Horrible, but there it is.
+ * Should be called with the "service mutex" held.
  */
-static inline struct svc_serv *svc_get(struct svc_serv *serv)
+static inline void svc_get(struct svc_serv *serv)
 {
-	kref_get(&serv->sv_refcnt);
-	return serv;
-}
-
-void svc_destroy(struct kref *);
-
-/**
- * svc_put - decrement reference count on a SUNRPC serv
- * @serv:  the svc_serv to have count decremented
- *
- * When the reference count reaches zero, svc_destroy()
- * is called to clean up and free the serv.
- */
-static inline void svc_put(struct svc_serv *serv)
-{
-	kref_put(&serv->sv_refcnt, svc_destroy);
+	serv->sv_nrthreads++;
 }
 
 /*
@@ -246,16 +247,12 @@ struct svc_rqst {
 
 	size_t			rq_xprt_hlen;	/* xprt header len */
 	struct xdr_buf		rq_arg;
-	struct xdr_stream	rq_arg_stream;
-	struct xdr_stream	rq_res_stream;
-	struct page		*rq_scratch_page;
 	struct xdr_buf		rq_res;
 	struct page		*rq_pages[RPCSVC_MAXPAGES + 1];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
 	struct page *		*rq_page_end;  /* one past the last page */
 
-	struct pagevec		rq_pvec;
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 	struct bio_vec		rq_bvec[RPCSVC_MAXPAGES];
 
@@ -275,13 +272,13 @@ struct svc_rqst {
 #define	RQ_VICTIM	(5)			/* about to be shut down */
 #define	RQ_BUSY		(6)			/* request is busy */
 #define	RQ_DATA		(7)			/* request has data */
+#define RQ_AUTHERR	(8)			/* Request status is auth error */
 	unsigned long		rq_flags;	/* flags field */
 	ktime_t			rq_qtime;	/* enqueue time */
 
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
 	void *			rq_auth_data;	/* flavor-specific data */
-	__be32			rq_auth_stat;	/* authentication status */
 	int			rq_auth_slack;	/* extra space xdr code
 						 * should leave in head
 						 * for krb5i, krb5p.
@@ -455,21 +452,40 @@ struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
-	bool			(*pc_decode)(struct svc_rqst *rqstp,
-					     struct xdr_stream *xdr);
+	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
 	/* XDR encode result: */
-	bool			(*pc_encode)(struct svc_rqst *rqstp,
-					     struct xdr_stream *xdr);
+	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
-	unsigned int		pc_argzero;	/* how much of argument to clear */
 	unsigned int		pc_ressize;	/* result struct size */
 	unsigned int		pc_cachetype;	/* cache info (NFS) */
 	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
-	const char *		pc_name;	/* for display */
 };
 
+/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+	SVC_POOL_AUTO = -1,	/* choose one of the others */
+	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
+				 * (legacy & UP mode) */
+	SVC_POOL_PERCPU,	/* one pool per cpu */
+	SVC_POOL_PERNODE	/* one pool per numa node */
+};
+
+struct svc_pool_map {
+	int count;			/* How many svc_servs use us */
+	int mode;			/* Note: int not enum to avoid
+					 * warnings about "enumeration value
+					 * not handled in switch" */
+	unsigned int npools;
+	unsigned int *pool_to;		/* maps pool id to cpu or node */
+	unsigned int *to_pool;		/* maps cpu or node to pool id */
+};
+
+extern struct svc_pool_map svc_pool_map;
+
 /*
  * Function prototypes.
  */
@@ -477,17 +493,22 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
 int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
-			    int (*threadfn)(void *data));
+			    const struct svc_serv_ops *);
 struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
 					struct svc_pool *pool, int node);
-void		   svc_rqst_replace_page(struct svc_rqst *rqstp,
-					 struct page *page);
+struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
+					struct svc_pool *pool, int node);
 void		   svc_rqst_free(struct svc_rqst *);
 void		   svc_exit_thread(struct svc_rqst *);
+unsigned int	   svc_pool_map_get(void);
+void		   svc_pool_map_put(void);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-				     int (*threadfn)(void *data));
+			const struct svc_serv_ops *);
 int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+int		   svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
 int		   svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+void		   svc_destroy(struct svc_serv *);
+void		   svc_shutdown_net(struct svc_serv *, struct net *);
 int		   svc_process(struct svc_rqst *);
 int		   bc_svc_process(struct svc_serv *, struct rpc_rqst *,
 			struct svc_rqst *);
@@ -498,14 +519,16 @@ void		   svc_wake_up(struct svc_serv *);
 void		   svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
-int		   svc_encode_result_payload(struct svc_rqst *rqstp,
-					     unsigned int offset,
-					     unsigned int length);
+int		   svc_encode_read_payload(struct svc_rqst *rqstp,
+					   unsigned int offset,
+					   unsigned int length);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
-					 struct xdr_buf *payload);
+					 struct page **pages,
+					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, void *p,
 					     size_t total);
+__be32		   svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err);
 __be32		   svc_generic_init_request(struct svc_rqst *rqstp,
 					    const struct svc_program *progp,
 					    struct svc_process_info *procinfo);
@@ -534,42 +557,4 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 	svc_reserve(rqstp, space + rqstp->rq_auth_slack);
 }
 
-/**
- * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding
- * @rqstp: controlling server RPC transaction context
- *
- */
-static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
-{
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
-	struct kvec *argv = rqstp->rq_arg.head;
-
-	xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL);
-	xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
-}
-
-/**
- * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding
- * @rqstp: controlling server RPC transaction context
- *
- */
-static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
-{
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
-	struct xdr_buf *buf = &rqstp->rq_res;
-	struct kvec *resv = buf->head;
-
-	xdr_reset_scratch_buffer(xdr);
-
-	xdr->buf = buf;
-	xdr->iov = resv;
-	xdr->p   = resv->iov_base + resv->iov_len;
-	xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
-	buf->len = resv->iov_len;
-	xdr->page_ptr = buf->pages - 1;
-	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages);
-	buf->buflen -= rqstp->rq_auth_slack;
-	xdr->rqst = NULL;
-}
-
 #endif /* SUNRPC_SVC_H */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 2b870a3f391b..9dc3a3b88391 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -207,8 +207,8 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 				    struct svc_rdma_recv_ctxt *rctxt,
 				    int status);
 extern int svc_rdma_sendto(struct svc_rqst *);
-extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
-				   unsigned int length);
+extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
+				 unsigned int length);
 
 /* svc_rdma_transport.c */
 extern struct svc_xprt_class svc_rdma_class;
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index dbffb92511ef..aca35ab5cff2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -21,8 +21,8 @@ struct svc_xprt_ops {
 	int		(*xpo_has_wspace)(struct svc_xprt *);
 	int		(*xpo_recvfrom)(struct svc_rqst *);
 	int		(*xpo_sendto)(struct svc_rqst *);
-	int		(*xpo_result_payload)(struct svc_rqst *, unsigned int,
-					      unsigned int);
+	int		(*xpo_read_payload)(struct svc_rqst *, unsigned int,
+					    unsigned int);
 	void		(*xpo_release_rqst)(struct svc_rqst *);
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
@@ -127,16 +127,14 @@ int	svc_reg_xprt_class(struct svc_xprt_class *);
 void	svc_unreg_xprt_class(struct svc_xprt_class *);
 void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
 		      struct svc_serv *);
-int	svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
-			struct net *net, const int family,
-			const unsigned short port, int flags,
-			const struct cred *cred);
-void	svc_xprt_destroy_all(struct svc_serv *serv, struct net *net);
-void	svc_xprt_received(struct svc_xprt *xprt);
+int	svc_create_xprt(struct svc_serv *, const char *, struct net *,
+			const int, const unsigned short, int,
+			const struct cred *);
+void	svc_xprt_do_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_enqueue(struct svc_xprt *xprt);
 void	svc_xprt_put(struct svc_xprt *xprt);
 void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
-void	svc_xprt_close(struct svc_xprt *xprt);
+void	svc_close_xprt(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 int	svc_print_xprts(char *buf, int maxlen);
 struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 6d9cc9080aca..b0003866a249 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -127,7 +127,7 @@ struct auth_ops {
 	char *	name;
 	struct module *owner;
 	int	flavour;
-	int	(*accept)(struct svc_rqst *rq);
+	int	(*accept)(struct svc_rqst *rq, __be32 *authp);
 	int	(*release)(struct svc_rqst *rq);
 	void	(*domain_release)(struct auth_domain *);
 	int	(*set_client)(struct svc_rqst *rq);
@@ -149,7 +149,7 @@ struct auth_ops {
 
 struct svc_xprt;
 
-extern int	svc_authenticate(struct svc_rqst *rqstp);
+extern int	svc_authenticate(struct svc_rqst *rqstp, __be32 *authp);
 extern int	svc_authorise(struct svc_rqst *rqstp);
 extern int	svc_set_client(struct svc_rqst *rqstp);
 extern int	svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a366d3eb0531..b7ac7fe68306 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -57,9 +57,10 @@ int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
 void		svc_sock_update_bufs(struct svc_serv *serv);
-int		svc_addsock(struct svc_serv *serv, struct net *net,
-			    const int fd, char *name_return, const size_t len,
-			    const struct cred *cred);
+bool		svc_alien_sock(struct net *net, int fd);
+int		svc_addsock(struct svc_serv *serv, const int fd,
+					char *name_return, const size_t len,
+					const struct cred *cred);
 void		svc_init_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index c1c50eaae472..6d9d1520612b 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -19,13 +19,6 @@
 struct bio_vec;
 struct rpc_rqst;
 
-/*
- * Size of an XDR encoding unit in bytes, i.e. 32 bits,
- * as defined in Section 3 of RFC 4506. All encoded
- * XDR data items are aligned on a boundary of 32 bits.
- */
-#define XDR_UNIT		sizeof(__be32)
-
 /*
  * Buffer adjustment
  */
@@ -239,12 +232,10 @@ typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
 			    __be32 *p, struct rpc_rqst *rqst);
-extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
-			   struct page **pages, struct rpc_rqst *rqst);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
 		size_t nbytes);
-extern void __xdr_commit_encode(struct xdr_stream *xdr);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
@@ -255,71 +246,13 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
 			    __be32 *p, struct rpc_rqst *rqst);
 extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
 		struct page **pages, unsigned int len);
+extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
 extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
 extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
 extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
-extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
-				  unsigned int len);
-
-/**
- * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
- * @xdr: pointer to xdr_stream struct
- * @buf: pointer to an empty buffer
- * @buflen: size of 'buf'
- *
- * The scratch buffer is used when decoding from an array of pages.
- * If an xdr_inline_decode() call spans across page boundaries, then
- * we copy the data into the scratch buffer in order to allow linear
- * access.
- */
-static inline void
-xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
-{
-	xdr->scratch.iov_base = buf;
-	xdr->scratch.iov_len = buflen;
-}
-
-/**
- * xdr_set_scratch_page - Attach a scratch buffer for decoding data
- * @xdr: pointer to xdr_stream struct
- * @page: an anonymous page
- *
- * See xdr_set_scratch_buffer().
- */
-static inline void
-xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page)
-{
-	xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE);
-}
-
-/**
- * xdr_reset_scratch_buffer - Clear scratch buffer information
- * @xdr: pointer to xdr_stream struct
- *
- * See xdr_set_scratch_buffer().
- */
-static inline void
-xdr_reset_scratch_buffer(struct xdr_stream *xdr)
-{
-	xdr_set_scratch_buffer(xdr, NULL, 0);
-}
-
-/**
- * xdr_commit_encode - Ensure all data is written to xdr->buf
- * @xdr: pointer to xdr_stream
- *
- * Handle encoding across page boundaries by giving the caller a
- * temporary location to write to, then later copying the data into
- * place. __xdr_commit_encode() does that copying.
- */
-static inline void xdr_commit_encode(struct xdr_stream *xdr)
-{
-	if (unlikely(xdr->scratch.iov_len))
-		__xdr_commit_encode(xdr);
-}
 
 /**
  * xdr_stream_remaining - Return the number of bytes remaining in the stream
@@ -352,7 +285,7 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
 static inline size_t
 xdr_align_size(size_t n)
 {
-	const size_t mask = XDR_UNIT - 1;
+	const size_t mask = sizeof(__u32) - 1;
 
 	return (n + mask) & ~mask;
 }
@@ -382,7 +315,7 @@ static inline size_t xdr_pad_size(size_t n)
  */
 static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
 {
-	const size_t len = XDR_UNIT;
+	const size_t len = sizeof(__be32);
 	__be32 *p = xdr_reserve_space(xdr, len);
 
 	if (unlikely(!p))
@@ -401,7 +334,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
  */
 static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
 {
-	const size_t len = XDR_UNIT;
+	const size_t len = sizeof(__be32);
 	__be32 *p = xdr_reserve_space(xdr, len);
 
 	if (unlikely(!p))
@@ -410,40 +343,6 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
 	return len;
 }
 
-/**
- * xdr_encode_bool - Encode a boolean item
- * @p: address in a buffer into which to encode
- * @n: boolean value to encode
- *
- * Return value:
- *   Address of item following the encoded boolean
- */
-static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
-{
-	*p++ = n ? xdr_one : xdr_zero;
-	return p;
-}
-
-/**
- * xdr_stream_encode_bool - Encode a boolean item
- * @xdr: pointer to xdr_stream
- * @n: boolean value to encode
- *
- * Return values:
- *   On success, returns length in bytes of XDR buffer consumed
- *   %-EMSGSIZE on XDR buffer overflow
- */
-static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n)
-{
-	const size_t len = XDR_UNIT;
-	__be32 *p = xdr_reserve_space(xdr, len);
-
-	if (unlikely(!p))
-		return -EMSGSIZE;
-	xdr_encode_bool(p, n);
-	return len;
-}
-
 /**
  * xdr_stream_encode_u32 - Encode a 32-bit integer
  * @xdr: pointer to xdr_stream
@@ -605,27 +504,6 @@ static inline bool xdr_item_is_present(const __be32 *p)
 	return *p != xdr_zero;
 }
 
-/**
- * xdr_stream_decode_bool - Decode a boolean
- * @xdr: pointer to xdr_stream
- * @ptr: pointer to a u32 in which to store the result
- *
- * Return values:
- *   %0 on success
- *   %-EBADMSG on XDR buffer overflow
- */
-static inline ssize_t
-xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr)
-{
-	const size_t count = sizeof(*ptr);
-	__be32 *p = xdr_inline_decode(xdr, count);
-
-	if (unlikely(!p))
-		return -EBADMSG;
-	*ptr = (*p != xdr_zero);
-	return 0;
-}
-
 /**
  * xdr_stream_decode_u32 - Decode a 32-bit integer
  * @xdr: pointer to xdr_stream
@@ -647,27 +525,6 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
 	return 0;
 }
 
-/**
- * xdr_stream_decode_u64 - Decode a 64-bit integer
- * @xdr: pointer to xdr_stream
- * @ptr: location to store 64-bit integer
- *
- * Return values:
- *   %0 on success
- *   %-EBADMSG on XDR buffer overflow
- */
-static inline ssize_t
-xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
-{
-	const size_t count = sizeof(*ptr);
-	__be32 *p = xdr_inline_decode(xdr, count);
-
-	if (unlikely(!p))
-		return -EBADMSG;
-	xdr_decode_hyper(p, ptr);
-	return 0;
-}
-
 /**
  * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data
  * @xdr: pointer to xdr_stream
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index ec4cea5dd222..61d0315a42ab 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1321,6 +1321,18 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length)
 	return do_sys_ftruncate(fd, length, 1);
 }
 
+extern int __close_fd(struct files_struct *files, unsigned int fd);
+
+/*
+ * In contrast to sys_close(), this stub does not check whether the syscall
+ * should or should not be restarted, but returns the raw error codes from
+ * __close_fd().
+ */
+static inline int ksys_close(unsigned int fd)
+{
+	return __close_fd(current->files, fd);
+}
+
 extern long do_sys_truncate(const char __user *pathname, loff_t length);
 
 static inline long ksys_truncate(const char __user *pathname, loff_t length)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index da861f2e34ce..6ee587d0aeaa 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -56,8 +56,6 @@ typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 
 int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_dobool(struct ctl_table *table, int write, void *buffer,
-		size_t *lenp, loff_t *ppos);
 int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *);
 int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
 int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 29f55fadc362..71cc05ddaa21 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,10 +50,6 @@ enum ucount_type {
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
-#endif
-#ifdef CONFIG_FANOTIFY
-	UCOUNT_FANOTIFY_GROUPS,
-	UCOUNT_FANOTIFY_MARKS,
 #endif
 	UCOUNT_COUNTS,
 };
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 56e4a57d2538..8220369ee610 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -394,7 +394,6 @@ DEFINE_RPC_RUNNING_EVENT(complete);
 DEFINE_RPC_RUNNING_EVENT(timeout);
 DEFINE_RPC_RUNNING_EVENT(signalled);
 DEFINE_RPC_RUNNING_EVENT(end);
-DEFINE_RPC_RUNNING_EVENT(call_done);
 
 DECLARE_EVENT_CLASS(rpc_task_queued,
 
@@ -1481,7 +1480,8 @@ DEFINE_SVCXDRBUF_EVENT(sendto);
 	svc_rqst_flag(SPLICE_OK)					\
 	svc_rqst_flag(VICTIM)						\
 	svc_rqst_flag(BUSY)						\
-	svc_rqst_flag_end(DATA)
+	svc_rqst_flag(DATA)						\
+	svc_rqst_flag_end(AUTHERR)
 
 #undef svc_rqst_flag
 #undef svc_rqst_flag_end
@@ -1547,9 +1547,9 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
 		{ SVC_COMPLETE,	"SVC_COMPLETE" })
 
 TRACE_EVENT(svc_authenticate,
-	TP_PROTO(const struct svc_rqst *rqst, int auth_res),
+	TP_PROTO(const struct svc_rqst *rqst, int auth_res, __be32 auth_stat),
 
-	TP_ARGS(rqst, auth_res),
+	TP_ARGS(rqst, auth_res, auth_stat),
 
 	TP_STRUCT__entry(
 		__field(u32, xid)
@@ -1560,7 +1560,7 @@ TRACE_EVENT(svc_authenticate,
 	TP_fast_assign(
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->svc_status = auth_res;
-		__entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat);
+		__entry->auth_stat = be32_to_cpu(auth_stat);
 	),
 
 	TP_printk("xid=0x%08x auth_res=%s auth_stat=%s",
@@ -1578,7 +1578,6 @@ TRACE_EVENT(svc_process,
 		__field(u32, vers)
 		__field(u32, proc)
 		__string(service, name)
-		__string(procedure, rqst->rq_procinfo->pc_name)
 		__string(addr, rqst->rq_xprt ?
 			 rqst->rq_xprt->xpt_remotebuf : "(null)")
 	),
@@ -1588,16 +1587,13 @@ TRACE_EVENT(svc_process,
 		__entry->vers = rqst->rq_vers;
 		__entry->proc = rqst->rq_proc;
 		__assign_str(service, name);
-		__assign_str(procedure, rqst->rq_procinfo->pc_name);
 		__assign_str(addr, rqst->rq_xprt ?
 			     rqst->rq_xprt->xpt_remotebuf : "(null)");
 	),
 
-	TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%s",
+	TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u",
 			__get_str(addr), __entry->xid,
-			__get_str(service), __entry->vers,
-			__get_str(procedure)
-	)
+			__get_str(service), __entry->vers, __entry->proc)
 );
 
 DECLARE_EVENT_CLASS(svc_rqst_event,
@@ -1756,7 +1752,6 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
 			), \
 			TP_ARGS(xprt))
 
-DEFINE_SVC_XPRT_EVENT(received);
 DEFINE_SVC_XPRT_EVENT(no_write_space);
 DEFINE_SVC_XPRT_EVENT(close);
 DEFINE_SVC_XPRT_EVENT(detach);
@@ -1854,7 +1849,6 @@ TRACE_EVENT(svc_stats_latency,
 	TP_STRUCT__entry(
 		__field(u32, xid)
 		__field(unsigned long, execute)
-		__string(procedure, rqst->rq_procinfo->pc_name)
 		__string(addr, rqst->rq_xprt->xpt_remotebuf)
 	),
 
@@ -1862,13 +1856,11 @@ TRACE_EVENT(svc_stats_latency,
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
 							 rqst->rq_stime));
-		__assign_str(procedure, rqst->rq_procinfo->pc_name);
 		__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
 	),
 
-	TP_printk("addr=%s xid=0x%08x proc=%s execute-us=%lu",
-		__get_str(addr), __entry->xid, __get_str(procedure),
-		__entry->execute)
+	TP_printk("addr=%s xid=0x%08x execute-us=%lu",
+		__get_str(addr), __entry->xid, __entry->execute)
 );
 
 DECLARE_EVENT_CLASS(svc_deferred_event,
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index d8536d77fea1..fbf9c5c7dd59 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -20,7 +20,6 @@
 #define FAN_OPEN_EXEC		0x00001000	/* File was opened for exec */
 
 #define FAN_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
-#define FAN_FS_ERROR		0x00008000	/* Filesystem error */
 
 #define FAN_OPEN_PERM		0x00010000	/* File open in perm check */
 #define FAN_ACCESS_PERM		0x00020000	/* File accessed in perm check */
@@ -28,8 +27,6 @@
 
 #define FAN_EVENT_ON_CHILD	0x08000000	/* Interested in child events */
 
-#define FAN_RENAME		0x10000000	/* File was renamed */
-
 #define FAN_ONDIR		0x40000000	/* Event occurred against dir */
 
 /* helper events */
@@ -54,18 +51,13 @@
 #define FAN_ENABLE_AUDIT	0x00000040
 
 /* Flags to determine fanotify event format */
-#define FAN_REPORT_PIDFD	0x00000080	/* Report pidfd for event->pid */
 #define FAN_REPORT_TID		0x00000100	/* event->pid is thread id */
 #define FAN_REPORT_FID		0x00000200	/* Report unique file id */
 #define FAN_REPORT_DIR_FID	0x00000400	/* Report unique directory id */
 #define FAN_REPORT_NAME		0x00000800	/* Report events with name */
-#define FAN_REPORT_TARGET_FID	0x00001000	/* Report dirent target id  */
 
 /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
 #define FAN_REPORT_DFID_NAME	(FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
-/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
-#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
-				     FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
 
 /* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_INIT_FLAGS	(FAN_CLOEXEC | FAN_NONBLOCK | \
@@ -82,21 +74,12 @@
 #define FAN_MARK_IGNORED_SURV_MODIFY	0x00000040
 #define FAN_MARK_FLUSH		0x00000080
 /* FAN_MARK_FILESYSTEM is	0x00000100 */
-#define FAN_MARK_EVICTABLE	0x00000200
-/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
-#define FAN_MARK_IGNORE		0x00000400
 
 /* These are NOT bitwise flags.  Both bits can be used togther.  */
 #define FAN_MARK_INODE		0x00000000
 #define FAN_MARK_MOUNT		0x00000010
 #define FAN_MARK_FILESYSTEM	0x00000100
 
-/*
- * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
- * for non-inode mark types.
- */
-#define FAN_MARK_IGNORE_SURV	(FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
-
 /* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_MARK_FLAGS	(FAN_MARK_ADD |\
 				 FAN_MARK_REMOVE |\
@@ -140,14 +123,6 @@ struct fanotify_event_metadata {
 #define FAN_EVENT_INFO_TYPE_FID		1
 #define FAN_EVENT_INFO_TYPE_DFID_NAME	2
 #define FAN_EVENT_INFO_TYPE_DFID	3
-#define FAN_EVENT_INFO_TYPE_PIDFD	4
-#define FAN_EVENT_INFO_TYPE_ERROR	5
-
-/* Special info types for FAN_RENAME */
-#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME	10
-/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID	11 */
-#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME	12
-/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID	13 */
 
 /* Variable length info record following event metadata */
 struct fanotify_event_info_header {
@@ -173,21 +148,6 @@ struct fanotify_event_info_fid {
 	unsigned char handle[0];
 };
 
-/*
- * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
- * It holds a pidfd for the pid that was responsible for generating an event.
- */
-struct fanotify_event_info_pidfd {
-	struct fanotify_event_info_header hdr;
-	__s32 pidfd;
-};
-
-struct fanotify_event_info_error {
-	struct fanotify_event_info_header hdr;
-	__s32 error;
-	__u32 error_count;
-};
-
 struct fanotify_response {
 	__s32 fd;
 	__u32 response;
@@ -200,8 +160,6 @@ struct fanotify_response {
 
 /* No fd set in event */
 #define FAN_NOFD	-1
-#define FAN_NOPIDFD	FAN_NOFD
-#define FAN_EPIDFD	-2
 
 /* Helper functions to deal with fanotify_event_metadata buffers */
 #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
diff --git a/include/uapi/linux/nfs3.h b/include/uapi/linux/nfs3.h
index c22ab77713bd..37e4b34e6b43 100644
--- a/include/uapi/linux/nfs3.h
+++ b/include/uapi/linux/nfs3.h
@@ -63,12 +63,6 @@ enum nfs3_ftype {
 	NF3BAD  = 8
 };
 
-enum nfs3_time_how {
-	DONT_CHANGE		= 0,
-	SET_TO_SERVER_TIME	= 1,
-	SET_TO_CLIENT_TIME	= 2,
-};
-
 struct nfs3_fh {
 	unsigned short size;
 	unsigned char  data[NFS3_FHSIZE];
diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
new file mode 100644
index 000000000000..ff0ca88b1c8f
--- /dev/null
+++ b/include/uapi/linux/nfsd/nfsfh.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file describes the layout of the file handles as passed
+ * over the wire.
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _UAPI_LINUX_NFSD_FH_H
+#define _UAPI_LINUX_NFSD_FH_H
+
+#include <linux/types.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+
+/*
+ * This is the old "dentry style" Linux NFSv2 file handle.
+ *
+ * The xino and xdev fields are currently used to transport the
+ * ino/dev of the exported inode.
+ */
+struct nfs_fhbase_old {
+	__u32		fb_dcookie;	/* dentry cookie - always 0xfeebbaca */
+	__u32		fb_ino;		/* our inode number */
+	__u32		fb_dirino;	/* dir inode number, 0 for directories */
+	__u32		fb_dev;		/* our device */
+	__u32		fb_xdev;
+	__u32		fb_xino;
+	__u32		fb_generation;
+};
+
+/*
+ * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
+ * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
+ *
+ * The file handle starts with a sequence of four-byte words.
+ * The first word contains a version number (1) and three descriptor bytes
+ * that tell how the remaining 3 variable length fields should be handled.
+ * These three bytes are auth_type, fsid_type and fileid_type.
+ *
+ * All four-byte values are in host-byte-order.
+ *
+ * The auth_type field is deprecated and must be set to 0.
+ *
+ * The fsid_type identifies how the filesystem (or export point) is
+ *    encoded.
+ *  Current values:
+ *     0  - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number
+ *        NOTE: we cannot use the kdev_t device id value, because kdev_t.h
+ *              says we mustn't.  We must break it up and reassemble.
+ *     1  - 4 byte user specified identifier
+ *     2  - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
+ *     3  - 4 byte device id, encoded for user-space, 4 byte inode number
+ *     4  - 4 byte inode number and 4 byte uuid
+ *     5  - 8 byte uuid
+ *     6  - 16 byte uuid
+ *     7  - 8 byte inode number and 16 byte uuid
+ *
+ * The fileid_type identifies how the file within the filesystem is encoded.
+ *   The values for this field are filesystem specific, except that
+ *   filesystems must not use the values '0' or '0xff'. See 'enum fid_type'
+ *   in include/linux/exportfs.h for currently registered values.
+ */
+struct nfs_fhbase_new {
+	__u8		fb_version;	/* == 1, even => nfs_fhbase_old */
+	__u8		fb_auth_type;
+	__u8		fb_fsid_type;
+	__u8		fb_fileid_type;
+	__u32		fb_auth[1];
+/*	__u32		fb_fsid[0]; floating */
+/*	__u32		fb_fileid[0]; floating */
+};
+
+struct knfsd_fh {
+	unsigned int	fh_size;	/* significant for NFSv3.
+					 * Points to the current size while building
+					 * a new file handle
+					 */
+	union {
+		struct nfs_fhbase_old	fh_old;
+		__u32			fh_pad[NFS4_FHSIZE/4];
+		struct nfs_fhbase_new	fh_new;
+	} fh_base;
+};
+
+#define ofh_dcookie		fh_base.fh_old.fb_dcookie
+#define ofh_ino			fh_base.fh_old.fb_ino
+#define ofh_dirino		fh_base.fh_old.fb_dirino
+#define ofh_dev			fh_base.fh_old.fb_dev
+#define ofh_xdev		fh_base.fh_old.fb_xdev
+#define ofh_xino		fh_base.fh_old.fb_xino
+#define ofh_generation		fh_base.fh_old.fb_generation
+
+#define	fh_version		fh_base.fh_new.fb_version
+#define	fh_fsid_type		fh_base.fh_new.fb_fsid_type
+#define	fh_auth_type		fh_base.fh_new.fb_auth_type
+#define	fh_fileid_type		fh_base.fh_new.fb_fileid_type
+#define	fh_fsid			fh_base.fh_new.fb_auth
+
+/* Do not use, provided for userspace compatibility. */
+#define	fh_auth			fh_base.fh_new.fb_auth
+
+#endif /* _UAPI_LINUX_NFSD_FH_H */
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 691f90dd09d2..b2ebacd2f309 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
 	audit_update_mark(audit_mark, dentry->d_inode);
 	audit_mark->rule = krule;
 
-	ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
+	ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true);
 	if (ret < 0) {
 		audit_mark->path = NULL;
 		fsnotify_put_mark(&audit_mark->mark);
@@ -161,7 +161,8 @@ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
 
 	audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
 
-	if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group))
+	if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group) ||
+	    WARN_ON_ONCE(!inode))
 		return 0;
 
 	if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) {
@@ -182,8 +183,7 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = {
 
 static int __init audit_fsnotify_init(void)
 {
-	audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops,
-						    FSNOTIFY_GROUP_DUPS);
+	audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops);
 	if (IS_ERR(audit_fsnotify_group)) {
 		audit_fsnotify_group = NULL;
 		audit_panic("cannot create audit fsnotify group");
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0c35879bbf7c..39241207ec04 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -1077,7 +1077,7 @@ static int __init audit_tree_init(void)
 
 	audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
 
-	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0);
+	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
 	if (IS_ERR(audit_tree_group))
 		audit_panic("cannot initialize fsnotify group for rectree watches");
 
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 5cf22fe30149..edbeffee64b8 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -472,7 +472,8 @@ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
 
 	parent = container_of(inode_mark, struct audit_parent, mark);
 
-	if (WARN_ON_ONCE(inode_mark->group != audit_watch_group))
+	if (WARN_ON_ONCE(inode_mark->group != audit_watch_group) ||
+	    WARN_ON_ONCE(!inode))
 		return 0;
 
 	if (mask & (FS_CREATE|FS_MOVED_TO) && inode)
@@ -492,7 +493,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = {
 
 static int __init audit_watch_init(void)
 {
-	audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0);
+	audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops);
 	if (IS_ERR(audit_watch_group)) {
 		audit_watch_group = NULL;
 		audit_panic("cannot create audit fsnotify group");
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5966013bc788..6b14b4c4068c 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -507,7 +507,7 @@ static void *bpf_obj_do_get(const char __user *pathname,
 		return ERR_PTR(ret);
 
 	inode = d_backing_inode(path.dentry);
-	ret = path_permission(&path, ACC_MODE(flags));
+	ret = inode_permission(inode, ACC_MODE(flags));
 	if (ret)
 		goto out;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f3b7856eadb6..2b3b1a687d36 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3909,6 +3909,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	pid_t pid = attr->task_fd_query.pid;
 	u32 fd = attr->task_fd_query.fd;
 	const struct perf_event *event;
+	struct files_struct *files;
 	struct task_struct *task;
 	struct file *file;
 	int err;
@@ -3928,11 +3929,23 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (!task)
 		return -ENOENT;
 
-	err = 0;
-	file = fget_task(task, fd);
+	files = get_files_struct(task);
 	put_task_struct(task);
+	if (!files)
+		return -ENOENT;
+
+	err = 0;
+	spin_lock(&files->file_lock);
+	file = fcheck_files(files, fd);
 	if (!file)
-		return -EBADF;
+		err = -EBADF;
+	else
+		get_file(file);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+
+	if (err)
+		goto out;
 
 	if (file->f_op == &bpf_link_fops) {
 		struct bpf_link *link = file->private_data;
@@ -3972,6 +3985,7 @@ out_not_supp:
 	err = -ENOTSUPP;
 put_file:
 	fput(file);
+out:
 	return err;
 }
 
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 762b4d7c3779..f3d3a562a802 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -185,7 +185,7 @@ again:
 	for (; curr_fd < max_fds; curr_fd++) {
 		struct file *f;
 
-		f = files_lookup_fd_rcu(curr_files, curr_fd);
+		f = fcheck_files(curr_files, curr_fd);
 		if (!f)
 			continue;
 		if (!get_file_rcu(f))
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b2428865267..c47ad81c627c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -3138,21 +3138,21 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
  *	the exec layer of the kernel.
  */
 
-int unshare_files(void)
+int unshare_files(struct files_struct **displaced)
 {
 	struct task_struct *task = current;
-	struct files_struct *old, *copy = NULL;
+	struct files_struct *copy = NULL;
 	int error;
 
 	error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
-	if (error || !copy)
+	if (error || !copy) {
+		*displaced = NULL;
 		return error;
-
-	old = task->files;
+	}
+	*displaced = task->files;
 	task_lock(task);
 	task->files = copy;
 	task_unlock(task);
-	put_files_struct(old);
 	return 0;
 }
 
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 17d3a704bafa..96505113b907 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -200,11 +200,6 @@ unsigned long kallsyms_lookup_name(const char *name)
 	return module_kallsyms_lookup_name(name);
 }
 
-#ifdef CONFIG_LIVEPATCH
-/*
- * Iterate over all symbols in vmlinux.  For symbols from modules use
- * module_kallsyms_on_each_symbol instead.
- */
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 				      unsigned long),
 			    void *data)
@@ -220,9 +215,8 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
 		if (ret != 0)
 			return ret;
 	}
-	return 0;
+	return module_kallsyms_on_each_symbol(fn, data);
 }
-#endif /* CONFIG_LIVEPATCH */
 
 static unsigned long get_symbol_pos(unsigned long addr,
 				    unsigned long *symbolsize,
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 5353edfad8e1..c0d2ad9b4705 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -61,11 +61,16 @@ static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
 static struct file *
 get_file_raw_ptr(struct task_struct *task, unsigned int idx)
 {
-	struct file *file;
+	struct file *file = NULL;
 
+	task_lock(task);
 	rcu_read_lock();
-	file = task_lookup_fd_rcu(task, idx);
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
 	rcu_read_unlock();
+	task_unlock(task);
 
 	return file;
 }
@@ -102,6 +107,7 @@ static int kcmp_epoll_target(struct task_struct *task1,
 {
 	struct file *filp, *filp_epoll, *filp_tgt;
 	struct kcmp_epoll_slot slot;
+	struct files_struct *files;
 
 	if (copy_from_user(&slot, uslot, sizeof(slot)))
 		return -EFAULT;
@@ -110,12 +116,23 @@ static int kcmp_epoll_target(struct task_struct *task1,
 	if (!filp)
 		return -EBADF;
 
-	filp_epoll = fget_task(task2, slot.efd);
-	if (!filp_epoll)
+	files = get_files_struct(task2);
+	if (!files)
 		return -EBADF;
 
-	filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
-	fput(filp_epoll);
+	spin_lock(&files->file_lock);
+	filp_epoll = fcheck_files(files, slot.efd);
+	if (filp_epoll)
+		get_file(filp_epoll);
+	else
+		filp_tgt = ERR_PTR(-EBADF);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+
+	if (filp_epoll) {
+		filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+		fput(filp_epoll);
+	}
 
 	if (IS_ERR(filp_tgt))
 		return PTR_ERR(filp_tgt);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ec9f61995004..9d736f57b84f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -262,21 +262,6 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-/**
- * kthread_exit - Cause the current kthread return @result to kthread_stop().
- * @result: The integer value to return to kthread_stop().
- *
- * While kthread_exit can be called directly, it exists so that
- * functions which do some additional work in non-modular code such as
- * module_put_and_kthread_exit can be implemented.
- *
- * Does not return.
- */
-void __noreturn kthread_exit(long result)
-{
-	do_exit(result);
-}
-
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -294,13 +279,13 @@ static int kthread(void *_create)
 	done = xchg(&create->done, NULL);
 	if (!done) {
 		kfree(create);
-		kthread_exit(-EINTR);
+		do_exit(-EINTR);
 	}
 
 	if (!self) {
 		create->result = ERR_PTR(-ENOMEM);
 		complete(done);
-		kthread_exit(-ENOMEM);
+		do_exit(-ENOMEM);
 	}
 
 	self->threadfn = threadfn;
@@ -327,7 +312,7 @@ static int kthread(void *_create)
 		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
-	kthread_exit(ret);
+	do_exit(ret);
 }
 
 /* called from do_fork() to get node information for about to be created task */
@@ -637,7 +622,7 @@ EXPORT_SYMBOL_GPL(kthread_park);
  * instead of calling wake_up_process(): the thread will exit without
  * calling threadfn().
  *
- * If threadfn() may call kthread_exit() itself, the caller must ensure
+ * If threadfn() may call do_exit() itself, the caller must ensure
  * task_struct can't go away.
  *
  * Returns the result of threadfn(), or %-EINTR if wake_up_process()
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 147ed154ebc7..f5faf935c2d8 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -19,7 +19,6 @@
 #include <linux/moduleloader.h>
 #include <linux/completion.h>
 #include <linux/memory.h>
-#include <linux/rcupdate.h>
 #include <asm/cacheflush.h>
 #include "core.h"
 #include "patch.h"
@@ -58,7 +57,7 @@ static void klp_find_object_module(struct klp_object *obj)
 	if (!klp_is_module(obj))
 		return;
 
-	rcu_read_lock_sched();
+	mutex_lock(&module_mutex);
 	/*
 	 * We do not want to block removal of patched modules and therefore
 	 * we do not take a reference here. The patches are removed by
@@ -75,7 +74,7 @@ static void klp_find_object_module(struct klp_object *obj)
 	if (mod && mod->klp_alive)
 		obj->mod = mod;
 
-	rcu_read_unlock_sched();
+	mutex_unlock(&module_mutex);
 }
 
 static bool klp_initialized(void)
@@ -164,10 +163,12 @@ static int klp_find_object_symbol(const char *objname, const char *name,
 		.pos = sympos,
 	};
 
+	mutex_lock(&module_mutex);
 	if (objname)
 		module_kallsyms_on_each_symbol(klp_find_callback, &args);
 	else
 		kallsyms_on_each_symbol(klp_find_callback, &args);
+	mutex_unlock(&module_mutex);
 
 	/*
 	 * Ensure an address was found. If sympos is 0, ensure symbol is unique;
diff --git a/kernel/module.c b/kernel/module.c
index 1ea89ae7c2cc..93fade94f108 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -259,6 +259,11 @@ static void mod_update_bounds(struct module *mod)
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
 
+static void module_assert_mutex(void)
+{
+	lockdep_assert_held(&module_mutex);
+}
+
 static void module_assert_mutex_or_preempt(void)
 {
 #ifdef CONFIG_LOCKDEP
@@ -338,14 +343,14 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
 
 /*
  * A thread that wants to hold a reference to a module only while it
- * is running can call this to safely exit.
+ * is running can call this to safely exit.  nfsd and lockd use this.
  */
-void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
+void __noreturn __module_put_and_exit(struct module *mod, long code)
 {
 	module_put(mod);
-	kthread_exit(code);
+	do_exit(code);
 }
-EXPORT_SYMBOL(__module_put_and_kthread_exit);
+EXPORT_SYMBOL(__module_put_and_exit);
 
 /* Find a module section: 0 means not found. */
 static unsigned int find_sec(const struct load_info *info, const char *name)
@@ -640,6 +645,7 @@ static struct module *find_module_all(const char *name, size_t len,
 
 struct module *find_module(const char *name)
 {
+	module_assert_mutex();
 	return find_module_all(name, strlen(name), false);
 }
 
@@ -4489,7 +4495,6 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 	return ret;
 }
 
-#ifdef CONFIG_LIVEPATCH
 int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
 					     struct module *, unsigned long),
 				   void *data)
@@ -4498,7 +4503,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
 	unsigned int i;
 	int ret;
 
-	mutex_lock(&module_mutex);
+	module_assert_mutex();
+
 	list_for_each_entry(mod, &modules, list) {
 		/* We hold module_mutex: no need for rcu_dereference_sched */
 		struct mod_kallsyms *kallsyms = mod->kallsyms;
@@ -4514,13 +4520,11 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
 			ret = fn(data, kallsyms_symbol_name(kallsyms, i),
 				 mod, kallsyms_symbol_value(sym));
 			if (ret != 0)
-				break;
+				return ret;
 		}
 	}
-	mutex_unlock(&module_mutex);
-	return ret;
+	return 0;
 }
-#endif /* CONFIG_LIVEPATCH */
 #endif /* CONFIG_KALLSYMS */
 
 static void cfi_init(struct module *mod)
diff --git a/kernel/pid.c b/kernel/pid.c
index 15bbb9ddb2bf..48babb1dd3e1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -551,21 +551,13 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
  * Note, that this function can only be called after the fd table has
  * been unshared to avoid leaking the pidfd to the new process.
  *
- * This symbol should not be explicitly exported to loadable modules.
- *
  * Return: On success, a cloexec pidfd is returned.
  *         On error, a negative errno number will be returned.
  */
-int pidfd_create(struct pid *pid, unsigned int flags)
+static int pidfd_create(struct pid *pid, unsigned int flags)
 {
 	int fd;
 
-	if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
-		return -EINVAL;
-
-	if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
-		return -EINVAL;
-
 	fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
 			      flags | O_RDWR | O_CLOEXEC);
 	if (fd < 0)
@@ -605,7 +597,10 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
 	if (!p)
 		return -ESRCH;
 
-	fd = pidfd_create(p, flags);
+	if (pid_has_task(p, PIDTYPE_TGID))
+		fd = pidfd_create(p, flags);
+	else
+		fd = -EINVAL;
 
 	put_pid(p);
 	return fd;
diff --git a/kernel/sys.c b/kernel/sys.c
index f268f24a87ec..1de01fab5788 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1883,7 +1883,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
 		goto exit;
 
-	err = file_permission(exe.file, MAY_EXEC);
+	err = inode_permission(inode, MAY_EXEC);
 	if (err)
 		goto exit;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index abd37b81e9d8..4deacde2e3ee 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -145,9 +145,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #ifdef CONFIG_INOTIFY_USER
 #include <linux/inotify.h>
 #endif
-#ifdef CONFIG_FANOTIFY
-#include <linux/fanotify.h>
-#endif
 
 #ifdef CONFIG_PROC_SYSCTL
 
@@ -549,21 +546,6 @@ static void proc_put_char(void **buf, size_t *size, char c)
 	}
 }
 
-static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
-				int *valp,
-				int write, void *data)
-{
-	if (write) {
-		*(bool *)valp = *lvalp;
-	} else {
-		int val = *(bool *)valp;
-
-		*lvalp = (unsigned long)val;
-		*negp = false;
-	}
-	return 0;
-}
-
 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 				 int *valp,
 				 int write, void *data)
@@ -826,26 +808,6 @@ static int do_proc_douintvec(struct ctl_table *table, int write,
 				   buffer, lenp, ppos, conv, data);
 }
 
-/**
- * proc_dobool - read/write a bool
- * @table: the sysctl table
- * @write: %TRUE if this is a write to the sysctl file
- * @buffer: the user buffer
- * @lenp: the size of the user buffer
- * @ppos: file position
- *
- * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
- * values from/to the user buffer, treated as an ASCII string.
- *
- * Returns 0 on success.
- */
-int proc_dobool(struct ctl_table *table, int write, void *buffer,
-		size_t *lenp, loff_t *ppos)
-{
-	return do_proc_dointvec(table, write, buffer, lenp, ppos,
-				do_proc_dobool_conv, NULL);
-}
-
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -1682,12 +1644,6 @@ int proc_dostring(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
-int proc_dobool(struct ctl_table *table, int write,
-		void *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
-
 int proc_dointvec(struct ctl_table *table, int write,
 		  void *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -3394,14 +3350,7 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0555,
 		.child		= inotify_table,
 	},
-#endif
-#ifdef CONFIG_FANOTIFY
-	{
-		.procname	= "fanotify",
-		.mode		= 0555,
-		.child		= fanotify_table,
-	},
-#endif
+#endif	
 #ifdef CONFIG_EPOLL
 	{
 		.procname	= "epoll",
@@ -3564,7 +3513,6 @@ int __init sysctl_init(void)
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
  */
-EXPORT_SYMBOL(proc_dobool);
 EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5453af26ff76..718357289899 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -124,9 +124,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
 	if (!p)
 		return true;
 	*p = '\0';
-	rcu_read_lock_sched();
+	mutex_lock(&module_mutex);
 	ret = !!find_module(tk->symbol);
-	rcu_read_unlock_sched();
+	mutex_unlock(&module_mutex);
 	*p = ':';
 
 	return ret;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 8d8874f1c35e..11b1596e2542 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -73,10 +73,6 @@ static struct ctl_table user_table[] = {
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
-#endif
-#ifdef CONFIG_FANOTIFY
-	UCOUNT_ENTRY("max_fanotify_groups"),
-	UCOUNT_ENTRY("max_fanotify_marks"),
 #endif
 	{ }
 };
diff --git a/mm/madvise.c b/mm/madvise.c
index ddecb4434ccf..410f366b5df4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -568,7 +568,7 @@ static inline bool can_do_file_pageout(struct vm_area_struct *vma)
 	 * opens a side channel.
 	 */
 	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
-	       file_permission(vma->vm_file, MAY_WRITE) == 0;
+		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
 }
 
 static long madvise_pageout(struct vm_area_struct *vma,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 67b098ec9453..c4077f277ffc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4954,7 +4954,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 
 	/* the process need read permission on control file */
 	/* AV: shouldn't we check that it's been opened for read instead? */
-	ret = file_permission(cfile.file, MAY_READ);
+	ret = inode_permission(file_inode(cfile.file), MAY_READ);
 	if (ret < 0)
 		goto out_put_cfile;
 
diff --git a/mm/mincore.c b/mm/mincore.c
index 7bdb4673f776..02db1a834021 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -167,7 +167,7 @@ static inline bool can_do_mincore(struct vm_area_struct *vma)
 	 * mappings, which opens a side channel.
 	 */
 	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
-	       file_permission(vma->vm_file, MAY_WRITE) == 0;
+		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
 }
 
 static const struct mm_walk_ops mincore_walk_ops = {
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 09b6d825124e..43c284158f63 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -535,7 +535,7 @@ static int bnep_session(void *arg)
 
 	up_write(&bnep_session_sem);
 	free_netdev(dev);
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 	return 0;
 }
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 90d130588a3e..83eb84e8e688 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -323,7 +323,7 @@ static int cmtp_session(void *arg)
 	up_write(&cmtp_session_sem);
 
 	kfree(session);
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 	return 0;
 }
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 3ff870599eb7..b946a6379433 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg)
 	l2cap_unregister_user(session->conn, &session->user);
 	hidp_session_put(session);
 
-	module_put_and_kthread_exit(0);
+	module_put_and_exit(0);
 	return 0;
 }
 
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index a857fc99431c..e265b8d38aa1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -800,7 +800,7 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 	scratch = alloc_page(GFP_KERNEL);
 	if (!scratch)
 		return -ENOMEM;
-	xdr_set_scratch_page(xdr, scratch);
+	xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE);
 
 	/* res->status */
 	err = gssx_dec_status(xdr, &res->status);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 329eac782cc5..784c8b24f164 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -707,11 +707,11 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
 /*
  * Verify the checksum on the header and return SVC_OK on success.
  * Otherwise, return SVC_DROP (in the case of a bad sequence number)
- * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat.
+ * or return SVC_DENIED and indicate error in authp.
  */
 static int
 gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
-		  __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
+		  __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp)
 {
 	struct gss_ctx		*ctx_id = rsci->mechctx;
 	struct xdr_buf		rpchdr;
@@ -725,7 +725,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 	iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
 	xdr_buf_from_iov(&iov, &rpchdr);
 
-	rqstp->rq_auth_stat = rpc_autherr_badverf;
+	*authp = rpc_autherr_badverf;
 	if (argv->iov_len < 4)
 		return SVC_DENIED;
 	flavor = svc_getnl(argv);
@@ -737,13 +737,13 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 	if (rqstp->rq_deferred) /* skip verification of revisited request */
 		return SVC_OK;
 	if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
-		rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
+		*authp = rpcsec_gsserr_credproblem;
 		return SVC_DENIED;
 	}
 
 	if (gc->gc_seq > MAXSEQ) {
 		trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq);
-		rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
+		*authp = rpcsec_gsserr_ctxproblem;
 		return SVC_DENIED;
 	}
 	if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq))
@@ -1038,8 +1038,6 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 	struct rpc_gss_wire_cred *gc = &svcdata->clcred;
 	int stat;
 
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
-
 	/*
 	 * A gss export can be specified either by:
 	 * 	export	*(sec=krb5,rw)
@@ -1055,8 +1053,6 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
 	stat = svcauth_unix_set_client(rqstp);
 	if (stat == SVC_DROP || stat == SVC_CLOSE)
 		return stat;
-
-	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
 
@@ -1140,7 +1136,7 @@ static void gss_free_in_token_pages(struct gssp_in_token *in_token)
 }
 
 static int gss_read_proxy_verf(struct svc_rqst *rqstp,
-			       struct rpc_gss_wire_cred *gc,
+			       struct rpc_gss_wire_cred *gc, __be32 *authp,
 			       struct xdr_netobj *in_handle,
 			       struct gssp_in_token *in_token)
 {
@@ -1149,7 +1145,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
 	int pages, i, res, pgto, pgfrom;
 	size_t inlen, to_offs, from_offs;
 
-	res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle);
+	res = gss_read_common_verf(gc, argv, authp, in_handle);
 	if (res)
 		return res;
 
@@ -1230,7 +1226,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
  * Otherwise, drop the request pending an answer to the upcall.
  */
 static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
-				   struct rpc_gss_wire_cred *gc)
+			struct rpc_gss_wire_cred *gc, __be32 *authp)
 {
 	struct kvec *argv = &rqstp->rq_arg.head[0];
 	struct kvec *resv = &rqstp->rq_res.head[0];
@@ -1239,7 +1235,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
 	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
 	memset(&rsikey, 0, sizeof(rsikey));
-	ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat,
+	ret = gss_read_verf(gc, argv, authp,
 			    &rsikey.in_handle, &rsikey.in_token);
 	if (ret)
 		return ret;
@@ -1342,7 +1338,7 @@ out:
 }
 
 static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
-				  struct rpc_gss_wire_cred *gc)
+			struct rpc_gss_wire_cred *gc, __be32 *authp)
 {
 	struct kvec *resv = &rqstp->rq_res.head[0];
 	struct xdr_netobj cli_handle;
@@ -1354,7 +1350,8 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	memset(&ud, 0, sizeof(ud));
-	ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token);
+	ret = gss_read_proxy_verf(rqstp, gc, authp,
+				  &ud.in_handle, &ud.in_token);
 	if (ret)
 		return ret;
 
@@ -1527,7 +1524,7 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
  * response here and return SVC_COMPLETE.
  */
 static int
-svcauth_gss_accept(struct svc_rqst *rqstp)
+svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -1540,7 +1537,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
 	int		ret;
 	struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
+	*authp = rpc_autherr_badcred;
 	if (!svcdata)
 		svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
 	if (!svcdata)
@@ -1577,22 +1574,22 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
 	if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
 		goto auth_err;
 
-	rqstp->rq_auth_stat = rpc_autherr_badverf;
+	*authp = rpc_autherr_badverf;
 	switch (gc->gc_proc) {
 	case RPC_GSS_PROC_INIT:
 	case RPC_GSS_PROC_CONTINUE_INIT:
 		if (use_gss_proxy(SVC_NET(rqstp)))
-			return svcauth_gss_proxy_init(rqstp, gc);
+			return svcauth_gss_proxy_init(rqstp, gc, authp);
 		else
-			return svcauth_gss_legacy_init(rqstp, gc);
+			return svcauth_gss_legacy_init(rqstp, gc, authp);
 	case RPC_GSS_PROC_DATA:
 	case RPC_GSS_PROC_DESTROY:
 		/* Look up the context, and check the verifier: */
-		rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
+		*authp = rpcsec_gsserr_credproblem;
 		rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
 		if (!rsci)
 			goto auth_err;
-		switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) {
+		switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
 		case SVC_OK:
 			break;
 		case SVC_DENIED:
@@ -1602,7 +1599,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
 		}
 		break;
 	default:
-		rqstp->rq_auth_stat = rpc_autherr_rejectedcred;
+		*authp = rpc_autherr_rejectedcred;
 		goto auth_err;
 	}
 
@@ -1618,13 +1615,13 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
 		svc_putnl(resv, RPC_SUCCESS);
 		goto complete;
 	case RPC_GSS_PROC_DATA:
-		rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
+		*authp = rpcsec_gsserr_ctxproblem;
 		svcdata->verf_start = resv->iov_base + resv->iov_len;
 		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 			goto auth_err;
 		rqstp->rq_cred = rsci->cred;
 		get_group_info(rsci->cred.cr_group_info);
-		rqstp->rq_auth_stat = rpc_autherr_badcred;
+		*authp = rpc_autherr_badcred;
 		switch (gc->gc_svc) {
 		case RPC_GSS_SVC_NONE:
 			break;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index a4c9d410eb8d..a00890962e11 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -821,7 +821,6 @@ void rpc_exit_task(struct rpc_task *task)
 	else if (task->tk_client)
 		rpc_count_iostats(task, task->tk_client->cl_metrics);
 	if (task->tk_ops->rpc_call_done != NULL) {
-		trace_rpc_task_call_done(task, task->tk_ops->rpc_call_done);
 		task->tk_ops->rpc_call_done(task, task->tk_calldata);
 		if (task->tk_action != NULL) {
 			/* Always release the RPC slot and buffer memory */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 26d972c54a59..cfe8b911ca01 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -35,37 +35,18 @@
 
 static void svc_unregister(const struct svc_serv *serv, struct net *net);
 
-#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL
+#define svc_serv_is_pooled(serv)    ((serv)->sv_ops->svo_function)
 
-/*
- * Mode for mapping cpus to pools.
- */
-enum {
-	SVC_POOL_AUTO = -1,	/* choose one of the others */
-	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
-				 * (legacy & UP mode) */
-	SVC_POOL_PERCPU,	/* one pool per cpu */
-	SVC_POOL_PERNODE	/* one pool per numa node */
-};
+#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL
 
 /*
  * Structure for mapping cpus to pools and vice versa.
  * Setup once during sunrpc initialisation.
  */
-
-struct svc_pool_map {
-	int count;			/* How many svc_servs use us */
-	int mode;			/* Note: int not enum to avoid
-					 * warnings about "enumeration value
-					 * not handled in switch" */
-	unsigned int npools;
-	unsigned int *pool_to;		/* maps pool id to cpu or node */
-	unsigned int *to_pool;		/* maps cpu or node to pool id */
-};
-
-static struct svc_pool_map svc_pool_map = {
+struct svc_pool_map svc_pool_map = {
 	.mode = SVC_POOL_DEFAULT
 };
+EXPORT_SYMBOL_GPL(svc_pool_map);
 
 static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
 
@@ -236,12 +217,10 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
 
 /*
  * Add a reference to the global map of cpus to pools (and
- * vice versa) if pools are in use.
- * Initialise the map if we're the first user.
- * Returns the number of pools. If this is '1', no reference
- * was taken.
+ * vice versa).  Initialise the map if we're the first user.
+ * Returns the number of pools.
  */
-static unsigned int
+unsigned int
 svc_pool_map_get(void)
 {
 	struct svc_pool_map *m = &svc_pool_map;
@@ -251,7 +230,6 @@ svc_pool_map_get(void)
 
 	if (m->count++) {
 		mutex_unlock(&svc_pool_map_mutex);
-		WARN_ON_ONCE(m->npools <= 1);
 		return m->npools;
 	}
 
@@ -267,36 +245,30 @@ svc_pool_map_get(void)
 		break;
 	}
 
-	if (npools <= 0) {
+	if (npools < 0) {
 		/* default, or memory allocation failure */
 		npools = 1;
 		m->mode = SVC_POOL_GLOBAL;
 	}
 	m->npools = npools;
 
-	if (npools == 1)
-		/* service is unpooled, so doesn't hold a reference */
-		m->count--;
-
 	mutex_unlock(&svc_pool_map_mutex);
-	return npools;
+	return m->npools;
 }
+EXPORT_SYMBOL_GPL(svc_pool_map_get);
 
 /*
- * Drop a reference to the global map of cpus to pools, if
- * pools were in use, i.e. if npools > 1.
+ * Drop a reference to the global map of cpus to pools.
  * When the last reference is dropped, the map data is
  * freed; this allows the sysadmin to change the pool
  * mode using the pool_mode module option without
  * rebooting or re-loading sunrpc.ko.
  */
-static void
-svc_pool_map_put(int npools)
+void
+svc_pool_map_put(void)
 {
 	struct svc_pool_map *m = &svc_pool_map;
 
-	if (npools <= 1)
-		return;
 	mutex_lock(&svc_pool_map_mutex);
 
 	if (!--m->count) {
@@ -309,6 +281,7 @@ svc_pool_map_put(int npools)
 
 	mutex_unlock(&svc_pool_map_mutex);
 }
+EXPORT_SYMBOL_GPL(svc_pool_map_put);
 
 static int svc_pool_map_get_node(unsigned int pidx)
 {
@@ -365,18 +338,21 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
 	struct svc_pool_map *m = &svc_pool_map;
 	unsigned int pidx = 0;
 
-	if (serv->sv_nrpools <= 1)
-		return serv->sv_pools;
-
-	switch (m->mode) {
-	case SVC_POOL_PERCPU:
-		pidx = m->to_pool[cpu];
-		break;
-	case SVC_POOL_PERNODE:
-		pidx = m->to_pool[cpu_to_node(cpu)];
-		break;
+	/*
+	 * An uninitialised map happens in a pure client when
+	 * lockd is brought up, so silently treat it the
+	 * same as SVC_POOL_GLOBAL.
+	 */
+	if (svc_serv_is_pooled(serv)) {
+		switch (m->mode) {
+		case SVC_POOL_PERCPU:
+			pidx = m->to_pool[cpu];
+			break;
+		case SVC_POOL_PERNODE:
+			pidx = m->to_pool[cpu_to_node(cpu)];
+			break;
+		}
 	}
-
 	return &serv->sv_pools[pidx % serv->sv_nrpools];
 }
 
@@ -446,7 +422,7 @@ __svc_init_bc(struct svc_serv *serv)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-	     int (*threadfn)(void *data))
+	     const struct svc_serv_ops *ops)
 {
 	struct svc_serv	*serv;
 	unsigned int vers;
@@ -457,13 +433,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		return NULL;
 	serv->sv_name      = prog->pg_name;
 	serv->sv_program   = prog;
-	kref_init(&serv->sv_refcnt);
+	serv->sv_nrthreads = 1;
 	serv->sv_stats     = prog->pg_stats;
 	if (bufsize > RPCSVC_MAXPAYLOAD)
 		bufsize = RPCSVC_MAXPAYLOAD;
 	serv->sv_max_payload = bufsize? bufsize : 4096;
 	serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
-	serv->sv_threadfn = threadfn;
+	serv->sv_ops = ops;
 	xdrsize = 0;
 	while (prog) {
 		prog->pg_lovers = prog->pg_nvers-1;
@@ -509,56 +485,59 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 	return serv;
 }
 
-/**
- * svc_create - Create an RPC service
- * @prog: the RPC program the new service will handle
- * @bufsize: maximum message size for @prog
- * @threadfn: a function to service RPC requests for @prog
- *
- * Returns an instantiated struct svc_serv object or NULL.
- */
-struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
-			    int (*threadfn)(void *data))
+struct svc_serv *
+svc_create(struct svc_program *prog, unsigned int bufsize,
+	   const struct svc_serv_ops *ops)
 {
-	return __svc_create(prog, bufsize, 1, threadfn);
+	return __svc_create(prog, bufsize, /*npools*/1, ops);
 }
 EXPORT_SYMBOL_GPL(svc_create);
 
-/**
- * svc_create_pooled - Create an RPC service with pooled threads
- * @prog: the RPC program the new service will handle
- * @bufsize: maximum message size for @prog
- * @threadfn: a function to service RPC requests for @prog
- *
- * Returns an instantiated struct svc_serv object or NULL.
- */
-struct svc_serv *svc_create_pooled(struct svc_program *prog,
-				   unsigned int bufsize,
-				   int (*threadfn)(void *data))
+struct svc_serv *
+svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
+		  const struct svc_serv_ops *ops)
 {
 	struct svc_serv *serv;
 	unsigned int npools = svc_pool_map_get();
 
-	serv = __svc_create(prog, bufsize, npools, threadfn);
+	serv = __svc_create(prog, bufsize, npools, ops);
 	if (!serv)
 		goto out_err;
 	return serv;
 out_err:
-	svc_pool_map_put(npools);
+	svc_pool_map_put();
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(svc_create_pooled);
 
+void svc_shutdown_net(struct svc_serv *serv, struct net *net)
+{
+	svc_close_net(serv, net);
+
+	if (serv->sv_ops->svo_shutdown)
+		serv->sv_ops->svo_shutdown(serv, net);
+}
+EXPORT_SYMBOL_GPL(svc_shutdown_net);
+
 /*
  * Destroy an RPC service. Should be called with appropriate locking to
- * protect sv_permsocks and sv_tempsocks.
+ * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
  */
 void
-svc_destroy(struct kref *ref)
+svc_destroy(struct svc_serv *serv)
 {
-	struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+	dprintk("svc: svc_destroy(%s, %d)\n",
+				serv->sv_program->pg_name,
+				serv->sv_nrthreads);
+
+	if (serv->sv_nrthreads) {
+		if (--(serv->sv_nrthreads) != 0) {
+			svc_sock_update_bufs(serv);
+			return;
+		}
+	} else
+		printk("svc_destroy: no threads for serv=%p!\n", serv);
 
-	dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
 	del_timer_sync(&serv->sv_temptimer);
 
 	/*
@@ -570,7 +549,8 @@ svc_destroy(struct kref *ref)
 
 	cache_clean_deferred(serv);
 
-	svc_pool_map_put(serv->sv_nrpools);
+	if (svc_serv_is_pooled(serv))
+		svc_pool_map_put();
 
 	kfree(serv->sv_pools);
 	kfree(serv);
@@ -634,10 +614,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
 	rqstp->rq_server = serv;
 	rqstp->rq_pool = pool;
 
-	rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0);
-	if (!rqstp->rq_scratch_page)
-		goto out_enomem;
-
 	rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
 	if (!rqstp->rq_argp)
 		goto out_enomem;
@@ -656,7 +632,7 @@ out_enomem:
 }
 EXPORT_SYMBOL_GPL(svc_rqst_alloc);
 
-static struct svc_rqst *
+struct svc_rqst *
 svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 {
 	struct svc_rqst	*rqstp;
@@ -665,17 +641,14 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	if (!rqstp)
 		return ERR_PTR(-ENOMEM);
 
-	svc_get(serv);
-	spin_lock_bh(&serv->sv_lock);
-	serv->sv_nrthreads += 1;
-	spin_unlock_bh(&serv->sv_lock);
-
+	serv->sv_nrthreads++;
 	spin_lock_bh(&pool->sp_lock);
 	pool->sp_nrthreads++;
 	list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
 	spin_unlock_bh(&pool->sp_lock);
 	return rqstp;
 }
+EXPORT_SYMBOL_GPL(svc_prepare_thread);
 
 /*
  * Choose a pool in which to create a new thread, for svc_set_num_threads
@@ -749,9 +722,11 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 		if (IS_ERR(rqstp))
 			return PTR_ERR(rqstp);
 
-		task = kthread_create_on_node(serv->sv_threadfn, rqstp,
+		__module_get(serv->sv_ops->svo_module);
+		task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
 					      node, "%s", serv->sv_name);
 		if (IS_ERR(task)) {
+			module_put(serv->sv_ops->svo_module);
 			svc_exit_thread(rqstp);
 			return PTR_ERR(task);
 		}
@@ -767,13 +742,59 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	return 0;
 }
 
+
+/* Send SIGINT to threads picked by choose_victim(); always returns 0. */
+static int
+svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	struct task_struct *task;
+	unsigned int state = serv->sv_nrthreads-1;
+
+	/* one SIGINT per victim; stop when nrservs reaches 0 or none is left */
+	do {
+		task = choose_victim(serv, pool, &state);
+		if (task == NULL)
+			break;
+		send_sig(SIGINT, task, 1);
+		nrservs++;
+	} while (nrservs < 0);
+
+	return 0;
+}
+
 /*
  * Create or destroy enough new threads to make the number
  * of threads the given number.  If `pool' is non-NULL, applies
  * only to threads in that pool, otherwise round-robins between
  * all pools.  Caller must ensure that mutual exclusion between this and
  * server startup or shutdown.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
  */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+	if (pool == NULL) {
+		/* The -1 assumes caller has done a svc_get() */
+		nrservs -= (serv->sv_nrthreads-1);
+	} else {
+		spin_lock_bh(&pool->sp_lock);
+		nrservs -= pool->sp_nrthreads;
+		spin_unlock_bh(&pool->sp_lock);
+	}
+
+	if (nrservs > 0)
+		return svc_start_kthreads(serv, pool, nrservs);
+	if (nrservs < 0)
+		return svc_signal_kthreads(serv, pool, nrservs);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(svc_set_num_threads);
 
 /* destroy old threads */
 static int
@@ -798,10 +819,11 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 }
 
 int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 {
 	if (pool == NULL) {
-		nrservs -= serv->sv_nrthreads;
+		/* The -1 assumes caller has done a svc_get() */
+		nrservs -= (serv->sv_nrthreads-1);
 	} else {
 		spin_lock_bh(&pool->sp_lock);
 		nrservs -= pool->sp_nrthreads;
@@ -814,28 +836,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 		return svc_stop_kthreads(serv, pool, nrservs);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(svc_set_num_threads);
-
-/**
- * svc_rqst_replace_page - Replace one page in rq_pages[]
- * @rqstp: svc_rqst with pages to replace
- * @page: replacement page
- *
- * When replacing a page in rq_pages, batch the release of the
- * replaced pages to avoid hammering the page allocator.
- */
-void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
-{
-	if (*rqstp->rq_next_page) {
-		if (!pagevec_space(&rqstp->rq_pvec))
-			__pagevec_release(&rqstp->rq_pvec);
-		pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page);
-	}
-
-	get_page(page);
-	*(rqstp->rq_next_page++) = page;
-}
-EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
+EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
 
 /*
  * Called from a server thread as it's exiting. Caller must hold the "service
@@ -845,7 +846,6 @@ void
 svc_rqst_free(struct svc_rqst *rqstp)
 {
 	svc_release_buffer(rqstp);
-	put_page(rqstp->rq_scratch_page);
 	kfree(rqstp->rq_resp);
 	kfree(rqstp->rq_argp);
 	kfree(rqstp->rq_auth_data);
@@ -865,14 +865,11 @@ svc_exit_thread(struct svc_rqst *rqstp)
 		list_del_rcu(&rqstp->rq_all);
 	spin_unlock_bh(&pool->sp_lock);
 
-	spin_lock_bh(&serv->sv_lock);
-	serv->sv_nrthreads -= 1;
-	spin_unlock_bh(&serv->sv_lock);
-	svc_sock_update_bufs(serv);
-
 	svc_rqst_free(rqstp);
 
-	svc_put(serv);
+	/* Release the server */
+	if (serv)
+		svc_destroy(serv);
 }
 EXPORT_SYMBOL_GPL(svc_exit_thread);
 
@@ -1164,6 +1161,22 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
 #endif
 
+__be32
+svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
+{
+	set_bit(RQ_AUTHERR, &rqstp->rq_flags);
+	return auth_err;
+}
+EXPORT_SYMBOL_GPL(svc_return_autherr);
+
+static __be32
+svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
+{
+	if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
+		return *statp;
+	return rpc_auth_ok;
+}
+
 static int
 svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
@@ -1187,7 +1200,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	    test_bit(RQ_DROPME, &rqstp->rq_flags))
 		return 0;
 
-	if (rqstp->rq_auth_stat != rpc_auth_ok)
+	if (test_bit(RQ_AUTHERR, &rqstp->rq_flags))
 		return 1;
 
 	if (*statp != rpc_success)
@@ -1237,7 +1250,7 @@ svc_generic_init_request(struct svc_rqst *rqstp,
 	rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
 
 	/* Initialize storage for argp and resp */
-	memset(rqstp->rq_argp, 0, procp->pc_argzero);
+	memset(rqstp->rq_argp, 0, procp->pc_argsize);
 	memset(rqstp->rq_resp, 0, procp->pc_ressize);
 
 	/* Bump per-procedure stats counter */
@@ -1266,7 +1279,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	struct svc_process_info process;
 	__be32			*statp;
 	u32			prog, vers;
-	__be32			rpc_stat;
+	__be32			auth_stat, rpc_stat;
 	int			auth_res;
 	__be32			*reply_statp;
 
@@ -1309,12 +1322,14 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	 * We do this before anything else in order to get a decent
 	 * auth verifier.
 	 */
-	auth_res = svc_authenticate(rqstp);
+	auth_res = svc_authenticate(rqstp, &auth_stat);
 	/* Also give the program a chance to reject this call: */
-	if (auth_res == SVC_OK && progp)
+	if (auth_res == SVC_OK && progp) {
+		auth_stat = rpc_autherr_badcred;
 		auth_res = progp->pg_authenticate(rqstp);
+	}
 	if (auth_res != SVC_OK)
-		trace_svc_authenticate(rqstp, auth_res);
+		trace_svc_authenticate(rqstp, auth_res, auth_stat);
 	switch (auth_res) {
 	case SVC_OK:
 		break;
@@ -1373,15 +1388,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 			goto release_dropit;
 		if (*statp == rpc_garbage_args)
 			goto err_garbage;
+		auth_stat = svc_get_autherr(rqstp, statp);
+		if (auth_stat != rpc_auth_ok)
+			goto err_release_bad_auth;
 	} else {
 		dprintk("svc: calling dispatcher\n");
 		if (!process.dispatch(rqstp, statp))
 			goto release_dropit; /* Release reply info */
 	}
 
-	if (rqstp->rq_auth_stat != rpc_auth_ok)
-		goto err_release_bad_auth;
-
 	/* Check RPC status result */
 	if (*statp != rpc_success)
 		resv->iov_len = ((void*)statp)  - resv->iov_base + 4;
@@ -1410,7 +1425,7 @@ release_dropit:
 	svc_authorise(rqstp);
 close_xprt:
 	if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
-		svc_xprt_close(rqstp->rq_xprt);
+		svc_close_xprt(rqstp->rq_xprt);
 	dprintk("svc: svc_process close\n");
 	return 0;
 
@@ -1431,14 +1446,13 @@ err_release_bad_auth:
 	if (procp->pc_release)
 		procp->pc_release(rqstp);
 err_bad_auth:
-	dprintk("svc: authentication failed (%d)\n",
-		be32_to_cpu(rqstp->rq_auth_stat));
+	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
 	serv->sv_stats->rpcbadauth++;
 	/* Restore write pointer to location of accept status: */
 	xdr_ressize_check(rqstp, reply_statp);
 	svc_putnl(resv, 1);	/* REJECT */
 	svc_putnl(resv, 1);	/* AUTH_ERROR */
-	svc_putu32(resv, rqstp->rq_auth_stat);	/* status */
+	svc_putnl(resv, ntohl(auth_stat));	/* status */
 	goto sendit;
 
 err_bad_prog:
@@ -1612,7 +1626,7 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
 EXPORT_SYMBOL_GPL(svc_max_payload);
 
 /**
- * svc_encode_result_payload - mark a range of bytes as a result payload
+ * svc_encode_read_payload - mark a range of bytes as a READ payload
  * @rqstp: svc_rqst to operate on
  * @offset: payload's byte offset in rqstp->rq_res
  * @length: size of payload, in bytes
@@ -1620,28 +1634,26 @@ EXPORT_SYMBOL_GPL(svc_max_payload);
  * Returns zero on success, or a negative errno if a permanent
  * error occurred.
  */
-int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset,
-			      unsigned int length)
+int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset,
+			    unsigned int length)
 {
-	return rqstp->rq_xprt->xpt_ops->xpo_result_payload(rqstp, offset,
-							   length);
+	return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length);
 }
-EXPORT_SYMBOL_GPL(svc_encode_result_payload);
+EXPORT_SYMBOL_GPL(svc_encode_read_payload);
 
 /**
  * svc_fill_write_vector - Construct data argument for VFS write call
  * @rqstp: svc_rqst to operate on
- * @payload: xdr_buf containing only the write data payload
+ * @pages: list of pages containing data payload
+ * @first: buffer containing first section of write payload
+ * @total: total number of bytes of write payload
  *
  * Fills in rqstp::rq_vec, and returns the number of elements.
  */
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
-				   struct xdr_buf *payload)
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
+				   struct kvec *first, size_t total)
 {
-	struct page **pages = payload->pages;
-	struct kvec *first = payload->head;
 	struct kvec *vec = rqstp->rq_vec;
-	size_t total = payload->len;
 	unsigned int i;
 
 	/* Some types of transport can present the write payload
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d1eacf3358b8..06e503466c32 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -233,35 +233,30 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 	return xprt;
 }
 
-/**
- * svc_xprt_received - start next receiver thread
- * @xprt: controlling transport
- *
- * The caller must hold the XPT_BUSY bit and must
+/*
+ * svc_xprt_received conditionally queues the transport for processing
+ * by another thread. The caller must hold the XPT_BUSY bit and must
  * not thereafter touch transport data.
  *
  * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
  * insufficient) data.
  */
-void svc_xprt_received(struct svc_xprt *xprt)
+static void svc_xprt_received(struct svc_xprt *xprt)
 {
 	if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) {
 		WARN_ONCE(1, "xprt=0x%p already busy!", xprt);
 		return;
 	}
 
-	trace_svc_xprt_received(xprt);
-
 	/* As soon as we clear busy, the xprt could be closed and
-	 * 'put', so we need a reference to call svc_xprt_enqueue with:
+	 * 'put', so we need a reference to call svc_enqueue_xprt with:
 	 */
 	svc_xprt_get(xprt);
 	smp_mb__before_atomic();
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_enqueue(xprt);
+	xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
 	svc_xprt_put(xprt);
 }
-EXPORT_SYMBOL_GPL(svc_xprt_received);
 
 void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
 {
@@ -272,7 +267,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
 	svc_xprt_received(new);
 }
 
-static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 			    struct net *net, const int family,
 			    const unsigned short port, int flags,
 			    const struct cred *cred)
@@ -308,35 +303,21 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
 	return -EPROTONOSUPPORT;
 }
 
-/**
- * svc_xprt_create - Add a new listener to @serv
- * @serv: target RPC service
- * @xprt_name: transport class name
- * @net: network namespace
- * @family: network address family
- * @port: listener port
- * @flags: SVC_SOCK flags
- * @cred: credential to bind to this transport
- *
- * Return values:
- *   %0: New listener added successfully
- *   %-EPROTONOSUPPORT: Requested transport type not supported
- */
-int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 		    struct net *net, const int family,
 		    const unsigned short port, int flags,
 		    const struct cred *cred)
 {
 	int err;
 
-	err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+	err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
 	if (err == -EPROTONOSUPPORT) {
 		request_module("svc%s", xprt_name);
-		err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+		err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
 	}
 	return err;
 }
-EXPORT_SYMBOL_GPL(svc_xprt_create);
+EXPORT_SYMBOL_GPL(svc_create_xprt);
 
 /*
  * Copy the local and remote xprt addresses to the rqstp structure
@@ -412,8 +393,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
 	smp_rmb();
 	xpt_flags = READ_ONCE(xprt->xpt_flags);
 
-	if (xpt_flags & BIT(XPT_BUSY))
-		return false;
 	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
 		return true;
 	if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
@@ -426,12 +405,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
 	return false;
 }
 
-/**
- * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
- * @xprt: transport with data pending
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
+void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 {
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp = NULL;
@@ -475,6 +449,19 @@ out_unlock:
 	put_cpu();
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
 }
+EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
+
+/*
+ * Queue up a transport with data pending. If there are idle nfsd
+ * processes, wake 'em up.
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
+{
+	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
+		return;
+	xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
+}
 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
 /*
@@ -533,7 +520,6 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
 	kfree(rqstp->rq_deferred);
 	rqstp->rq_deferred = NULL;
 
-	pagevec_release(&rqstp->rq_pvec);
 	svc_free_res_pages(rqstp);
 	rqstp->rq_res.page_len = 0;
 	rqstp->rq_res.page_base = 0;
@@ -660,8 +646,6 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	int pages;
 	int i;
 
-	pagevec_init(&rqstp->rq_pvec);
-
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
 	pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
 	if (pages > RPCSVC_MAXPAGES) {
@@ -674,13 +658,13 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
 			if (!p) {
-				set_current_state(TASK_IDLE);
-				if (kthread_should_stop()) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (signalled() || kthread_should_stop()) {
 					set_current_state(TASK_RUNNING);
 					return -EINTR;
 				}
+				schedule_timeout(msecs_to_jiffies(500));
 			}
-			freezable_schedule_timeout(msecs_to_jiffies(500));
 			rqstp->rq_pages[i] = p;
 		}
 	rqstp->rq_page_end = &rqstp->rq_pages[i];
@@ -713,7 +697,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 		return false;
 
 	/* are we shutting down? */
-	if (kthread_should_stop())
+	if (signalled() || kthread_should_stop())
 		return false;
 
 	/* are we freezing? */
@@ -735,14 +719,18 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	if (rqstp->rq_xprt)
 		goto out_found;
 
-	set_current_state(TASK_IDLE);
+	/*
+	 * We have to be able to interrupt this wait
+	 * to bring down the daemons ...
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
 	smp_mb__before_atomic();
 	clear_bit(SP_CONGESTED, &pool->sp_flags);
 	clear_bit(RQ_BUSY, &rqstp->rq_flags);
 	smp_mb__after_atomic();
 
 	if (likely(rqst_should_sleep(rqstp)))
-		time_left = freezable_schedule_timeout(timeout);
+		time_left = schedule_timeout(timeout);
 	else
 		__set_current_state(TASK_RUNNING);
 
@@ -757,7 +745,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	if (!time_left)
 		atomic_long_inc(&pool->sp_stats.threads_timedout);
 
-	if (kthread_should_stop())
+	if (signalled() || kthread_should_stop())
 		return ERR_PTR(-EINTR);
 	return ERR_PTR(-EAGAIN);
 out_found:
@@ -856,7 +844,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	try_to_freeze();
 	cond_resched();
 	err = -EINTR;
-	if (kthread_should_stop())
+	if (signalled() || kthread_should_stop())
 		goto out;
 
 	xprt = svc_get_next_xprt(rqstp, timeout);
@@ -1052,12 +1040,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
 	svc_xprt_put(xprt);
 }
 
-/**
- * svc_xprt_close - Close a client connection
- * @xprt: transport to disconnect
- *
- */
-void svc_xprt_close(struct svc_xprt *xprt)
+void svc_close_xprt(struct svc_xprt *xprt)
 {
 	trace_svc_xprt_close(xprt);
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
@@ -1072,7 +1055,7 @@ void svc_xprt_close(struct svc_xprt *xprt)
 	 */
 	svc_delete_xprt(xprt);
 }
-EXPORT_SYMBOL_GPL(svc_xprt_close);
+EXPORT_SYMBOL_GPL(svc_close_xprt);
 
 static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
 {
@@ -1124,11 +1107,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
 	}
 }
 
-/**
- * svc_xprt_destroy_all - Destroy transports associated with @serv
- * @serv: RPC service to be shut down
- * @net: target network namespace
- *
+/*
  * Server threads may still be running (especially in the case where the
  * service is still running in other network namespaces).
  *
@@ -1140,7 +1119,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
  * threads, we may need to wait a little while and then check again to
  * see if they're done.
  */
-void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
+void svc_close_net(struct svc_serv *serv, struct net *net)
 {
 	int delay = 0;
 
@@ -1151,7 +1130,6 @@ void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
 		msleep(delay++);
 	}
 }
-EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
 
 /*
  * Handle defer and revisit of requests
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5a8b8e03fdd4..998b196b6176 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -59,12 +59,12 @@ svc_put_auth_ops(struct auth_ops *aops)
 }
 
 int
-svc_authenticate(struct svc_rqst *rqstp)
+svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 {
 	rpc_authflavor_t	flavor;
 	struct auth_ops		*aops;
 
-	rqstp->rq_auth_stat = rpc_auth_ok;
+	*authp = rpc_auth_ok;
 
 	flavor = svc_getnl(&rqstp->rq_arg.head[0]);
 
@@ -72,7 +72,7 @@ svc_authenticate(struct svc_rqst *rqstp)
 
 	aops = svc_get_auth_ops(flavor);
 	if (aops == NULL) {
-		rqstp->rq_auth_stat = rpc_autherr_badcred;
+		*authp = rpc_autherr_badcred;
 		return SVC_DENIED;
 	}
 
@@ -80,7 +80,7 @@ svc_authenticate(struct svc_rqst *rqstp)
 	init_svc_cred(&rqstp->rq_cred);
 
 	rqstp->rq_authop = aops;
-	return aops->accept(rqstp);
+	return aops->accept(rqstp, authp);
 }
 EXPORT_SYMBOL_GPL(svc_authenticate);
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 1868596259af..60754a292589 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -699,9 +699,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 
 	rqstp->rq_client = NULL;
 	if (rqstp->rq_proc == 0)
-		goto out;
+		return SVC_OK;
 
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
 	ipm = ip_map_cached_get(xprt);
 	if (ipm == NULL)
 		ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
@@ -738,16 +737,13 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 		put_group_info(cred->cr_group_info);
 		cred->cr_group_info = gi;
 	}
-
-out:
-	rqstp->rq_auth_stat = rpc_auth_ok;
 	return SVC_OK;
 }
 
 EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
 
 static int
-svcauth_null_accept(struct svc_rqst *rqstp)
+svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -758,12 +754,12 @@ svcauth_null_accept(struct svc_rqst *rqstp)
 
 	if (svc_getu32(argv) != 0) {
 		dprintk("svc: bad null cred\n");
-		rqstp->rq_auth_stat = rpc_autherr_badcred;
+		*authp = rpc_autherr_badcred;
 		return SVC_DENIED;
 	}
 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
 		dprintk("svc: bad null verf\n");
-		rqstp->rq_auth_stat = rpc_autherr_badverf;
+		*authp = rpc_autherr_badverf;
 		return SVC_DENIED;
 	}
 
@@ -807,7 +803,7 @@ struct auth_ops svcauth_null = {
 
 
 static int
-svcauth_unix_accept(struct svc_rqst *rqstp)
+svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -849,7 +845,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
 	}
 	groups_sort(cred->cr_group_info);
 	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
-		rqstp->rq_auth_stat = rpc_autherr_badverf;
+		*authp = rpc_autherr_badverf;
 		return SVC_DENIED;
 	}
 
@@ -861,7 +857,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
 	return SVC_OK;
 
 badcred:
-	rqstp->rq_auth_stat = rpc_autherr_badcred;
+	*authp = rpc_autherr_badcred;
 	return SVC_DENIED;
 }
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index cb0cfcd8a814..3d5ee042c501 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -181,8 +181,8 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 	}
 }
 
-static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset,
-				   unsigned int length)
+static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset,
+				 unsigned int length)
 {
 	return 0;
 }
@@ -635,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
 	.xpo_create = svc_udp_create,
 	.xpo_recvfrom = svc_udp_recvfrom,
 	.xpo_sendto = svc_udp_sendto,
-	.xpo_result_payload = svc_sock_result_payload,
+	.xpo_read_payload = svc_sock_read_payload,
 	.xpo_release_rqst = svc_udp_release_rqst,
 	.xpo_detach = svc_sock_detach,
 	.xpo_free = svc_sock_free,
@@ -1209,7 +1209,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
 	.xpo_sendto = svc_tcp_sendto,
-	.xpo_result_payload = svc_sock_result_payload,
+	.xpo_read_payload = svc_sock_read_payload,
 	.xpo_release_rqst = svc_tcp_release_rqst,
 	.xpo_detach = svc_tcp_sock_detach,
 	.xpo_free = svc_sock_free,
@@ -1342,10 +1342,25 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	return svsk;
 }
 
+bool svc_alien_sock(struct net *net, int fd)
+{
+	int err;
+	struct socket *sock = sockfd_lookup(fd, &err);
+	bool ret = false;
+
+	if (!sock)
+		goto out;
+	if (sock_net(sock->sk) != net)
+		ret = true;
+	sockfd_put(sock);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(svc_alien_sock);
+
 /**
  * svc_addsock - add a listener socket to an RPC service
  * @serv: pointer to RPC service to which to add a new listener
- * @net: caller's network namespace
  * @fd: file descriptor of the new listener
  * @name_return: pointer to buffer to fill in with name of listener
  * @len: size of the buffer
@@ -1355,8 +1370,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
  * Name is terminated with '\n'.  On error, returns a negative errno
  * value.
  */
-int svc_addsock(struct svc_serv *serv, struct net *net, const int fd,
-		char *name_return, const size_t len, const struct cred *cred)
+int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
+		const size_t len, const struct cred *cred)
 {
 	int err = 0;
 	struct socket *so = sockfd_lookup(fd, &err);
@@ -1367,9 +1382,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net, const int fd,
 
 	if (!so)
 		return err;
-	err = -EINVAL;
-	if (sock_net(so->sk) != net)
-		goto out;
 	err = -EAFNOSUPPORT;
 	if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
 		goto out;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index e2bd0cd39114..d84bb5037bb5 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -669,7 +669,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
 
-	xdr_reset_scratch_buffer(xdr);
+	xdr_set_scratch_buffer(xdr, NULL, 0);
 	BUG_ON(scratch_len < 0);
 	xdr->buf = buf;
 	xdr->iov = iov;
@@ -691,29 +691,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
 /**
- * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
- * @xdr: pointer to xdr_stream struct
- * @buf: pointer to XDR buffer into which to encode data
- * @pages: list of pages to decode into
- * @rqst: pointer to controlling rpc_rqst, for debugging
- *
- */
-void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
-			   struct page **pages, struct rpc_rqst *rqst)
-{
-	xdr_reset_scratch_buffer(xdr);
-
-	xdr->buf = buf;
-	xdr->page_ptr = pages;
-	xdr->iov = NULL;
-	xdr->p = page_address(*pages);
-	xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
-	xdr->rqst = rqst;
-}
-EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
-
-/**
- * __xdr_commit_encode - Ensure all data is written to buffer
+ * xdr_commit_encode - Ensure all data is written to buffer
  * @xdr: pointer to xdr_stream
  *
  * We handle encoding across page boundaries by giving the caller a
@@ -725,25 +703,22 @@ EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
  * required at the end of encoding, or any other time when the xdr_buf
  * data might be read.
  */
-void __xdr_commit_encode(struct xdr_stream *xdr)
+inline void xdr_commit_encode(struct xdr_stream *xdr)
 {
 	int shift = xdr->scratch.iov_len;
 	void *page;
 
+	if (shift == 0)
+		return;
 	page = page_address(*xdr->page_ptr);
 	memcpy(xdr->scratch.iov_base, page, shift);
 	memmove(page, page + shift, (void *)xdr->p - page);
-	xdr_reset_scratch_buffer(xdr);
+	xdr->scratch.iov_len = 0;
 }
-EXPORT_SYMBOL_GPL(__xdr_commit_encode);
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
 
-/*
- * The buffer space to be reserved crosses the boundary between
- * xdr->buf->head and xdr->buf->pages, or between two pages
- * in xdr->buf->pages.
- */
-static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
-						   size_t nbytes)
+static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+		size_t nbytes)
 {
 	__be32 *p;
 	int space_left;
@@ -768,7 +743,8 @@ static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
 	 * the "scratch" iov to track any temporarily unused fragment of
 	 * space at the end of the previous buffer:
 	 */
-	xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
+	xdr->scratch.iov_base = xdr->p;
+	xdr->scratch.iov_len = frag1bytes;
 	p = page_address(*xdr->page_ptr);
 	/*
 	 * Note this is where the next encode will start after we've
@@ -1080,7 +1056,8 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
 		     struct rpc_rqst *rqst)
 {
 	xdr->buf = buf;
-	xdr_reset_scratch_buffer(xdr);
+	xdr->scratch.iov_base = NULL;
+	xdr->scratch.iov_len = 0;
 	xdr->nwords = XDR_QUADLEN(buf->len);
 	if (buf->head[0].iov_len != 0)
 		xdr_set_iov(xdr, buf->head, buf->len);
@@ -1128,6 +1105,24 @@ static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 	return p;
 }
 
+/**
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+	xdr->scratch.iov_base = buf;
+	xdr->scratch.iov_len = buflen;
+}
+EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+
 static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
 {
 	__be32 *p;
@@ -1437,51 +1432,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
-/**
- * xdr_stream_subsegment - set @subbuf to a portion of @xdr
- * @xdr: an xdr_stream set up for decoding
- * @subbuf: the result buffer
- * @nbytes: length of @xdr to extract, in bytes
- *
- * Sets up @subbuf to represent a portion of @xdr. The portion
- * starts at the current offset in @xdr, and extends for a length
- * of @nbytes. If this is successful, @xdr is advanced to the next
- * position following that portion.
- *
- * Return values:
- *   %true: @subbuf has been initialized, and @xdr has been advanced.
- *   %false: a bounds error has occurred
- */
-bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
-			   unsigned int nbytes)
-{
-	unsigned int remaining, offset, len;
-
-	if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes))
-		return false;
-
-	if (subbuf->head[0].iov_len)
-		if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len))
-			return false;
-
-	remaining = subbuf->page_len;
-	offset = subbuf->page_base;
-	while (remaining) {
-		len = min_t(unsigned int, remaining, PAGE_SIZE) - offset;
-
-		if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
-			return false;
-		if (!__xdr_inline_decode(xdr, len))
-			return false;
-
-		remaining -= len;
-		offset = 0;
-	}
-
-	return true;
-}
-EXPORT_SYMBOL_GPL(xdr_stream_subsegment);
-
 /**
  * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
  * @buf: buf to be trimmed
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index feac8c26fb87..c5154bc38e12 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -186,7 +186,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
 
 	ret = rpcrdma_bc_send_request(rdma, rqst);
 	if (ret == -ENOTCONN)
-		svc_xprt_close(sxprt);
+		svc_close_xprt(sxprt);
 	return ret;
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index d6436c13d5c4..c3d588b149aa 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -448,6 +448,7 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src,
  * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list
  * @rctxt: Reply context with information about the RPC Call
  * @sctxt: Send context for the RPC Reply
+ * @length: size in bytes of the payload in the first Write chunk
  *
  * The client provides a Write chunk list in the Call message. Fill
  * in the segments in the first Write chunk in the Reply's transport
@@ -464,12 +465,12 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src,
  */
 static ssize_t
 svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt,
-			   struct svc_rdma_send_ctxt *sctxt)
+			   struct svc_rdma_send_ctxt *sctxt,
+			   unsigned int length)
 {
 	ssize_t len, ret;
 
-	ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt,
-					  rctxt->rc_read_payload_length);
+	ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length);
 	if (ret < 0)
 		return ret;
 	len = ret;
@@ -922,12 +923,21 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 		goto err0;
 	if (wr_lst) {
 		/* XXX: Presume the client sent only one Write chunk */
-		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr,
-						rctxt->rc_read_payload_offset,
-						rctxt->rc_read_payload_length);
+		unsigned long offset;
+		unsigned int length;
+
+		if (rctxt->rc_read_payload_length) {
+			offset = rctxt->rc_read_payload_offset;
+			length = rctxt->rc_read_payload_length;
+		} else {
+			offset = xdr->head[0].iov_len;
+			length = xdr->page_len;
+		}
+		ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset,
+						length);
 		if (ret < 0)
 			goto err2;
-		if (svc_rdma_encode_write_list(rctxt, sctxt) < 0)
+		if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0)
 			goto err0;
 	} else {
 		if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
@@ -969,19 +979,19 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 }
 
 /**
- * svc_rdma_result_payload - special processing for a result payload
+ * svc_rdma_read_payload - special processing for a READ payload
  * @rqstp: svc_rqst to operate on
  * @offset: payload's byte offset in @xdr
  * @length: size of payload, in bytes
  *
  * Returns zero on success.
  *
- * For the moment, just record the xdr_buf location of the result
+ * For the moment, just record the xdr_buf location of the READ
  * payload. svc_rdma_sendto will use that location later when
  * we actually send the payload.
  */
-int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
-			    unsigned int length)
+int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
+			  unsigned int length)
 {
 	struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c895f80df659..5f7e3d12523f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_create = svc_rdma_create,
 	.xpo_recvfrom = svc_rdma_recvfrom,
 	.xpo_sendto = svc_rdma_sendto,
-	.xpo_result_payload = svc_rdma_result_payload,
+	.xpo_read_payload = svc_rdma_read_payload,
 	.xpo_release_rqst = svc_rdma_release_rqst,
 	.xpo_detach = svc_rdma_detach,
 	.xpo_free = svc_rdma_free,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 61ae7acea796..dd57a411adf6 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -959,7 +959,7 @@ static struct sock *unix_find_other(struct net *net,
 		if (err)
 			goto fail;
 		inode = d_backing_inode(path.dentry);
-		err = path_permission(&path, MAY_WRITE);
+		err = inode_permission(inode, MAY_WRITE);
 		if (err)
 			goto put_fail;
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 78a48bd0f2b9..08249f7a09d6 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -168,9 +168,8 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"panic",
 		"do_exit",
 		"do_task_dead",
-		"kthread_exit",
 		"make_task_dead",
-		"__module_put_and_kthread_exit",
+		"__module_put_and_exit",
 		"complete_and_exit",
 		"__reiserfs_panic",
 		"lbug_with_loc",