From 88eb084d18c6124acccfe06edfc161dfa11bb34b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Jul 2024 16:32:09 +0000 Subject: [PATCH] Revert "Merge 5.10.220 into android12-5.10-lts" This reverts commit 87a7f35a248737adec8257a65ec4cb6ee9523f0b, reversing changes made to 640645c85ba551dc98a3cd56f51be40c707e10fb. 5.10.220 is a bunch of vfs and nfs changes that are not needed in Android systems, so revert the whole lot all at once, except for the version number bump. Change-Id: If28dc2231f27d326d3730716f23545dd0a2cdc75 Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/files.rst | 8 +- Documentation/filesystems/locking.rst | 10 +- Documentation/filesystems/nfs/exporting.rst | 78 - arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- crypto/algboss.c | 4 +- fs/Kconfig | 6 +- fs/autofs/dev-ioctl.c | 5 +- fs/cachefiles/namei.c | 9 +- fs/cifs/connect.c | 2 +- fs/coredump.c | 5 +- fs/ecryptfs/inode.c | 10 +- fs/exec.c | 29 +- fs/exportfs/expfs.c | 40 +- fs/file.c | 177 +- fs/init.c | 6 +- fs/lockd/clnt4xdr.c | 9 +- fs/lockd/clntproc.c | 3 + fs/lockd/host.c | 4 +- fs/lockd/svc.c | 260 +- fs/lockd/svc4proc.c | 70 +- fs/lockd/svclock.c | 67 +- fs/lockd/svcproc.c | 62 +- fs/lockd/svcsubs.c | 123 +- fs/lockd/svcxdr.h | 142 - fs/lockd/xdr.c | 448 ++- fs/lockd/xdr4.c | 462 +-- fs/locks.c | 102 +- fs/namei.c | 21 +- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/blocklayout/dev.c | 2 +- fs/nfs/callback.c | 111 +- fs/nfs/callback_xdr.c | 33 +- fs/nfs/dir.c | 2 +- fs/nfs/export.c | 17 - fs/nfs/file.c | 3 - fs/nfs/filelayout/filelayout.c | 4 +- fs/nfs/filelayout/filelayoutdev.c | 2 +- fs/nfs/flexfilelayout/flexfilelayout.c | 4 +- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- fs/nfs/nfs42xdr.c | 2 +- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 6 +- fs/nfs/pagelist.c | 3 + fs/nfs/super.c | 8 - fs/nfs/write.c | 3 + fs/nfs_common/Makefile | 2 +- fs/nfs_common/nfs_ssc.c | 2 + fs/nfs_common/nfsacl.c | 123 - fs/nfsd/Kconfig | 40 +- fs/nfsd/Makefile | 8 +- 
fs/nfsd/acl.h | 6 +- fs/nfsd/blocklayout.c | 1 - fs/nfsd/blocklayoutxdr.c | 1 - fs/nfsd/cache.h | 2 +- fs/nfsd/export.c | 74 +- fs/nfsd/export.h | 16 +- fs/nfsd/filecache.c | 1217 +++--- fs/nfsd/filecache.h | 23 +- fs/nfsd/flexfilelayout.c | 3 +- fs/nfsd/lockd.c | 10 +- fs/nfsd/netns.h | 63 +- fs/nfsd/nfs2acl.c | 214 +- fs/nfsd/nfs3acl.c | 140 +- fs/nfsd/nfs3proc.c | 402 +- fs/nfsd/nfs3xdr.c | 1801 ++++----- fs/nfsd/nfs4acl.c | 45 +- fs/nfsd/nfs4callback.c | 168 +- fs/nfsd/nfs4idmap.c | 9 +- fs/nfsd/nfs4layouts.c | 4 +- fs/nfsd/nfs4proc.c | 1111 ++---- fs/nfsd/nfs4recover.c | 20 +- fs/nfsd/nfs4state.c | 1715 +++----- fs/nfsd/nfs4xdr.c | 3771 +++++++++--------- fs/nfsd/nfscache.c | 115 +- fs/nfsd/nfsctl.c | 169 +- fs/nfsd/nfsd.h | 50 +- fs/nfsd/nfsfh.c | 291 +- fs/nfsd/nfsfh.h | 179 +- fs/nfsd/nfsproc.c | 262 +- fs/nfsd/nfssvc.c | 356 +- fs/nfsd/nfsxdr.c | 842 ++-- fs/nfsd/state.h | 69 +- fs/nfsd/stats.c | 124 +- fs/nfsd/stats.h | 98 +- fs/nfsd/trace.c | 1 - fs/nfsd/trace.h | 898 +---- fs/nfsd/vfs.c | 933 +++-- fs/nfsd/vfs.h | 62 +- fs/nfsd/xdr.h | 68 +- fs/nfsd/xdr3.h | 116 +- fs/nfsd/xdr4.h | 127 +- fs/nfsd/xdr4cb.h | 6 - fs/notify/dnotify/dnotify.c | 17 +- fs/notify/fanotify/fanotify.c | 487 +-- fs/notify/fanotify/fanotify.h | 252 +- fs/notify/fanotify/fanotify_user.c | 886 +--- fs/notify/fdinfo.c | 19 +- fs/notify/fsnotify.c | 183 +- fs/notify/fsnotify.h | 19 +- fs/notify/group.c | 38 +- fs/notify/inotify/inotify.h | 11 +- fs/notify/inotify/inotify_fsnotify.c | 12 +- fs/notify/inotify/inotify_user.c | 87 +- fs/notify/mark.c | 172 +- fs/notify/notification.c | 72 +- fs/open.c | 49 +- fs/overlayfs/overlayfs.h | 9 +- fs/proc/fd.c | 48 +- fs/udf/file.c | 2 +- fs/verity/enable.c | 2 +- include/linux/dnotify.h | 2 +- include/linux/errno.h | 1 - include/linux/exportfs.h | 15 - include/linux/fanotify.h | 74 +- include/linux/fdtable.h | 37 +- include/linux/fs.h | 54 +- include/linux/fsnotify.h | 77 +- include/linux/fsnotify_backend.h | 372 +- include/linux/iversion.h | 
13 - include/linux/kallsyms.h | 17 +- include/linux/kthread.h | 1 - include/linux/lockd/bind.h | 3 +- include/linux/lockd/lockd.h | 17 +- include/linux/lockd/xdr.h | 35 +- include/linux/lockd/xdr4.h | 33 +- include/linux/module.h | 24 +- include/linux/nfs.h | 8 + include/linux/nfs4.h | 21 +- include/linux/nfs_ssc.h | 14 - include/linux/nfsacl.h | 6 - include/linux/pid.h | 1 - include/linux/sched/user.h | 3 + include/linux/sunrpc/msg_prot.h | 3 + include/linux/sunrpc/svc.h | 151 +- include/linux/sunrpc/svc_rdma.h | 4 +- include/linux/sunrpc/svc_xprt.h | 16 +- include/linux/sunrpc/svcauth.h | 4 +- include/linux/sunrpc/svcsock.h | 7 +- include/linux/sunrpc/xdr.h | 153 +- include/linux/syscalls.h | 12 + include/linux/sysctl.h | 2 - include/linux/user_namespace.h | 4 - include/trace/events/sunrpc.h | 26 +- include/uapi/linux/fanotify.h | 42 - include/uapi/linux/nfs3.h | 6 - include/uapi/linux/nfsd/nfsfh.h | 105 + kernel/audit_fsnotify.c | 8 +- kernel/audit_tree.c | 2 +- kernel/audit_watch.c | 5 +- kernel/bpf/inode.c | 2 +- kernel/bpf/syscall.c | 20 +- kernel/bpf/task_iter.c | 2 +- kernel/fork.c | 12 +- kernel/kallsyms.c | 8 +- kernel/kcmp.c | 29 +- kernel/kthread.c | 23 +- kernel/livepatch/core.c | 7 +- kernel/module.c | 24 +- kernel/pid.c | 15 +- kernel/sys.c | 2 +- kernel/sysctl.c | 54 +- kernel/trace/trace_kprobe.c | 4 +- kernel/ucount.c | 4 - mm/madvise.c | 2 +- mm/memcontrol.c | 2 +- mm/mincore.c | 2 +- net/bluetooth/bnep/core.c | 2 +- net/bluetooth/cmtp/core.c | 2 +- net/bluetooth/hidp/core.c | 2 +- net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 47 +- net/sunrpc/sched.c | 1 - net/sunrpc/svc.c | 314 +- net/sunrpc/svc_xprt.c | 104 +- net/sunrpc/svcauth.c | 8 +- net/sunrpc/svcauth_unix.c | 18 +- net/sunrpc/svcsock.c | 32 +- net/sunrpc/xdr.c | 112 +- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 32 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/unix/af_unix.c | 2 +- tools/objtool/check.c 
| 3 +- 183 files changed, 8839 insertions(+), 13928 deletions(-) delete mode 100644 fs/lockd/svcxdr.h create mode 100644 include/uapi/linux/nfsd/nfsfh.h diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst index bcf84459917f..cbf8e57376bf 100644 --- a/Documentation/filesystems/files.rst +++ b/Documentation/filesystems/files.rst @@ -62,7 +62,7 @@ the fdtable structure - be held. 4. To look up the file structure given an fd, a reader - must use either lookup_fd_rcu() or files_lookup_fd_rcu() APIs. These + must use either fcheck() or fcheck_files() APIs. These take care of barrier requirements due to lock-free lookup. An example:: @@ -70,7 +70,7 @@ the fdtable structure - struct file *file; rcu_read_lock(); - file = lookup_fd_rcu(fd); + file = fcheck(fd); if (file) { ... } @@ -84,7 +84,7 @@ the fdtable structure - on ->f_count:: rcu_read_lock(); - file = files_lookup_fd_rcu(files, fd); + file = fcheck_files(files, fd); if (file) { if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; @@ -104,7 +104,7 @@ the fdtable structure - lock-free, they must be installed using rcu_assign_pointer() API. If they are looked up lock-free, rcu_dereference() must be used. However it is advisable to use files_fdtable() - and lookup_fd_rcu()/files_lookup_fd_rcu() which take care of these issues. + and fcheck()/fcheck_files() which take care of these issues. 7. While updating, the fdtable pointer must be looked up while holding files->file_lock. 
If ->file_lock is dropped, then diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 5db6dec0b423..18d93fc7dc46 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -433,21 +433,17 @@ prototypes:: void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); bool (*lm_breaker_owns_lease)(struct file_lock *); - bool (*lm_lock_expirable)(struct file_lock *); - void (*lm_expire_lock)(void); locking rules: ====================== ============= ================= ========= -ops flc_lock blocked_lock_lock may block +ops inode->i_lock blocked_lock_lock may block ====================== ============= ================= ========= -lm_notify: no yes no +lm_notify: yes yes no lm_grant: no no no lm_break: yes no no lm_change yes no no -lm_breaker_owns_lease: yes no no -lm_lock_expirable yes no no -lm_expire_lock no no yes +lm_breaker_owns_lease: no no no ====================== ============= ================= ========= buffer_head diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst index 6f59a364f84c..33d588a01ace 100644 --- a/Documentation/filesystems/nfs/exporting.rst +++ b/Documentation/filesystems/nfs/exporting.rst @@ -154,11 +154,6 @@ struct which has the following members: to find potential names, and matches inode numbers to find the correct match. - flags - Some filesystems may need to be handled differently than others. The - export_operations struct also includes a flags field that allows the - filesystem to communicate such information to nfsd. See the Export - Operations Flags section below for more explanation. A filehandle fragment consists of an array of 1 or more 4byte words, together with a one byte "type". @@ -168,76 +163,3 @@ generated by encode_fh, in which case it will have been padded with nuls. 
Rather, the encode_fh routine should choose a "type" which indicates the decode_fh how much of the filehandle is valid, and how it should be interpreted. - -Export Operations Flags ------------------------ -In addition to the operation vector pointers, struct export_operations also -contains a "flags" field that allows the filesystem to communicate to nfsd -that it may want to do things differently when dealing with it. The -following flags are defined: - - EXPORT_OP_NOWCC - disable NFSv3 WCC attributes on this filesystem - RFC 1813 recommends that servers always send weak cache consistency - (WCC) data to the client after each operation. The server should - atomically collect attributes about the inode, do an operation on it, - and then collect the attributes afterward. This allows the client to - skip issuing GETATTRs in some situations but means that the server - is calling vfs_getattr for almost all RPCs. On some filesystems - (particularly those that are clustered or networked) this is expensive - and atomicity is difficult to guarantee. This flag indicates to nfsd - that it should skip providing WCC attributes to the client in NFSv3 - replies when doing operations on this filesystem. Consider enabling - this on filesystems that have an expensive ->getattr inode operation, - or when atomicity between pre and post operation attribute collection - is impossible to guarantee. - - EXPORT_OP_NOSUBTREECHK - disallow subtree checking on this fs - Many NFS operations deal with filehandles, which the server must then - vet to ensure that they live inside of an exported tree. When the - export consists of an entire filesystem, this is trivial. nfsd can just - ensure that the filehandle live on the filesystem. When only part of a - filesystem is exported however, then nfsd must walk the ancestors of the - inode to ensure that it's within an exported subtree. This is an - expensive operation and not all filesystems can support it properly. 
- This flag exempts the filesystem from subtree checking and causes - exportfs to get back an error if it tries to enable subtree checking - on it. - - EXPORT_OP_CLOSE_BEFORE_UNLINK - always close cached files before unlinking - On some exportable filesystems (such as NFS) unlinking a file that - is still open can cause a fair bit of extra work. For instance, - the NFS client will do a "sillyrename" to ensure that the file - sticks around while it's still open. When reexporting, that open - file is held by nfsd so we usually end up doing a sillyrename, and - then immediately deleting the sillyrenamed file just afterward when - the link count actually goes to zero. Sometimes this delete can race - with other operations (for instance an rmdir of the parent directory). - This flag causes nfsd to close any open files for this inode _before_ - calling into the vfs to do an unlink or a rename that would replace - an existing file. - - EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote - PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to - write to one bdi (the final bdi) in order to free up writes queued - to another bdi (the client bdi). Such threads get a private balance - of dirty pages so that dirty pages for the client bdi do not imact - the daemon writing to the final bdi. For filesystems whose durable - storage is not local (such as exported NFS filesystems), this - constraint has negative consequences. EXPORT_OP_REMOTE_FS enables - an export to disable writeback throttling. - - EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically - EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem - cannot provide the semantics required by the "atomic" boolean in - NFSv4's change_info4. This boolean indicates to a client whether the - returned before and after change attributes were obtained atomically - with the respect to the requested metadata operation (UNLINK, - OPEN/CREATE, MKDIR, etc). 
- - EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2) - On most filesystems, inodes can remain under writeback after the - file is closed. NFSD relies on client activity or local flusher - threads to handle writeback. Certain filesystems, such as NFS, flush - all of an inode's dirty data on last close. Exports that behave this - way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip - waiting for writeback when closing such files. diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 60b5583e9eaf..026c181a98c5 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -74,7 +74,7 @@ static struct spu_context *coredump_next_context(int *fd) *fd = n - 1; rcu_read_lock(); - file = lookup_fd_rcu(*fd); + file = fcheck(*fd); ctx = SPUFS_I(file_inode(file))->i_ctx; get_spu_context(ctx); rcu_read_unlock(); diff --git a/crypto/algboss.c b/crypto/algboss.c index b87f907bb142..5ebccbd6b74e 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -74,7 +74,7 @@ out: complete_all(¶m->larval->completion); crypto_alg_put(¶m->larval->alg); kfree(param); - module_put_and_kthread_exit(0); + module_put_and_exit(0); } static int cryptomgr_schedule_probe(struct crypto_larval *larval) @@ -209,7 +209,7 @@ skiptest: crypto_alg_tested(param->driver, err); kfree(param); - module_put_and_kthread_exit(0); + module_put_and_exit(0); } static int cryptomgr_schedule_test(struct crypto_alg *alg) diff --git a/fs/Kconfig b/fs/Kconfig index d34b8227c772..a6a721108d1c 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -321,7 +321,7 @@ config LOCKD config LOCKD_V4 bool - depends on NFSD || NFS_V3 + depends on NFSD_V3 || NFS_V3 depends on FILE_LOCKING default y @@ -334,10 +334,6 @@ config NFS_COMMON depends on NFSD || NFS_FS || LOCKD default y -config NFS_V4_2_SSC_HELPER - bool - default y if NFS_V4_2 - source "net/sunrpc/Kconfig" source "fs/ceph/Kconfig" source 
"fs/cifs/Kconfig" diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 5bf781ea6d67..322b7dfb4ea0 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -4,10 +4,9 @@ * Copyright 2008 Ian Kent */ -#include #include #include -#include +#include #include #include @@ -290,7 +289,7 @@ static int autofs_dev_ioctl_closemount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - return close_fd(param->ioctlfd); + return ksys_close(param->ioctlfd); } /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7b987de0babe..ecc8ecbbfa5a 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -412,14 +412,9 @@ try_again: if (ret < 0) { cachefiles_io_error(cache, "Rename security error %d", ret); } else { - struct renamedata rd = { - .old_dir = d_inode(dir), - .old_dentry = rep, - .new_dir = d_inode(cache->graveyard), - .new_dentry = grave, - }; trace_cachefiles_rename(object, rep, grave, why); - ret = vfs_rename(&rd); + ret = vfs_rename(d_inode(dir), rep, + d_inode(cache->graveyard), grave, NULL, 0); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a3c0e6a4e484..164b98540716 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1242,7 +1242,7 @@ next_pdu: } memalloc_noreclaim_restore(noreclaim_flag); - module_put_and_kthread_exit(0); + module_put_and_exit(0); } /* extract the host portion of the UNC string */ diff --git a/fs/coredump.c b/fs/coredump.c index ca4802d14158..7c5edadf5208 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -587,6 +587,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) int ispipe; size_t *argv = NULL; int argc = 0; + struct files_struct *displaced; /* require nonrelative corefile path and be extra careful */ bool need_suid_safe = false; bool core_dumped = false; @@ -792,9 +793,11 @@ void do_coredump(const kernel_siginfo_t *siginfo) } /* get us an unshared descriptor 
table; almost always a no-op */ - retval = unshare_files(); + retval = unshare_files(&displaced); if (retval) goto close_fail; + if (displaced) + put_files_struct(displaced); if (!dump_interrupted()) { /* * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index cd1a60a319b8..7777bb6f66d2 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -598,7 +598,6 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dir_dentry; struct dentry *trap; struct inode *target_inode; - struct renamedata rd = {}; if (flags) return -EINVAL; @@ -628,12 +627,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = -ENOTEMPTY; goto out_lock; } - - rd.old_dir = d_inode(lower_old_dir_dentry); - rd.old_dentry = lower_old_dentry; - rd.new_dir = d_inode(lower_new_dir_dentry); - rd.new_dentry = lower_new_dentry; - rc = vfs_rename(&rd); + rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry, + d_inode(lower_new_dir_dentry), lower_new_dentry, + NULL, 0); if (rc) goto out_lock; if (target_inode) diff --git a/fs/exec.c b/fs/exec.c index 398ccf06d799..4edc932a7dce 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1266,11 +1266,6 @@ int begin_new_exec(struct linux_binprm * bprm) if (retval) goto out; - /* Ensure the files table is not shared. */ - retval = unshare_files(); - if (retval) - goto out; - /* * Must be called _before_ exec_mmap() as bprm->mm is * not visibile until then. 
This also enables the update @@ -1796,6 +1791,7 @@ static int bprm_execve(struct linux_binprm *bprm, int fd, struct filename *filename, int flags) { struct file *file; + struct files_struct *displaced; int retval; /* @@ -1803,10 +1799,14 @@ static int bprm_execve(struct linux_binprm *bprm, */ io_uring_task_cancel(); - retval = prepare_bprm_creds(bprm); + retval = unshare_files(&displaced); if (retval) return retval; + retval = prepare_bprm_creds(bprm); + if (retval) + goto out_files; + check_unsafe_exec(bprm); current->in_execve = 1; @@ -1820,14 +1820,11 @@ static int bprm_execve(struct linux_binprm *bprm, bprm->file = file; /* * Record that a name derived from an O_CLOEXEC fd will be - * inaccessible after exec. This allows the code in exec to - * choose to fail when the executable is not mmaped into the - * interpreter and an open file descriptor is not passed to - * the interpreter. This makes for a better user experience - * than having the interpreter start and then immediately fail - * when it finds the executable is inaccessible. + * inaccessible after exec. Relies on having exclusive access to + * current->files (due to unshare_files above). 
*/ - if (bprm->fdpath && get_close_on_exec(fd)) + if (bprm->fdpath && + close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; /* Set the unchanging part of bprm->cred */ @@ -1845,6 +1842,8 @@ static int bprm_execve(struct linux_binprm *bprm, rseq_execve(current); acct_update_integrals(current); task_numa_free(current, false); + if (displaced) + put_files_struct(displaced); return retval; out: @@ -1861,6 +1860,10 @@ out_unmark: current->fs->in_exec = 0; current->in_execve = 0; +out_files: + if (displaced) + reset_files_struct(displaced); + return retval; } diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 8c28bd1c9ed9..2dd55b172d57 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -18,7 +18,7 @@ #include #include -#define dprintk(fmt, args...) pr_debug(fmt, ##args) +#define dprintk(fmt, args...) do{}while(0) static int get_name(const struct path *path, char *name, struct dentry *child); @@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { - dprintk("get_parent of %lu failed, err %ld\n", - dentry->d_inode->i_ino, PTR_ERR(parent)); + dprintk("%s: get_parent of %ld failed, err %d\n", + __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } @@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { - dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); + dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } @@ -417,11 +417,9 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, } EXPORT_SYMBOL_GPL(exportfs_encode_fh); -struct dentry * -exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, - int fileid_type, - int (*acceptable)(void *, struct dentry *), - void *context) 
+struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, + int fh_len, int fileid_type, + int (*acceptable)(void *, struct dentry *), void *context) { const struct export_operations *nop = mnt->mnt_sb->s_export_op; struct dentry *result, *alias; @@ -434,8 +432,10 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, if (!nop || !nop->fh_to_dentry) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); + if (PTR_ERR(result) == -ENOMEM) + return ERR_CAST(result); if (IS_ERR_OR_NULL(result)) - return result; + return ERR_PTR(-ESTALE); /* * If no acceptance criteria was specified by caller, a disconnected @@ -561,26 +561,10 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, err_result: dput(result); + if (err != -ENOMEM) + err = -ESTALE; return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); - -struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, - int fh_len, int fileid_type, - int (*acceptable)(void *, struct dentry *), - void *context) -{ - struct dentry *ret; - - ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, - acceptable, context); - if (IS_ERR_OR_NULL(ret)) { - if (ret == ERR_PTR(-ENOMEM)) - return ret; - return ERR_PTR(-ESTALE); - } - return ret; -} EXPORT_SYMBOL_GPL(exportfs_decode_fh); MODULE_LICENSE("GPL"); diff --git a/fs/file.c b/fs/file.c index fdb84a64724b..d6bc73960e4a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -175,7 +175,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) spin_unlock(&files->file_lock); new_fdt = alloc_fdtable(nr); - /* make sure all fd_install() have seen resize_in_progress + /* make sure all __fd_install() have seen resize_in_progress * or have finished their rcu_read_lock_sched() section. 
*/ if (atomic_read(&files->count) > 1) @@ -198,7 +198,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) rcu_assign_pointer(files->fdt, new_fdt); if (cur_fdt != &files->fdtab) call_rcu(&cur_fdt->rcu, free_fdtable_rcu); - /* coupled with smp_rmb() in fd_install() */ + /* coupled with smp_rmb() in __fd_install() */ smp_wmb(); return 1; } @@ -466,6 +466,18 @@ void put_files_struct(struct files_struct *files) } } +void reset_files_struct(struct files_struct *files) +{ + struct task_struct *tsk = current; + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} + void exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; @@ -509,9 +521,9 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) /* * allocate a file descriptor, mark it busy. */ -static int alloc_fd(unsigned start, unsigned end, unsigned flags) +int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags) { - struct files_struct *files = current->files; unsigned int fd; int error; struct fdtable *fdt; @@ -567,9 +579,14 @@ out: return error; } +static int alloc_fd(unsigned start, unsigned flags) +{ + return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); +} + int __get_unused_fd_flags(unsigned flags, unsigned long nofile) { - return alloc_fd(0, nofile, flags); + return __alloc_fd(current->files, 0, nofile, flags); } int get_unused_fd_flags(unsigned flags) @@ -608,13 +625,17 @@ EXPORT_SYMBOL(put_unused_fd); * It should never happen - if we allow dup2() do it, _really_ bad things * will follow. * - * This consumes the "file" refcount, so callers should treat it - * as if they had called fput(file). + * NOTE: __fd_install() variant is really, really low-level; don't + * use it unless you are forced to by truly lousy API shoved down + * your throat. 
'files' *MUST* be either current->files or obtained + * by get_files_struct(current) done by whoever had given it to you, + * or really bad things will happen. Normally you want to use + * fd_install() instead. */ -void fd_install(unsigned int fd, struct file *file) +void __fd_install(struct files_struct *files, unsigned int fd, + struct file *file) { - struct files_struct *files = current->files; struct fdtable *fdt; rcu_read_lock_sched(); @@ -636,6 +657,15 @@ void fd_install(unsigned int fd, struct file *file) rcu_read_unlock_sched(); } +/* + * This consumes the "file" refcount, so callers should treat it + * as if they had called fput(file). + */ +void fd_install(unsigned int fd, struct file *file) +{ + __fd_install(current->files, fd, file); +} + EXPORT_SYMBOL(fd_install); static struct file *pick_file(struct files_struct *files, unsigned fd) @@ -659,9 +689,11 @@ out_unlock: return file; } -int close_fd(unsigned fd) +/* + * The same warnings as for __alloc_fd()/__fd_install() apply here... + */ +int __close_fd(struct files_struct *files, unsigned fd) { - struct files_struct *files = current->files; struct file *file; file = pick_file(files, fd); @@ -670,7 +702,7 @@ int close_fd(unsigned fd) return filp_close(file, files); } -EXPORT_SYMBOL(close_fd); /* for ksys_close() */ +EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ /** * __close_range() - Close all file descriptors in a given range. 
@@ -829,28 +861,68 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); } +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) +{ + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry; + + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: + */ + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. + */ + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { + fput_many(file, refs); + continue; + } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; + } +} + static struct file *__fget_files(struct files_struct *files, unsigned int fd, fmode_t mask, unsigned int refs) { struct file *file; rcu_read_lock(); -loop: - file = files_lookup_fd_rcu(files, fd); - if (file) { - /* File object ref couldn't be taken. 
- * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) - */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - else if (files_lookup_fd_raw(files, fd) != file) { - fput_many(file, refs); - goto loop; - } - } + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock(); return file; @@ -891,42 +963,6 @@ struct file *fget_task(struct task_struct *task, unsigned int fd) return file; } -struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd) -{ - /* Must be called with rcu_read_lock held */ - struct files_struct *files; - struct file *file = NULL; - - task_lock(task); - files = task->files; - if (files) - file = files_lookup_fd_rcu(files, fd); - task_unlock(task); - - return file; -} - -struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret_fd) -{ - /* Must be called with rcu_read_lock held */ - struct files_struct *files; - unsigned int fd = *ret_fd; - struct file *file = NULL; - - task_lock(task); - files = task->files; - if (files) { - for (; fd < files_fdtable(files)->max_fds; fd++) { - file = files_lookup_fd_rcu(files, fd); - if (file) - break; - } - } - task_unlock(task); - *ret_fd = fd; - return file; -} - /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. 
* @@ -949,7 +985,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct file *file; if (atomic_read(&files->count) == 1) { - file = files_lookup_fd_raw(files, fd); + file = __fcheck_files(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; return (unsigned long)file; @@ -1085,7 +1121,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags) struct files_struct *files = current->files; if (!file) - return close_fd(fd); + return __close_fd(files, fd); if (fd >= rlimit(RLIMIT_NOFILE)) return -EBADF; @@ -1174,7 +1210,7 @@ static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) spin_lock(&files->file_lock); err = expand_files(files, newfd); - file = files_lookup_fd_locked(files, oldfd); + file = fcheck(oldfd); if (unlikely(!file)) goto Ebadf; if (unlikely(err < 0)) { @@ -1203,7 +1239,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) int retval = oldfd; rcu_read_lock(); - if (!files_lookup_fd_rcu(files, oldfd)) + if (!fcheck_files(files, oldfd)) retval = -EBADF; rcu_read_unlock(); return retval; @@ -1228,11 +1264,10 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) int f_dupfd(unsigned int from, struct file *file, unsigned flags) { - unsigned long nofile = rlimit(RLIMIT_NOFILE); int err; - if (from >= nofile) + if (from >= rlimit(RLIMIT_NOFILE)) return -EINVAL; - err = alloc_fd(from, nofile, flags); + err = alloc_fd(from, flags); if (err >= 0) { get_file(file); fd_install(err, file); diff --git a/fs/init.c b/fs/init.c index 02723bea8499..e9c320a48cf1 100644 --- a/fs/init.c +++ b/fs/init.c @@ -49,7 +49,7 @@ int __init init_chdir(const char *filename) error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); if (error) return error; - error = path_permission(&path, MAY_EXEC | MAY_CHDIR); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &path); path_put(&path); @@ -64,7 +64,7 @@ int __init init_chroot(const char *filename) error 
= kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); if (error) return error; - error = path_permission(&path, MAY_EXEC | MAY_CHDIR); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; error = -EPERM; @@ -118,7 +118,7 @@ int __init init_eaccess(const char *filename) error = kern_path(filename, LOOKUP_FOLLOW, &path); if (error) return error; - error = path_permission(&path, MAY_ACCESS); + error = inode_permission(d_inode(path.dentry), MAY_ACCESS); path_put(&path); return error; } diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 8161667c976f..7df6324ccb8a 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -261,6 +261,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) u32 exclusive; int error; __be32 *p; + s32 end; memset(lock, 0, sizeof(*lock)); locks_init_lock(fl); @@ -284,7 +285,13 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) fl->fl_type = exclusive != 0 ? 
F_WRLCK : F_RDLCK; p = xdr_decode_hyper(p, &l_offset); xdr_decode_hyper(p, &l_len); - nlm4svc_set_file_lock_range(fl, l_offset, l_len); + end = l_offset + l_len - 1; + + fl->fl_start = (loff_t)l_offset; + if (l_len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = (loff_t)end; error = 0; out: return error; diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 99fffc9cb958..b11f2afa84f1 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -794,6 +794,9 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) goto retry_cancel; } + dprintk("lockd: cancel status %u (task %u)\n", + status, task->tk_pid); + switch (status) { case NLM_LCK_GRANTED: case NLM_LCK_DENIED_GRACE_PERIOD: diff --git a/fs/lockd/host.c b/fs/lockd/host.c index cdc8e12cdac4..771c289f6df7 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -163,8 +163,8 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; host->net = ni->net; - host->h_cred = get_cred(ni->cred); - strscpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); + host->h_cred = get_cred(ni->cred), + strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename)); out: return host; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5579e67da17d..1a639e34847d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -54,9 +54,13 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; -static struct svc_serv *nlmsvc_serv; +static struct task_struct *nlmsvc_task; +static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; +static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq); + unsigned int lockd_net_id; /* @@ -180,10 +184,6 @@ lockd(void *vrqstp) nlm_shutdown_hosts(); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - - dprintk("lockd_down: service stopped\n"); - - 
svc_exit_thread(rqstp); return 0; } @@ -196,8 +196,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) - return svc_xprt_create(serv, name, net, family, port, - SVC_SOCK_DEFAULTS, cred); + return svc_create_xprt(serv, name, net, family, port, + SVC_SOCK_DEFAULTS, cred); svc_xprt_put(xprt); return 0; } @@ -247,8 +247,7 @@ out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); - svc_xprt_destroy_all(serv, net); - svc_rpcb_cleanup(serv, net); + svc_shutdown_net(serv, net); return err; } @@ -286,12 +285,13 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) nlm_shutdown_hosts_net(net); cancel_delayed_work_sync(&ln->grace_period_end); locks_end_grace(&ln->lockd_manager); - svc_xprt_destroy_all(serv, net); - svc_rpcb_cleanup(serv, net); + svc_shutdown_net(serv, net); + dprintk("%s: per-net data destroyed; net=%x\n", + __func__, net->ns.inum); } } else { - pr_err("%s: no users! net=%x\n", - __func__, net->ns.inum); + pr_err("%s: no users! 
task=%p, net=%x\n", + __func__, nlmsvc_task, net->ns.inum); BUG(); } } @@ -302,16 +302,20 @@ static int lockd_inetaddr_event(struct notifier_block *this, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sockaddr_in sin; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; - if (nlmsvc_serv) { + if (nlmsvc_rqst) { dprintk("lockd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; - svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin); + svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, + (struct sockaddr *)&sin); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -328,17 +332,21 @@ static int lockd_inet6addr_event(struct notifier_block *this, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nlm_ntf_refcnt)) goto out; - if (nlmsvc_serv) { + if (nlmsvc_rqst) { dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sin6.sin6_scope_id = ifa->idev->dev->ifindex; - svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6); + svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, + (struct sockaddr *)&sin6); } + atomic_dec(&nlm_ntf_refcnt); + wake_up(&nlm_ntf_wq); out: return NOTIFY_DONE; @@ -349,14 +357,86 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static int lockd_get(void) +static void lockd_unregister_notifiers(void) +{ + unregister_inetaddr_notifier(&lockd_inetaddr_notifier); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&lockd_inet6addr_notifier); +#endif + wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0); +} + +static void lockd_svc_exit_thread(void) +{ + atomic_dec(&nlm_ntf_refcnt); + lockd_unregister_notifiers(); + 
svc_exit_thread(nlmsvc_rqst); +} + +static int lockd_start_svc(struct svc_serv *serv) { - struct svc_serv *serv; int error; - if (nlmsvc_serv) { - nlmsvc_users++; + if (nlmsvc_rqst) return 0; + + /* + * Create the kernel thread and wait for it to start. + */ + nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); + if (IS_ERR(nlmsvc_rqst)) { + error = PTR_ERR(nlmsvc_rqst); + printk(KERN_WARNING + "lockd_up: svc_rqst allocation failed, error=%d\n", + error); + lockd_unregister_notifiers(); + goto out_rqst; + } + + atomic_inc(&nlm_ntf_refcnt); + svc_sock_update_bufs(serv); + serv->sv_maxconn = nlm_max_connections; + + nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name); + if (IS_ERR(nlmsvc_task)) { + error = PTR_ERR(nlmsvc_task); + printk(KERN_WARNING + "lockd_up: kthread_run failed, error=%d\n", error); + goto out_task; + } + nlmsvc_rqst->rq_task = nlmsvc_task; + wake_up_process(nlmsvc_task); + + dprintk("lockd_up: service started\n"); + return 0; + +out_task: + lockd_svc_exit_thread(); + nlmsvc_task = NULL; +out_rqst: + nlmsvc_rqst = NULL; + return error; +} + +static const struct svc_serv_ops lockd_sv_ops = { + .svo_shutdown = svc_rpcb_cleanup, + .svo_enqueue_xprt = svc_xprt_do_enqueue, +}; + +static struct svc_serv *lockd_create_svc(void) +{ + struct svc_serv *serv; + + /* + * Check whether we're already up and running. + */ + if (nlmsvc_rqst) { + /* + * Note: increase service usage, because later in case of error + * svc_destroy() will be called. 
+ */ + svc_get(nlmsvc_rqst->rq_server); + return nlmsvc_rqst->rq_server; } /* @@ -371,44 +451,17 @@ static int lockd_get(void) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } - - serv->sv_maxconn = nlm_max_connections; - error = svc_set_num_threads(serv, NULL, 1); - /* The thread now holds the only reference */ - svc_put(serv); - if (error < 0) - return error; - - nlmsvc_serv = serv; register_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) register_inet6addr_notifier(&lockd_inet6addr_notifier); #endif dprintk("lockd_up: service created\n"); - nlmsvc_users++; - return 0; -} - -static void lockd_put(void) -{ - if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n")) - return; - if (--nlmsvc_users) - return; - - unregister_inetaddr_notifier(&lockd_inetaddr_notifier); -#if IS_ENABLED(CONFIG_IPV6) - unregister_inet6addr_notifier(&lockd_inet6addr_notifier); -#endif - - svc_set_num_threads(nlmsvc_serv, NULL, 0); - nlmsvc_serv = NULL; - dprintk("lockd_down: service destroyed\n"); + return serv; } /* @@ -416,21 +469,36 @@ static void lockd_put(void) */ int lockd_up(struct net *net, const struct cred *cred) { + struct svc_serv *serv; int error; mutex_lock(&nlmsvc_mutex); - error = lockd_get(); - if (error) - goto err; - - error = lockd_up_net(nlmsvc_serv, net, cred); - if (error < 0) { - lockd_put(); - goto err; + serv = lockd_create_svc(); + if (IS_ERR(serv)) { + error = PTR_ERR(serv); + goto err_create; } -err: + error = lockd_up_net(serv, net, cred); + if (error < 0) { + lockd_unregister_notifiers(); + goto err_put; + } + + error = lockd_start_svc(serv); + if (error < 0) { + lockd_down_net(serv, net); + goto err_put; + } + nlmsvc_users++; + /* + * Note: svc_serv structures have an initial 
use count of 1, + * so we exit through here on both success and failure. + */ +err_put: + svc_destroy(serv); +err_create: mutex_unlock(&nlmsvc_mutex); return error; } @@ -443,8 +511,27 @@ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); - lockd_down_net(nlmsvc_serv, net); - lockd_put(); + lockd_down_net(nlmsvc_rqst->rq_server, net); + if (nlmsvc_users) { + if (--nlmsvc_users) + goto out; + } else { + printk(KERN_ERR "lockd_down: no users! task=%p\n", + nlmsvc_task); + BUG(); + } + + if (!nlmsvc_task) { + printk(KERN_ERR "lockd_down: no lockd running.\n"); + BUG(); + } + kthread_stop(nlmsvc_task); + dprintk("lockd_down: service stopped\n"); + lockd_svc_exit_thread(); + dprintk("lockd_down: service destroyed\n"); + nlmsvc_task = NULL; + nlmsvc_rqst = NULL; +out: mutex_unlock(&nlmsvc_mutex); } EXPORT_SYMBOL_GPL(lockd_down); @@ -497,7 +584,7 @@ static struct ctl_table nlm_sysctls[] = { .data = &nsm_use_hostnames, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dobool, + .proc_handler = proc_dointvec, }, { .procname = "nsm_local_state", @@ -562,7 +649,6 @@ static int lockd_authenticate(struct svc_rqst *rqstp) switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: case RPC_AUTH_UNIX: - rqstp->rq_auth_stat = rpc_auth_ok; if (rqstp->rq_proc == 0) return SVC_OK; if (is_callback(rqstp->rq_proc)) { @@ -573,7 +659,6 @@ static int lockd_authenticate(struct svc_rqst *rqstp) } return svc_set_client(rqstp); } - rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } @@ -681,44 +766,6 @@ static void __exit exit_nlm(void) module_init(init_nlm); module_exit(exit_nlm); -/** - * nlmsvc_dispatch - Process an NLM Request - * @rqstp: incoming request - * @statp: pointer to location of accept_stat field in RPC Reply buffer - * - * Return values: - * %0: Processing complete; do not send a Reply - * %1: Processing complete; send Reply in rqstp->rq_res - */ -static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp) -{ - const struct svc_procedure 
*procp = rqstp->rq_procinfo; - - svcxdr_init_decode(rqstp); - if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream)) - goto out_decode_err; - - *statp = procp->pc_func(rqstp); - if (*statp == rpc_drop_reply) - return 0; - if (*statp != rpc_success) - return 1; - - svcxdr_init_encode(rqstp); - if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream)) - goto out_encode_err; - - return 1; - -out_decode_err: - *statp = rpc_garbage_args; - return 1; - -out_encode_err: - *statp = rpc_system_err; - return 1; -} - /* * Define NLM program and procedures */ @@ -728,7 +775,6 @@ static const struct svc_version nlmsvc_version1 = { .vs_nproc = 17, .vs_proc = nlmsvc_procedures, .vs_count = nlmsvc_version1_count, - .vs_dispatch = nlmsvc_dispatch, .vs_xdrsize = NLMSVC_XDRSIZE, }; static unsigned int nlmsvc_version3_count[24]; @@ -737,7 +783,6 @@ static const struct svc_version nlmsvc_version3 = { .vs_nproc = 24, .vs_proc = nlmsvc_procedures, .vs_count = nlmsvc_version3_count, - .vs_dispatch = nlmsvc_dispatch, .vs_xdrsize = NLMSVC_XDRSIZE, }; #ifdef CONFIG_LOCKD_V4 @@ -747,7 +792,6 @@ static const struct svc_version nlmsvc_version4 = { .vs_nproc = 24, .vs_proc = nlmsvc_procedures4, .vs_count = nlmsvc_version4_count, - .vs_dispatch = nlmsvc_dispatch, .vs_xdrsize = NLMSVC_XDRSIZE, }; #endif diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index b72023a6b4c1..fa41dda39925 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -32,10 +32,6 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, if (!nlmsvc_ops) return nlm_lck_denied_nolocks; - if (lock->lock_start > OFFSET_MAX || - (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start)))) - return nlm4_fbig; - /* Obtain host handle */ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) || (argp->monitor && nsm_monitor(host) < 0)) @@ -44,21 +40,13 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. 
*/ if (filp != NULL) { - int mode = lock_to_openmode(&lock->fl); - - error = nlm_lookup_file(rqstp, &file, lock); - if (error) + if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) goto no_locks; *filp = file; /* Set up the missing parts of the file_lock structure */ - lock->fl.fl_flags = FL_POSIX; - lock->fl.fl_file = file->f_file[mode]; + lock->fl.fl_file = file->f_file; lock->fl.fl_pid = current->tgid; - lock->fl.fl_start = (loff_t)lock->lock_start; - lock->fl.fl_end = lock->lock_len ? - (loff_t)(lock->lock_start + lock->lock_len - 1) : - OFFSET_MAX; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); if (!lock->fl.fl_owner) { @@ -96,7 +84,6 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; - struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); @@ -106,7 +93,6 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; - test_owner = argp->lock.fl.fl_owner; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie); if (resp->status == nlm_drop_reply) @@ -114,7 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); - nlmsvc_put_lockowner(test_owner); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -280,6 +266,8 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp) */ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { + dprintk("lockd: %5u callback returned %d\n", task->tk_pid, + -task->tk_status); } static void nlm4svc_callback_release(void *data) @@ -522,239 +510,191 @@ const struct svc_procedure nlmsvc_procedures4[24] = { .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "NULL", }, [NLMPROC_TEST] = { .pc_func = nlm4svc_proc_test, .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_testres, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+2+No+Rg, - .pc_name = "TEST", }, [NLMPROC_LOCK] = { .pc_func = nlm4svc_proc_lock, .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "LOCK", }, [NLMPROC_CANCEL] = { .pc_func = nlm4svc_proc_cancel, .pc_decode = nlm4svc_decode_cancargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "CANCEL", }, [NLMPROC_UNLOCK] = 
{ .pc_func = nlm4svc_proc_unlock, .pc_decode = nlm4svc_decode_unlockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "UNLOCK", }, [NLMPROC_GRANTED] = { .pc_func = nlm4svc_proc_granted, .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "GRANTED", }, [NLMPROC_TEST_MSG] = { .pc_func = nlm4svc_proc_test_msg, .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "TEST_MSG", }, [NLMPROC_LOCK_MSG] = { .pc_func = nlm4svc_proc_lock_msg, .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "LOCK_MSG", }, [NLMPROC_CANCEL_MSG] = { .pc_func = nlm4svc_proc_cancel_msg, .pc_decode = nlm4svc_decode_cancargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "CANCEL_MSG", }, [NLMPROC_UNLOCK_MSG] = { .pc_func = nlm4svc_proc_unlock_msg, .pc_decode = nlm4svc_decode_unlockargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNLOCK_MSG", }, [NLMPROC_GRANTED_MSG] = { .pc_func = nlm4svc_proc_granted_msg, .pc_decode = nlm4svc_decode_testargs, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize 
= sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "GRANTED_MSG", }, [NLMPROC_TEST_RES] = { .pc_func = nlm4svc_proc_null, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "TEST_RES", }, [NLMPROC_LOCK_RES] = { .pc_func = nlm4svc_proc_null, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "LOCK_RES", }, [NLMPROC_CANCEL_RES] = { .pc_func = nlm4svc_proc_null, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "CANCEL_RES", }, [NLMPROC_UNLOCK_RES] = { .pc_func = nlm4svc_proc_null, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNLOCK_RES", }, [NLMPROC_GRANTED_RES] = { .pc_func = nlm4svc_proc_granted_res, .pc_decode = nlm4svc_decode_res, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "GRANTED_RES", }, [NLMPROC_NSM_NOTIFY] = { .pc_func = nlm4svc_proc_sm_notify, .pc_decode = nlm4svc_decode_reboot, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_reboot), - .pc_argzero = sizeof(struct nlm_reboot), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "SM_NOTIFY", }, [17] = { .pc_func = nlm4svc_proc_unused, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero 
= sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, - .pc_name = "UNUSED", }, [18] = { .pc_func = nlm4svc_proc_unused, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, - .pc_name = "UNUSED", }, [19] = { .pc_func = nlm4svc_proc_unused, .pc_decode = nlm4svc_decode_void, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, - .pc_name = "UNUSED", }, [NLMPROC_SHARE] = { .pc_func = nlm4svc_proc_share, .pc_decode = nlm4svc_decode_shareargs, .pc_encode = nlm4svc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, - .pc_name = "SHARE", }, [NLMPROC_UNSHARE] = { .pc_func = nlm4svc_proc_unshare, .pc_decode = nlm4svc_decode_shareargs, .pc_encode = nlm4svc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, - .pc_name = "UNSHARE", }, [NLMPROC_NM_LOCK] = { .pc_func = nlm4svc_proc_nm_lock, .pc_decode = nlm4svc_decode_lockargs, .pc_encode = nlm4svc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "NM_LOCK", }, [NLMPROC_FREE_ALL] = { .pc_func = nlm4svc_proc_free_all, .pc_decode = nlm4svc_decode_notify, .pc_encode = nlm4svc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "FREE_ALL", }, }; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 4e30f3c50970..273a81971ed5 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ 
-31,7 +31,6 @@ #include #include #include -#include #define NLMDBG_FACILITY NLMDBG_SVCLOCK @@ -340,7 +339,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) return lockowner; } -void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) return; @@ -470,27 +469,18 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie, int reclaim) { -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - struct inode *inode = nlmsvc_file_inode(file); -#endif struct nlm_block *block = NULL; int error; - int mode; - int async_block = 0; __be32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", - inode->i_sb->s_id, inode->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_type, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end, wait); - if (nlmsvc_file_file(file)->f_op->lock) { - async_block = wait; - wait = 0; - } - /* Lock file against concurrent access */ mutex_lock(&file->f_mutex); /* Get existing block (in case client is busy-waiting) @@ -534,8 +524,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, if (!wait) lock->fl.fl_flags &= ~FL_SLEEP; - mode = lock_to_openmode(&lock->fl); - error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL); + error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); lock->fl.fl_flags &= ~FL_SLEEP; dprintk("lockd: vfs_lock_file returned %d\n", error); @@ -551,7 +540,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, */ if (wait) break; - ret = async_block ? 
nlm_lck_blocked : nlm_lck_denied; + ret = nlm_lck_denied; goto out; case FILE_LOCK_DEFERRED: if (wait) @@ -588,12 +577,12 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *conflock, struct nlm_cookie *cookie) { int error; - int mode; __be32 ret; + struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", - nlmsvc_file_inode(file)->i_sb->s_id, - nlmsvc_file_inode(file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -603,8 +592,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } - mode = lock_to_openmode(&lock->fl); - error = vfs_test_lock(file->f_file[mode], &lock->fl); + /* If there's a conflicting lock, remember to clean up the test lock */ + test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; + + error = vfs_test_lock(file->f_file, &lock->fl); if (error) { /* We can't currently deal with deferred test requests */ if (error == FILE_LOCK_DEFERRED) @@ -631,6 +622,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); + /* Clean up the test lock */ + lock->fl.fl_owner = NULL; + nlmsvc_put_lockowner(test_owner); + ret = nlm_lck_denied; out: return ret; @@ -646,11 +641,11 @@ out: __be32 nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) { - int error = 0; + int error; dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n", - nlmsvc_file_inode(file)->i_sb->s_id, - nlmsvc_file_inode(file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -659,14 +654,7 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock) nlmsvc_cancel_blocked(net, file, lock); lock->fl.fl_type = F_UNLCK; - lock->fl.fl_file = 
file->f_file[O_RDONLY]; - if (lock->fl.fl_file) - error = vfs_lock_file(lock->fl.fl_file, F_SETLK, - &lock->fl, NULL); - lock->fl.fl_file = file->f_file[O_WRONLY]; - if (lock->fl.fl_file) - error |= vfs_lock_file(lock->fl.fl_file, F_SETLK, - &lock->fl, NULL); + error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); return (error < 0)? nlm_lck_denied_nolocks : nlm_granted; } @@ -683,11 +671,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l { struct nlm_block *block; int status = 0; - int mode; dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", - nlmsvc_file_inode(file)->i_sb->s_id, - nlmsvc_file_inode(file)->i_ino, + locks_inode(file->f_file)->i_sb->s_id, + locks_inode(file->f_file)->i_ino, lock->fl.fl_pid, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -699,10 +686,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); if (block != NULL) { - struct file_lock *fl = &block->b_call->a_args.lock.fl; - - mode = lock_to_openmode(fl); - vfs_cancel_lock(block->b_file->f_file[mode], fl); + vfs_cancel_lock(block->b_file->f_file, + &block->b_call->a_args.lock.fl); status = nlmsvc_unlink_block(block); nlmsvc_release_block(block); } @@ -818,7 +803,6 @@ nlmsvc_grant_blocked(struct nlm_block *block) { struct nlm_file *file = block->b_file; struct nlm_lock *lock = &block->b_call->a_args.lock; - int mode; int error; loff_t fl_start, fl_end; @@ -844,8 +828,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) lock->fl.fl_flags |= FL_SLEEP; fl_start = lock->fl.fl_start; fl_end = lock->fl.fl_end; - mode = lock_to_openmode(&lock->fl); - error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL); + error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); lock->fl.fl_flags &= ~FL_SLEEP; lock->fl.fl_start = fl_start; lock->fl.fl_end = fl_end; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 
32784f508c81..50855f2c1f4b 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -55,7 +55,6 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; - int mode; __be32 error = 0; /* nfsd callbacks must have been installed for this procedure */ @@ -70,15 +69,13 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. */ if (filp != NULL) { - error = cast_status(nlm_lookup_file(rqstp, &file, lock)); + error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh)); if (error != 0) goto no_locks; *filp = file; /* Set up the missing parts of the file_lock structure */ - mode = lock_to_openmode(&lock->fl); - lock->fl.fl_flags = FL_POSIX; - lock->fl.fl_file = file->f_file[mode]; + lock->fl.fl_file = file->f_file; lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); @@ -117,7 +114,6 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) struct nlm_args *argp = rqstp->rq_argp; struct nlm_host *host; struct nlm_file *file; - struct nlm_lockowner *test_owner; __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); @@ -127,8 +123,6 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; - test_owner = argp->lock.fl.fl_owner; - /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie)); if (resp->status == nlm_drop_reply) @@ -137,7 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); - nlmsvc_put_lockowner(test_owner); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -305,6 +299,8 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp) */ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) { + dprintk("lockd: %5u callback returned %d\n", task->tk_pid, + -task->tk_status); } void nlmsvc_release_call(struct nlm_rqst *call) @@ -556,239 +552,191 @@ const struct svc_procedure nlmsvc_procedures[24] = { .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "NULL", }, [NLMPROC_TEST] = { .pc_func = nlmsvc_proc_test, .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_testres, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+2+No+Rg, - .pc_name = "TEST", }, [NLMPROC_LOCK] = { .pc_func = nlmsvc_proc_lock, .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "LOCK", }, [NLMPROC_CANCEL] = { .pc_func = nlmsvc_proc_cancel, .pc_decode = nlmsvc_decode_cancargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "CANCEL", }, 
[NLMPROC_UNLOCK] = { .pc_func = nlmsvc_proc_unlock, .pc_decode = nlmsvc_decode_unlockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "UNLOCK", }, [NLMPROC_GRANTED] = { .pc_func = nlmsvc_proc_granted, .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "GRANTED", }, [NLMPROC_TEST_MSG] = { .pc_func = nlmsvc_proc_test_msg, .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "TEST_MSG", }, [NLMPROC_LOCK_MSG] = { .pc_func = nlmsvc_proc_lock_msg, .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "LOCK_MSG", }, [NLMPROC_CANCEL_MSG] = { .pc_func = nlmsvc_proc_cancel_msg, .pc_decode = nlmsvc_decode_cancargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "CANCEL_MSG", }, [NLMPROC_UNLOCK_MSG] = { .pc_func = nlmsvc_proc_unlock_msg, .pc_decode = nlmsvc_decode_unlockargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNLOCK_MSG", }, [NLMPROC_GRANTED_MSG] = { .pc_func = nlmsvc_proc_granted_msg, .pc_decode = nlmsvc_decode_testargs, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = 
sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "GRANTED_MSG", }, [NLMPROC_TEST_RES] = { .pc_func = nlmsvc_proc_null, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "TEST_RES", }, [NLMPROC_LOCK_RES] = { .pc_func = nlmsvc_proc_null, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "LOCK_RES", }, [NLMPROC_CANCEL_RES] = { .pc_func = nlmsvc_proc_null, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "CANCEL_RES", }, [NLMPROC_UNLOCK_RES] = { .pc_func = nlmsvc_proc_null, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNLOCK_RES", }, [NLMPROC_GRANTED_RES] = { .pc_func = nlmsvc_proc_granted_res, .pc_decode = nlmsvc_decode_res, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_res), - .pc_argzero = sizeof(struct nlm_res), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "GRANTED_RES", }, [NLMPROC_NSM_NOTIFY] = { .pc_func = nlmsvc_proc_sm_notify, .pc_decode = nlmsvc_decode_reboot, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_reboot), - .pc_argzero = sizeof(struct nlm_reboot), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "SM_NOTIFY", }, [17] = { .pc_func = nlmsvc_proc_unused, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct 
nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNUSED", }, [18] = { .pc_func = nlmsvc_proc_unused, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNUSED", }, [19] = { .pc_func = nlmsvc_proc_unused, .pc_decode = nlmsvc_decode_void, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_void), - .pc_argzero = sizeof(struct nlm_void), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = St, - .pc_name = "UNUSED", }, [NLMPROC_SHARE] = { .pc_func = nlmsvc_proc_share, .pc_decode = nlmsvc_decode_shareargs, .pc_encode = nlmsvc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, - .pc_name = "SHARE", }, [NLMPROC_UNSHARE] = { .pc_func = nlmsvc_proc_unshare, .pc_decode = nlmsvc_decode_shareargs, .pc_encode = nlmsvc_encode_shareres, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St+1, - .pc_name = "UNSHARE", }, [NLMPROC_NM_LOCK] = { .pc_func = nlmsvc_proc_nm_lock, .pc_decode = nlmsvc_decode_lockargs, .pc_encode = nlmsvc_encode_res, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_res), .pc_xdrressize = Ck+St, - .pc_name = "NM_LOCK", }, [NLMPROC_FREE_ALL] = { .pc_func = nlmsvc_proc_free_all, .pc_decode = nlmsvc_decode_notify, .pc_encode = nlmsvc_encode_void, .pc_argsize = sizeof(struct nlm_args), - .pc_argzero = sizeof(struct nlm_args), .pc_ressize = sizeof(struct nlm_void), .pc_xdrressize = 0, - .pc_name = "FREE_ALL", }, }; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index e3b6229e7ae5..028fc152da22 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -45,7 +45,7 @@ static inline 
void nlm_debug_print_fh(char *msg, struct nfs_fh *f) static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) { - struct inode *inode = nlmsvc_file_inode(file); + struct inode *inode = locks_inode(file->f_file); dprintk("lockd: %s %s/%ld\n", msg, inode->i_sb->s_id, inode->i_ino); @@ -71,75 +71,56 @@ static inline unsigned int file_hash(struct nfs_fh *f) return tmp & (FILE_NRHASH - 1); } -int lock_to_openmode(struct file_lock *lock) -{ - return (lock->fl_type == F_WRLCK) ? O_WRONLY : O_RDONLY; -} - -/* - * Open the file. Note that if we're reexporting, for example, - * this could block the lockd thread for a while. - * - * We have to make sure we have the right credential to open - * the file. - */ -static __be32 nlm_do_fopen(struct svc_rqst *rqstp, - struct nlm_file *file, int mode) -{ - struct file **fp = &file->f_file[mode]; - __be32 nfserr; - - if (*fp) - return 0; - nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode); - if (nfserr) - dprintk("lockd: open failed (error %d)\n", nfserr); - return nfserr; -} - /* * Lookup file info. If it doesn't exist, create a file info struct * and open a (VFS) file for the given inode. + * + * FIXME: + * Note that we open the file O_RDONLY even when creating write locks. + * This is not quite right, but for now, we assume the client performs + * the proper R/W checking. 
*/ __be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, - struct nlm_lock *lock) + struct nfs_fh *f) { struct nlm_file *file; unsigned int hash; __be32 nfserr; - int mode; - nlm_debug_print_fh("nlm_lookup_file", &lock->fh); + nlm_debug_print_fh("nlm_lookup_file", f); - hash = file_hash(&lock->fh); - mode = lock_to_openmode(&lock->fl); + hash = file_hash(f); /* Lock file table */ mutex_lock(&nlm_file_mutex); hlist_for_each_entry(file, &nlm_files[hash], f_list) - if (!nfs_compare_fh(&file->f_handle, &lock->fh)) { - mutex_lock(&file->f_mutex); - nfserr = nlm_do_fopen(rqstp, file, mode); - mutex_unlock(&file->f_mutex); + if (!nfs_compare_fh(&file->f_handle, f)) goto found; - } - nlm_debug_print_fh("creating file for", &lock->fh); + + nlm_debug_print_fh("creating file for", f); nfserr = nlm_lck_denied_nolocks; file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) - goto out_free; + goto out_unlock; - memcpy(&file->f_handle, &lock->fh, sizeof(struct nfs_fh)); + memcpy(&file->f_handle, f, sizeof(struct nfs_fh)); mutex_init(&file->f_mutex); INIT_HLIST_NODE(&file->f_list); INIT_LIST_HEAD(&file->f_blocks); - nfserr = nlm_do_fopen(rqstp, file, mode); - if (nfserr) - goto out_unlock; + /* Open the file. Note that this must not sleep for too long, else + * we would lock up lockd:-) So no NFS re-exports, folks. + * + * We have to make sure we have the right credential to open + * the file. 
+ */ + if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) { + dprintk("lockd: open failed (error %d)\n", nfserr); + goto out_free; + } hlist_add_head(&file->f_list, &nlm_files[hash]); @@ -147,6 +128,7 @@ found: dprintk("lockd: found file %p (count %d)\n", file, file->f_count); *result = file; file->f_count++; + nfserr = 0; out_unlock: mutex_unlock(&nlm_file_mutex); @@ -166,40 +148,13 @@ nlm_delete_file(struct nlm_file *file) nlm_debug_print_file("closing file", file); if (!hlist_unhashed(&file->f_list)) { hlist_del(&file->f_list); - if (file->f_file[O_RDONLY]) - nlmsvc_ops->fclose(file->f_file[O_RDONLY]); - if (file->f_file[O_WRONLY]) - nlmsvc_ops->fclose(file->f_file[O_WRONLY]); + nlmsvc_ops->fclose(file->f_file); kfree(file); } else { printk(KERN_WARNING "lockd: attempt to release unknown file!\n"); } } -static int nlm_unlock_files(struct nlm_file *file, const struct file_lock *fl) -{ - struct file_lock lock; - - locks_init_lock(&lock); - lock.fl_type = F_UNLCK; - lock.fl_start = 0; - lock.fl_end = OFFSET_MAX; - lock.fl_owner = fl->fl_owner; - lock.fl_pid = fl->fl_pid; - lock.fl_flags = FL_POSIX; - - lock.fl_file = file->f_file[O_RDONLY]; - if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL)) - goto out_err; - lock.fl_file = file->f_file[O_WRONLY]; - if (lock.fl_file && vfs_lock_file(lock.fl_file, F_SETLK, &lock, NULL)) - goto out_err; - return 0; -out_err: - pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); - return 1; -} - /* * Loop over all locks on the given file and perform the specified * action. 
@@ -210,7 +165,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = locks_inode_context(inode); + struct file_lock_context *flctx = inode->i_flctx; struct nlm_host *lockhost; if (!flctx || list_empty_careful(&flctx->flc_posix)) @@ -227,10 +182,17 @@ again: lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host; if (match(lockhost, host)) { + struct file_lock lock = *fl; spin_unlock(&flctx->flc_lock); - if (nlm_unlock_files(file, fl)) + lock.fl_type = F_UNLCK; + lock.fl_start = 0; + lock.fl_end = OFFSET_MAX; + if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) { + printk("lockd: unlock failure in %s:%d\n", + __FILE__, __LINE__); return 1; + } goto again; } } @@ -265,7 +227,7 @@ nlm_file_inuse(struct nlm_file *file) { struct inode *inode = nlmsvc_file_inode(file); struct file_lock *fl; - struct file_lock_context *flctx = locks_inode_context(inode); + struct file_lock_context *flctx = inode->i_flctx; if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) return 1; @@ -284,14 +246,6 @@ nlm_file_inuse(struct nlm_file *file) return 0; } -static void nlm_close_files(struct nlm_file *file) -{ - if (file->f_file[O_RDONLY]) - nlmsvc_ops->fclose(file->f_file[O_RDONLY]); - if (file->f_file[O_WRONLY]) - nlmsvc_ops->fclose(file->f_file[O_WRONLY]); -} - /* * Loop over all files in the file table. 
*/ @@ -322,7 +276,7 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match, if (list_empty(&file->f_blocks) && !file->f_locks && !file->f_shares && !file->f_count) { hlist_del(&file->f_list); - nlm_close_files(file); + nlmsvc_ops->fclose(file->f_file); kfree(file); } } @@ -456,13 +410,12 @@ nlmsvc_invalidate_all(void) nlm_traverse_files(NULL, nlmsvc_is_client, NULL); } - static int nlmsvc_match_sb(void *datap, struct nlm_file *file) { struct super_block *sb = datap; - return sb == nlmsvc_file_inode(file)->i_sb; + return sb == locks_inode(file->f_file)->i_sb; } /** diff --git a/fs/lockd/svcxdr.h b/fs/lockd/svcxdr.h deleted file mode 100644 index 4f1a451da5ba..000000000000 --- a/fs/lockd/svcxdr.h +++ /dev/null @@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Encode/decode NLM basic data types - * - * Basic NLMv3 XDR data types are not defined in an IETF standards - * document. X/Open has a description of these data types that - * is useful. See Chapter 10 of "Protocols for Interworking: - * XNFS, Version 3W". - * - * Basic NLMv4 XDR data types are defined in Appendix II.1.4 of - * RFC 1813: "NFS Version 3 Protocol Specification". - * - * Author: Chuck Lever - * - * Copyright (c) 2020, Oracle and/or its affiliates. 
- */ - -#ifndef _LOCKD_SVCXDR_H_ -#define _LOCKD_SVCXDR_H_ - -static inline bool -svcxdr_decode_stats(struct xdr_stream *xdr, __be32 *status) -{ - __be32 *p; - - p = xdr_inline_decode(xdr, XDR_UNIT); - if (!p) - return false; - *status = *p; - - return true; -} - -static inline bool -svcxdr_encode_stats(struct xdr_stream *xdr, __be32 status) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, XDR_UNIT); - if (!p) - return false; - *p = status; - - return true; -} - -static inline bool -svcxdr_decode_string(struct xdr_stream *xdr, char **data, unsigned int *data_len) -{ - __be32 *p; - u32 len; - - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len > NLM_MAXSTRLEN) - return false; - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - *data_len = len; - *data = (char *)p; - - return true; -} - -/* - * NLM cookies are defined by specification to be a variable-length - * XDR opaque no longer than 1024 bytes. However, this implementation - * limits their length to 32 bytes, and treats zero-length cookies - * specially. 
- */ -static inline bool -svcxdr_decode_cookie(struct xdr_stream *xdr, struct nlm_cookie *cookie) -{ - __be32 *p; - u32 len; - - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len > NLM_MAXCOOKIELEN) - return false; - if (!len) - goto out_hpux; - - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - cookie->len = len; - memcpy(cookie->data, p, len); - - return true; - - /* apparently HPUX can return empty cookies */ -out_hpux: - cookie->len = 4; - memset(cookie->data, 0, 4); - return true; -} - -static inline bool -svcxdr_encode_cookie(struct xdr_stream *xdr, const struct nlm_cookie *cookie) -{ - __be32 *p; - - if (xdr_stream_encode_u32(xdr, cookie->len) < 0) - return false; - p = xdr_reserve_space(xdr, cookie->len); - if (!p) - return false; - memcpy(p, cookie->data, cookie->len); - - return true; -} - -static inline bool -svcxdr_decode_owner(struct xdr_stream *xdr, struct xdr_netobj *obj) -{ - __be32 *p; - u32 len; - - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len > XDR_MAX_NETOBJ) - return false; - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - obj->len = len; - obj->data = (u8 *)p; - - return true; -} - -static inline bool -svcxdr_encode_owner(struct xdr_stream *xdr, const struct xdr_netobj *obj) -{ - if (obj->len > XDR_MAX_NETOBJ) - return false; - return xdr_stream_encode_opaque(xdr, obj->data, obj->len) > 0; -} - -#endif /* _LOCKD_SVCXDR_H_ */ diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 2fb5748dae0c..982629f7b120 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -19,7 +19,7 @@ #include -#include "svcxdr.h" +#define NLMDBG_FACILITY NLMDBG_XDR static inline loff_t @@ -42,313 +42,311 @@ loff_t_to_s32(loff_t offset) } /* - * NLM file handles are defined by specification to be a variable-length - * XDR opaque no longer than 1024 bytes. However, this implementation - * constrains their length to exactly the length of an NFSv2 file - * handle. 
+ * XDR functions for basic NLM types */ -static bool -svcxdr_decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh) +static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c) { - __be32 *p; - u32 len; + unsigned int len; - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len != NFS2_FHSIZE) - return false; - - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - fh->size = NFS2_FHSIZE; - memcpy(fh->data, p, len); - memset(fh->data + NFS2_FHSIZE, 0, sizeof(fh->data) - NFS2_FHSIZE); - - return true; + len = ntohl(*p++); + + if(len==0) + { + c->len=4; + memset(c->data, 0, 4); /* hockeypux brain damage */ + } + else if(len<=NLM_MAXCOOKIELEN) + { + c->len=len; + memcpy(c->data, p, len); + p+=XDR_QUADLEN(len); + } + else + { + dprintk("lockd: bad cookie size %d (only cookies under " + "%d bytes are supported.)\n", + len, NLM_MAXCOOKIELEN); + return NULL; + } + return p; } -static bool -svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) +static inline __be32 * +nlm_encode_cookie(__be32 *p, struct nlm_cookie *c) { - struct file_lock *fl = &lock->fl; - s32 start, len, end; + *p++ = htonl(c->len); + memcpy(p, c->data, c->len); + p+=XDR_QUADLEN(c->len); + return p; +} - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; - if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return false; - if (!svcxdr_decode_owner(xdr, &lock->oh)) - return false; - if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &start) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; +static __be32 * +nlm_decode_fh(__be32 *p, struct nfs_fh *f) +{ + unsigned int len; + + if ((len = ntohl(*p++)) != NFS2_FHSIZE) { + dprintk("lockd: bad fhandle size %d (should be %d)\n", + len, NFS2_FHSIZE); + return NULL; + } + f->size = NFS2_FHSIZE; + memset(f->data, 0, sizeof(f->data)); + memcpy(f->data, p, NFS2_FHSIZE); + return p + XDR_QUADLEN(NFS2_FHSIZE); +} + +/* + * Encode 
and decode owner handle + */ +static inline __be32 * +nlm_decode_oh(__be32 *p, struct xdr_netobj *oh) +{ + return xdr_decode_netobj(p, oh); +} + +static inline __be32 * +nlm_encode_oh(__be32 *p, struct xdr_netobj *oh) +{ + return xdr_encode_netobj(p, oh); +} + +static __be32 * +nlm_decode_lock(__be32 *p, struct nlm_lock *lock) +{ + struct file_lock *fl = &lock->fl; + s32 start, len, end; + + if (!(p = xdr_decode_string_inplace(p, &lock->caller, + &lock->len, + NLM_MAXSTRLEN)) + || !(p = nlm_decode_fh(p, &lock->fh)) + || !(p = nlm_decode_oh(p, &lock->oh))) + return NULL; + lock->svid = ntohl(*p++); locks_init_lock(fl); fl->fl_flags = FL_POSIX; - fl->fl_type = F_RDLCK; + fl->fl_type = F_RDLCK; /* as good as anything else */ + start = ntohl(*p++); + len = ntohl(*p++); end = start + len - 1; + fl->fl_start = s32_to_loff_t(start); + if (len == 0 || end < 0) fl->fl_end = OFFSET_MAX; else fl->fl_end = s32_to_loff_t(end); - - return true; + return p; } -static bool -svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock) +/* + * Encode result of a TEST/TEST_MSG call + */ +static __be32 * +nlm_encode_testres(__be32 *p, struct nlm_res *resp) { - const struct file_lock *fl = &lock->fl; - s32 start, len; + s32 start, len; - /* exclusive */ - if (xdr_stream_encode_bool(xdr, fl->fl_type != F_RDLCK) < 0) - return false; - if (xdr_stream_encode_u32(xdr, lock->svid) < 0) - return false; - if (!svcxdr_encode_owner(xdr, &lock->oh)) - return false; - start = loff_t_to_s32(fl->fl_start); - if (fl->fl_end == OFFSET_MAX) - len = 0; - else - len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); - if (xdr_stream_encode_u32(xdr, start) < 0) - return false; - if (xdr_stream_encode_u32(xdr, len) < 0) - return false; + if (!(p = nlm_encode_cookie(p, &resp->cookie))) + return NULL; + *p++ = resp->status; - return true; -} + if (resp->status == nlm_lck_denied) { + struct file_lock *fl = &resp->lock.fl; -static bool -svcxdr_encode_testrply(struct xdr_stream *xdr, const struct 
nlm_res *resp) -{ - if (!svcxdr_encode_stats(xdr, resp->status)) - return false; - switch (resp->status) { - case nlm_lck_denied: - if (!svcxdr_encode_holder(xdr, &resp->lock)) - return false; + *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; + *p++ = htonl(resp->lock.svid); + + /* Encode owner handle. */ + if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) + return NULL; + + start = loff_t_to_s32(fl->fl_start); + if (fl->fl_end == OFFSET_MAX) + len = 0; + else + len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); + + *p++ = htonl(start); + *p++ = htonl(len); } - return true; + return p; } /* - * Decode Call arguments + * First, the server side XDR functions */ - -bool -nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - return true; -} - -bool -nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - u32 exclusive; + u32 exclusive; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm_decode_cookie(p, &argp->cookie))) + return 0; + + exclusive = ntohl(*p++); + if (!(p = nlm_decode_lock(p, &argp->lock))) + return 0; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +{ + struct nlm_res *resp = rqstp->rq_resp; + + if (!(p = nlm_encode_testres(p, resp))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +int +nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - u32 exclusive; + u32 exclusive; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return false; - if 
(xdr_stream_decode_bool(xdr, &exclusive) < 0) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm_decode_cookie(p, &argp->cookie))) + return 0; + argp->block = ntohl(*p++); + exclusive = ntohl(*p++); + if (!(p = nlm_decode_lock(p, &argp->lock))) + return 0; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return false; + argp->reclaim = ntohl(*p++); + argp->state = ntohl(*p++); argp->monitor = 1; /* monitor client by default */ - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - u32 exclusive; + u32 exclusive; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return false; - if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm_decode_cookie(p, &argp->cookie))) + return 0; + argp->block = ntohl(*p++); + exclusive = ntohl(*p++); + if (!(p = nlm_decode_lock(p, &argp->lock))) + return 0; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm_decode_cookie(p, &argp->cookie)) + || !(p = nlm_decode_lock(p, &argp->lock))) + return 0; argp->lock.fl.fl_type = F_UNLCK; - - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ 
- struct nlm_res *resp = rqstp->rq_argp; - - if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return false; - if (!svcxdr_decode_stats(xdr, &resp->status)) - return false; - - return true; -} - -bool -nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - struct nlm_reboot *argp = rqstp->rq_argp; - __be32 *p; - u32 len; - - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len > SM_MAXSTRLEN) - return false; - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - argp->len = len; - argp->mon = (char *)p; - if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return false; - p = xdr_inline_decode(xdr, SM_PRIV_SIZE); - if (!p) - return false; - memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - - return true; -} - -bool -nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); - lock->svid = ~(u32)0; + lock->svid = ~(u32) 0; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; - if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return false; - if (!svcxdr_decode_owner(xdr, &lock->oh)) - return false; - /* XXX: Range checks are missing in the original code */ - if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return false; - - return true; + if (!(p = nlm_decode_cookie(p, &argp->cookie)) + || !(p = xdr_decode_string_inplace(p, &lock->caller, + &lock->len, NLM_MAXSTRLEN)) + || !(p = nlm_decode_fh(p, &lock->fh)) + || !(p = nlm_decode_oh(p, &lock->oh))) + return 0; + argp->fsm_mode = ntohl(*p++); + argp->fsm_access = ntohl(*p++); + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int 
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +{ + struct nlm_res *resp = rqstp->rq_resp; + + if (!(p = nlm_encode_cookie(p, &resp->cookie))) + return 0; + *p++ = resp->status; + *p++ = xdr_zero; /* sequence argument */ + return xdr_ressize_check(rqstp, p); +} + +int +nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p) +{ + struct nlm_res *resp = rqstp->rq_resp; + + if (!(p = nlm_encode_cookie(p, &resp->cookie))) + return 0; + *p++ = resp->status; + return xdr_ressize_check(rqstp, p); +} + +int +nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; - if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return false; - - return true; + if (!(p = xdr_decode_string_inplace(p, &lock->caller, + &lock->len, NLM_MAXSTRLEN))) + return 0; + argp->state = ntohl(*p++); + return xdr_argsize_check(rqstp, p); } - -/* - * Encode Reply results - */ - -bool -nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) { - return true; + struct nlm_reboot *argp = rqstp->rq_argp; + + if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN))) + return 0; + argp->state = ntohl(*p++); + memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); + p += XDR_QUADLEN(SM_PRIV_SIZE); + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p) { - struct nlm_res *resp = rqstp->rq_resp; + struct nlm_res *resp = rqstp->rq_argp; - return svcxdr_encode_cookie(xdr, &resp->cookie) && - svcxdr_encode_testrply(xdr, resp); + if (!(p = nlm_decode_cookie(p, &resp->cookie))) + return 0; + resp->status = *p++; + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int 
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p) { - struct nlm_res *resp = rqstp->rq_resp; - - return svcxdr_encode_cookie(xdr, &resp->cookie) && - svcxdr_encode_stats(xdr, resp->status); + return xdr_argsize_check(rqstp, p); } -bool -nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p) { - struct nlm_res *resp = rqstp->rq_resp; - - if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return false; - if (!svcxdr_encode_stats(xdr, resp->status)) - return false; - /* sequence */ - if (xdr_stream_encode_u32(xdr, 0) < 0) - return false; - - return true; + return xdr_ressize_check(rqstp, p); } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 5fcbf30cd275..5fa9f48a9dba 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -18,7 +18,14 @@ #include #include -#include "svcxdr.h" +#define NLMDBG_FACILITY NLMDBG_XDR + +static inline loff_t +s64_to_loff_t(__s64 offset) +{ + return (loff_t)offset; +} + static inline s64 loff_t_to_s64(loff_t offset) @@ -33,317 +40,310 @@ loff_t_to_s64(loff_t offset) return res; } -void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len) +/* + * XDR functions for basic NLM types + */ +static __be32 * +nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c) { - s64 end = off + len - 1; + unsigned int len; - fl->fl_start = off; - if (len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = end; + len = ntohl(*p++); + + if(len==0) + { + c->len=4; + memset(c->data, 0, 4); /* hockeypux brain damage */ + } + else if(len<=NLM_MAXCOOKIELEN) + { + c->len=len; + memcpy(c->data, p, len); + p+=XDR_QUADLEN(len); + } + else + { + dprintk("lockd: bad cookie size %d (only cookies under " + "%d bytes are supported.)\n", + len, NLM_MAXCOOKIELEN); + return NULL; + } + return p; +} + +static __be32 * +nlm4_encode_cookie(__be32 *p, struct nlm_cookie *c) +{ + *p++ = htonl(c->len); + memcpy(p, c->data, c->len); + p+=XDR_QUADLEN(c->len); + return p; +} + +static 
__be32 * +nlm4_decode_fh(__be32 *p, struct nfs_fh *f) +{ + memset(f->data, 0, sizeof(f->data)); + f->size = ntohl(*p++); + if (f->size > NFS_MAXFHSIZE) { + dprintk("lockd: bad fhandle size %d (should be <=%d)\n", + f->size, NFS_MAXFHSIZE); + return NULL; + } + memcpy(f->data, p, f->size); + return p + XDR_QUADLEN(f->size); } /* - * NLM file handles are defined by specification to be a variable-length - * XDR opaque no longer than 1024 bytes. However, this implementation - * limits their length to the size of an NFSv3 file handle. + * Encode and decode owner handle */ -static bool -svcxdr_decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh) +static __be32 * +nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh) { - __be32 *p; - u32 len; - - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len > NFS_MAXFHSIZE) - return false; - - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - fh->size = len; - memcpy(fh->data, p, len); - memset(fh->data + len, 0, sizeof(fh->data) - len); - - return true; + return xdr_decode_netobj(p, oh); } -static bool -svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) +static __be32 * +nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) { - struct file_lock *fl = &lock->fl; + struct file_lock *fl = &lock->fl; + __u64 len, start; + __s64 end; - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; - if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return false; - if (!svcxdr_decode_owner(xdr, &lock->oh)) - return false; - if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) - return false; - if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0) - return false; - if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0) - return false; + if (!(p = xdr_decode_string_inplace(p, &lock->caller, + &lock->len, NLM_MAXSTRLEN)) + || !(p = nlm4_decode_fh(p, &lock->fh)) + || !(p = nlm4_decode_oh(p, &lock->oh))) + return NULL; + lock->svid = ntohl(*p++); locks_init_lock(fl); fl->fl_flags = FL_POSIX; - 
fl->fl_type = F_RDLCK; - nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len); - return true; -} + fl->fl_type = F_RDLCK; /* as good as anything else */ + p = xdr_decode_hyper(p, &start); + p = xdr_decode_hyper(p, &len); + end = start + len - 1; -static bool -svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock) -{ - const struct file_lock *fl = &lock->fl; - s64 start, len; + fl->fl_start = s64_to_loff_t(start); - /* exclusive */ - if (xdr_stream_encode_bool(xdr, fl->fl_type != F_RDLCK) < 0) - return false; - if (xdr_stream_encode_u32(xdr, lock->svid) < 0) - return false; - if (!svcxdr_encode_owner(xdr, &lock->oh)) - return false; - start = loff_t_to_s64(fl->fl_start); - if (fl->fl_end == OFFSET_MAX) - len = 0; + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; else - len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1); - if (xdr_stream_encode_u64(xdr, start) < 0) - return false; - if (xdr_stream_encode_u64(xdr, len) < 0) - return false; - - return true; + fl->fl_end = s64_to_loff_t(end); + return p; } -static bool -svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp) +/* + * Encode result of a TEST/TEST_MSG call + */ +static __be32 * +nlm4_encode_testres(__be32 *p, struct nlm_res *resp) { - if (!svcxdr_encode_stats(xdr, resp->status)) - return false; - switch (resp->status) { - case nlm_lck_denied: - if (!svcxdr_encode_holder(xdr, &resp->lock)) - return false; + s64 start, len; + + dprintk("xdr: before encode_testres (p %p resp %p)\n", p, resp); + if (!(p = nlm4_encode_cookie(p, &resp->cookie))) + return NULL; + *p++ = resp->status; + + if (resp->status == nlm_lck_denied) { + struct file_lock *fl = &resp->lock.fl; + + *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; + *p++ = htonl(resp->lock.svid); + + /* Encode owner handle. 
*/ + if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) + return NULL; + + start = loff_t_to_s64(fl->fl_start); + if (fl->fl_end == OFFSET_MAX) + len = 0; + else + len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1); + + p = xdr_encode_hyper(p, start); + p = xdr_encode_hyper(p, len); + dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n", + resp->status, (int)resp->lock.svid, fl->fl_type, + (long long)fl->fl_start, (long long)fl->fl_end); } - return true; + dprintk("xdr: after encode_testres (p %p resp %p)\n", p, resp); + return p; } /* - * Decode Call arguments + * First, the server side XDR functions */ - -bool -nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - return true; -} - -bool -nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - u32 exclusive; + u32 exclusive; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm4_decode_cookie(p, &argp->cookie))) + return 0; + + exclusive = ntohl(*p++); + if (!(p = nlm4_decode_lock(p, &argp->lock))) + return 0; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p) +{ + struct nlm_res *resp = rqstp->rq_resp; + + if (!(p = nlm4_encode_testres(p, resp))) + return 0; + return xdr_ressize_check(rqstp, p); +} + +int +nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - u32 exclusive; + u32 exclusive; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return false; - if (xdr_stream_decode_bool(xdr, 
&exclusive) < 0) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm4_decode_cookie(p, &argp->cookie))) + return 0; + argp->block = ntohl(*p++); + exclusive = ntohl(*p++); + if (!(p = nlm4_decode_lock(p, &argp->lock))) + return 0; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return false; + argp->reclaim = ntohl(*p++); + argp->state = ntohl(*p++); argp->monitor = 1; /* monitor client by default */ - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - u32 exclusive; + u32 exclusive; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (xdr_stream_decode_bool(xdr, &argp->block) < 0) - return false; - if (xdr_stream_decode_bool(xdr, &exclusive) < 0) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm4_decode_cookie(p, &argp->cookie))) + return 0; + argp->block = ntohl(*p++); + exclusive = ntohl(*p++); + if (!(p = nlm4_decode_lock(p, &argp->lock))) + return 0; if (exclusive) argp->lock.fl.fl_type = F_WRLCK; - - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (!svcxdr_decode_lock(xdr, &argp->lock)) - return false; + if (!(p = nlm4_decode_cookie(p, &argp->cookie)) + || !(p = nlm4_decode_lock(p, &argp->lock))) + return 0; argp->lock.fl.fl_type = F_UNLCK; - - return true; + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - struct nlm_res 
*resp = rqstp->rq_argp; - - if (!svcxdr_decode_cookie(xdr, &resp->cookie)) - return false; - if (!svcxdr_decode_stats(xdr, &resp->status)) - return false; - - return true; -} - -bool -nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - struct nlm_reboot *argp = rqstp->rq_argp; - __be32 *p; - u32 len; - - if (xdr_stream_decode_u32(xdr, &len) < 0) - return false; - if (len > SM_MAXSTRLEN) - return false; - p = xdr_inline_decode(xdr, len); - if (!p) - return false; - argp->len = len; - argp->mon = (char *)p; - if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return false; - p = xdr_inline_decode(xdr, SM_PRIV_SIZE); - if (!p) - return false; - memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); - - return true; -} - -bool -nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); - lock->svid = ~(u32)0; + lock->svid = ~(u32) 0; - if (!svcxdr_decode_cookie(xdr, &argp->cookie)) - return false; - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; - if (!svcxdr_decode_fhandle(xdr, &lock->fh)) - return false; - if (!svcxdr_decode_owner(xdr, &lock->oh)) - return false; - /* XXX: Range checks are missing in the original code */ - if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0) - return false; - - return true; + if (!(p = nlm4_decode_cookie(p, &argp->cookie)) + || !(p = xdr_decode_string_inplace(p, &lock->caller, + &lock->len, NLM_MAXSTRLEN)) + || !(p = nlm4_decode_fh(p, &lock->fh)) + || !(p = nlm4_decode_oh(p, &lock->oh))) + return 0; + argp->fsm_mode = ntohl(*p++); + argp->fsm_access = ntohl(*p++); + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int 
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p) +{ + struct nlm_res *resp = rqstp->rq_resp; + + if (!(p = nlm4_encode_cookie(p, &resp->cookie))) + return 0; + *p++ = resp->status; + *p++ = xdr_zero; /* sequence argument */ + return xdr_ressize_check(rqstp, p); +} + +int +nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p) +{ + struct nlm_res *resp = rqstp->rq_resp; + + if (!(p = nlm4_encode_cookie(p, &resp->cookie))) + return 0; + *p++ = resp->status; + return xdr_ressize_check(rqstp, p); +} + +int +nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p) { struct nlm_args *argp = rqstp->rq_argp; struct nlm_lock *lock = &argp->lock; - if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) - return false; - if (xdr_stream_decode_u32(xdr, &argp->state) < 0) - return false; - - return true; + if (!(p = xdr_decode_string_inplace(p, &lock->caller, + &lock->len, NLM_MAXSTRLEN))) + return 0; + argp->state = ntohl(*p++); + return xdr_argsize_check(rqstp, p); } - -/* - * Encode Reply results - */ - -bool -nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p) { - return true; + struct nlm_reboot *argp = rqstp->rq_argp; + + if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN))) + return 0; + argp->state = ntohl(*p++); + memcpy(&argp->priv.data, p, sizeof(argp->priv.data)); + p += XDR_QUADLEN(SM_PRIV_SIZE); + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p) { - struct nlm_res *resp = rqstp->rq_resp; + struct nlm_res *resp = rqstp->rq_argp; - return svcxdr_encode_cookie(xdr, &resp->cookie) && - svcxdr_encode_testrply(xdr, resp); + if (!(p = nlm4_decode_cookie(p, &resp->cookie))) + return 0; + resp->status = *p++; + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int 
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p) { - struct nlm_res *resp = rqstp->rq_resp; - - return svcxdr_encode_cookie(xdr, &resp->cookie) && - svcxdr_encode_stats(xdr, resp->status); + return xdr_argsize_check(rqstp, p); } -bool -nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p) { - struct nlm_res *resp = rqstp->rq_resp; - - if (!svcxdr_encode_cookie(xdr, &resp->cookie)) - return false; - if (!svcxdr_encode_stats(xdr, resp->status)) - return false; - /* sequence */ - if (xdr_stream_encode_u32(xdr, 0) < 0) - return false; - - return true; + return xdr_ressize_check(rqstp, p); } diff --git a/fs/locks.c b/fs/locks.c index b0753c8871fb..cbb5701ce9f3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -251,7 +251,7 @@ locks_get_lock_context(struct inode *inode, int type) struct file_lock_context *ctx; /* paired with cmpxchg() below */ - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (likely(ctx) || type == F_UNLCK) goto out; @@ -270,7 +270,7 @@ locks_get_lock_context(struct inode *inode, int type) */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); } out: trace_locks_get_lock_context(inode, type, ctx); @@ -323,7 +323,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list, void locks_free_lock_context(struct inode *inode) { - struct file_lock_context *ctx = locks_inode_context(inode); + struct file_lock_context *ctx = inode->i_flctx; if (unlikely(ctx)) { locks_check_ctx_lists(inode); @@ -376,34 +376,6 @@ void locks_release_private(struct file_lock *fl) } EXPORT_SYMBOL_GPL(locks_release_private); -/** - * locks_owner_has_blockers - Check for blocking lock requests - * @flctx: file lock context - * @owner: lock owner - * - * Return values: - * %true: @owner has at least one blocker - * %false: @owner has no blockers - */ -bool 
locks_owner_has_blockers(struct file_lock_context *flctx, - fl_owner_t owner) -{ - struct file_lock *fl; - - spin_lock(&flctx->flc_lock); - list_for_each_entry(fl, &flctx->flc_posix, fl_list) { - if (fl->fl_owner != owner) - continue; - if (!list_empty(&fl->fl_blocked_requests)) { - spin_unlock(&flctx->flc_lock); - return true; - } - } - spin_unlock(&flctx->flc_lock); - return false; -} -EXPORT_SYMBOL_GPL(locks_owner_has_blockers); - /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { @@ -982,32 +954,19 @@ posix_test_lock(struct file *filp, struct file_lock *fl) struct file_lock *cfl; struct file_lock_context *ctx; struct inode *inode = locks_inode(filp); - void *owner; - void (*func)(void); - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->fl_type = F_UNLCK; return; } -retry: spin_lock(&ctx->flc_lock); list_for_each_entry(cfl, &ctx->flc_posix, fl_list) { - if (!posix_locks_conflict(fl, cfl)) - continue; - if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable - && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) { - owner = cfl->fl_lmops->lm_mod_owner; - func = cfl->fl_lmops->lm_expire_lock; - __module_get(owner); - spin_unlock(&ctx->flc_lock); - (*func)(); - module_put(owner); - goto retry; + if (posix_locks_conflict(fl, cfl)) { + locks_copy_conflock(fl, cfl); + goto out; } - locks_copy_conflock(fl, cfl); - goto out; } fl->fl_type = F_UNLCK; out: @@ -1181,8 +1140,6 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, int error; bool added = false; LIST_HEAD(dispose); - void *owner; - void (*func)(void); ctx = locks_get_lock_context(inode, request->fl_type); if (!ctx) @@ -1201,7 +1158,6 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, new_fl2 = locks_alloc_lock(); } -retry: percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); /* @@ -1213,17 +1169,6 @@ retry: list_for_each_entry(fl, &ctx->flc_posix, 
fl_list) { if (!posix_locks_conflict(request, fl)) continue; - if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable - && (*fl->fl_lmops->lm_lock_expirable)(fl)) { - owner = fl->fl_lmops->lm_mod_owner; - func = fl->fl_lmops->lm_expire_lock; - __module_get(owner); - spin_unlock(&ctx->flc_lock); - percpu_up_read(&file_rwsem); - (*func)(); - module_put(owner); - goto retry; - } if (conflock) locks_copy_conflock(conflock, fl); error = -EAGAIN; @@ -1674,7 +1619,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) new_fl->fl_flags = type; /* typically we will check that ctx is non-NULL before calling */ - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; @@ -1779,7 +1724,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time) struct file_lock_context *ctx; struct file_lock *fl; - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); fl = list_first_entry_or_null(&ctx->flc_lease, @@ -1825,7 +1770,7 @@ int fcntl_getlease(struct file *filp) int type = F_UNLCK; LIST_HEAD(dispose); - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -1863,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if (flags & FL_DELEG) - /* We leave these checks to the caller */ - return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? 
-EAGAIN : 0; @@ -2014,7 +1956,7 @@ static int generic_delete_lease(struct file *filp, void *owner) struct file_lock_context *ctx; LIST_HEAD(dispose); - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; @@ -2594,15 +2536,14 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, */ if (!error && file_lock->fl_type != F_UNLCK && !(file_lock->fl_flags & FL_OFDLCK)) { - struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ - spin_lock(&files->file_lock); - f = files_lookup_fd_locked(files, fd); - spin_unlock(&files->file_lock); + spin_lock(&current->files->file_lock); + f = fcheck(fd); + spin_unlock(&current->files->file_lock); if (f != filp) { file_lock->fl_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); @@ -2726,15 +2667,14 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, */ if (!error && file_lock->fl_type != F_UNLCK && !(file_lock->fl_flags & FL_OFDLCK)) { - struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ - spin_lock(&files->file_lock); - f = files_lookup_fd_locked(files, fd); - spin_unlock(&files->file_lock); + spin_lock(&current->files->file_lock); + f = fcheck(fd); + spin_unlock(&current->files->file_lock); if (f != filp) { file_lock->fl_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); @@ -2765,7 +2705,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway.
*/ - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2838,7 +2778,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = locks_inode_context(locks_inode(filp)); + ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); if (!ctx) return; @@ -2885,7 +2825,7 @@ bool vfs_inode_has_locks(struct inode *inode) struct file_lock_context *ctx; bool ret; - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx) return false; @@ -3030,7 +2970,7 @@ void show_fd_locks(struct seq_file *f, struct file_lock_context *ctx; int id = 0; - ctx = locks_inode_context(inode); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx) return; diff --git a/fs/namei.c b/fs/namei.c index 6b85ad8a1555..8cea84ecbf56 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4361,14 +4361,11 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. 
*/ -int vfs_rename(struct renamedata *rd) +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode, unsigned int flags) { int error; - struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir; - struct dentry *old_dentry = rd->old_dentry; - struct dentry *new_dentry = rd->new_dentry; - struct inode **delegated_inode = rd->delegated_inode; - unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; @@ -4516,7 +4513,6 @@ EXPORT_SYMBOL_NS(vfs_rename, ANDROID_GKI_VFS_EXPORT_ONLY); int do_renameat2(int olddfd, struct filename *from, int newdfd, struct filename *to, unsigned int flags) { - struct renamedata rd; struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct path old_path, new_path; @@ -4620,14 +4616,9 @@ retry_deleg: &new_path, new_dentry, flags); if (error) goto exit5; - - rd.old_dir = old_path.dentry->d_inode; - rd.old_dentry = old_dentry; - rd.new_dir = new_path.dentry->d_inode; - rd.new_dentry = new_dentry; - rd.delegated_inode = &delegated_inode; - rd.flags = flags; - error = vfs_rename(&rd); + error = vfs_rename(old_path.dentry->d_inode, old_dentry, + new_path.dentry->d_inode, new_dentry, + &delegated_inode, flags); exit5: dput(new_dentry); exit4: diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index a9e563145e0c..73000aa2d220 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -699,7 +699,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, xdr_init_decode_pages(&xdr, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_page(&xdr, scratch); + xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); status = -EIO; p = xdr_inline_decode(&xdr, 4); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 16412d6636e8..6e3a14fdff9c 
100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out; xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_page(&xdr, scratch); + xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); p = xdr_inline_decode(&xdr, sizeof(__be32)); if (!p) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8fe143cad4a2..7817ad94a6ba 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -44,18 +45,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) int ret; struct nfs_net *nn = net_generic(net, nfs_net_id); - ret = svc_xprt_create(serv, "tcp", net, PF_INET, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, - cred); + ret = svc_create_xprt(serv, "tcp", net, PF_INET, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", nn->nfs_callback_tcpport, PF_INET, net->ns.inum); - ret = svc_xprt_create(serv, "tcp", net, PF_INET6, - nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, - cred); + ret = svc_create_xprt(serv, "tcp", net, PF_INET6, + nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS, + cred); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", @@ -80,6 +81,9 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { + + if (signal_pending(current)) + flush_signals(current); /* * Listen for a request on the socket */ @@ -88,8 +92,8 @@ nfs4_callback_svc(void *vrqstp) continue; svc_process(rqstp); } - svc_exit_thread(rqstp); + module_put_and_exit(0); return 0; } @@ -109,7 +113,11 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - prepare_to_wait(&serv->sv_cb_waitq, &wq, 
TASK_IDLE); + + if (signal_pending(current)) + flush_signals(current); + + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, @@ -124,12 +132,12 @@ nfs41_callback_svc(void *vrqstp) } else { spin_unlock_bh(&serv->sv_cb_lock); if (!kthread_should_stop()) - freezable_schedule(); + schedule(); finish_wait(&serv->sv_cb_waitq, &wq); } } - svc_exit_thread(rqstp); + module_put_and_exit(0); return 0; } @@ -161,12 +169,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS) nrservs = NFS4_MIN_NR_CALLBACK_THREADS; - if (serv->sv_nrthreads == nrservs) + if (serv->sv_nrthreads-1 == nrservs) return 0; - ret = svc_set_num_threads(serv, NULL, nrservs); + ret = serv->sv_ops->svo_setup(serv, NULL, nrservs); if (ret) { - svc_set_num_threads(serv, NULL, 0); + serv->sv_ops->svo_setup(serv, NULL, 0); return ret; } dprintk("nfs_callback_up: service started\n"); @@ -181,7 +189,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc return; dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); - svc_xprt_destroy_all(serv, net); + svc_shutdown_net(serv, net); } static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, @@ -224,17 +232,59 @@ err_bind: return ret; } +static const struct svc_serv_ops nfs40_cb_sv_ops = { + .svo_function = nfs4_callback_svc, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads_sync, + .svo_module = THIS_MODULE, +}; +#if defined(CONFIG_NFS_V4_1) +static const struct svc_serv_ops nfs41_cb_sv_ops = { + .svo_function = nfs41_callback_svc, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads_sync, + .svo_module = THIS_MODULE, +}; + +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { + [0] = &nfs40_cb_sv_ops, + [1] = &nfs41_cb_sv_ops, +}; +#else +static const struct 
svc_serv_ops *nfs4_cb_sv_ops[] = { + [0] = &nfs40_cb_sv_ops, + [1] = NULL, +}; +#endif + static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; - int (*threadfn)(void *data); + const struct svc_serv_ops *sv_ops; struct svc_serv *serv; /* * Check whether we're already up and running. */ - if (cb_info->serv) - return svc_get(cb_info->serv); + if (cb_info->serv) { + /* + * Note: increase service usage, because later in case of error + * svc_destroy() will be called. + */ + svc_get(cb_info->serv); + return cb_info->serv; + } + + switch (minorversion) { + case 0: + sv_ops = nfs4_cb_sv_ops[0]; + break; + default: + sv_ops = nfs4_cb_sv_ops[1]; + } + + if (sv_ops == NULL) + return ERR_PTR(-ENOTSUPP); /* * Sanity check: if there's no task, @@ -244,16 +294,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - threadfn = nfs4_callback_svc; -#if defined(CONFIG_NFS_V4_1) - if (minorversion) - threadfn = nfs41_callback_svc; -#else - if (minorversion) - return ERR_PTR(-ENOTSUPP); -#endif - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, - threadfn); + serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); @@ -294,10 +335,16 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_start; cb_info->users++; + /* + * svc_create creates the svc_serv with sv_nrthreads == 1, and then + * svc_prepare_thread increments that. So we need to call svc_destroy + * on both success and failure so that the refcount is 1 when the + * thread exits. 
+ */ err_net: if (!cb_info->users) cb_info->serv = NULL; - svc_put(serv); + svc_destroy(serv); err_create: mutex_unlock(&nfs_callback_mutex); return ret; @@ -322,8 +369,8 @@ void nfs_callback_down(int minorversion, struct net *net) cb_info->users--; if (cb_info->users == 0) { svc_get(serv); - svc_set_num_threads(serv, NULL, 0); - svc_put(serv); + serv->sv_ops->svo_setup(serv, NULL, 0); + svc_destroy(serv); dprintk("nfs_callback_down: service destroyed\n"); cb_info->serv = NULL; } @@ -382,8 +429,6 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) */ static int nfs_callback_authenticate(struct svc_rqst *rqstp) { - rqstp->rq_auth_stat = rpc_autherr_badcred; - switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) @@ -394,8 +439,6 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) if (svc_is_backchannel(rqstp)) return SVC_DENIED; } - - rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index db69fc267c9a..ca8a4aa351dc 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -63,13 +63,14 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp) return htonl(NFS4_OK); } -/* - * svc_process_common() looks for an XDR encoder to know when - * not to drop a Reply. 
- */ -static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p) { - return true; + return xdr_argsize_check(rqstp, p); +} + +static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_ressize_check(rqstp, p); } static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, @@ -983,17 +984,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) out_invalidcred: pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n"); - rqstp->rq_auth_stat = rpc_autherr_badcred; - return rpc_success; -} - -static int -nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp) -{ - const struct svc_procedure *procp = rqstp->rq_procinfo; - - *statp = procp->pc_func(rqstp); - return 1; + return svc_return_autherr(rqstp, rpc_autherr_badcred); } /* @@ -1062,18 +1053,16 @@ static struct callback_op callback_ops[] = { static const struct svc_procedure nfs4_callback_procedures1[] = { [CB_NULL] = { .pc_func = nfs4_callback_null, + .pc_decode = nfs4_decode_void, .pc_encode = nfs4_encode_void, .pc_xdrressize = 1, - .pc_name = "NULL", }, [CB_COMPOUND] = { .pc_func = nfs4_callback_compound, .pc_encode = nfs4_encode_void, .pc_argsize = 256, - .pc_argzero = 256, .pc_ressize = 256, .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, - .pc_name = "COMPOUND", } }; @@ -1084,7 +1073,7 @@ const struct svc_version nfs4_callback_version1 = { .vs_proc = nfs4_callback_procedures1, .vs_count = nfs4_callback_count1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, - .vs_dispatch = nfs_callback_dispatch, + .vs_dispatch = NULL, .vs_hidden = true, .vs_need_cong_ctrl = true, }; @@ -1096,7 +1085,7 @@ const struct svc_version nfs4_callback_version4 = { .vs_proc = nfs4_callback_procedures1, .vs_count = nfs4_callback_count4, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, - .vs_dispatch = nfs_callback_dispatch, + .vs_dispatch = NULL, .vs_hidden = true, .vs_need_cong_ctrl = true, }; diff --git a/fs/nfs/dir.c 
b/fs/nfs/dir.c index 935029632d5f..9f88ca7b2001 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -576,7 +576,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en goto out_nopages; xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { if (entry->label) diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 993be63ab301..3430d6891e89 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -167,25 +167,8 @@ out: return parent; } -static u64 nfs_fetch_iversion(struct inode *inode) -{ - struct nfs_server *server = NFS_SERVER(inode); - - if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE | - NFS_INO_REVAL_PAGECACHE)) - __nfs_revalidate_inode(server, inode); - return inode_peek_iversion_raw(inode); -} - const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .fetch_iversion = nfs_fetch_iversion, - .flags = EXPORT_OP_NOWCC | - EXPORT_OP_NOSUBTREECHK | - EXPORT_OP_CLOSE_BEFORE_UNLINK | - EXPORT_OP_REMOTE_FS | - EXPORT_OP_NOATOMIC_ATTR | - EXPORT_OP_FLUSH_ON_CLOSE, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d35aae47b062..7be1a7f7fcb2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -798,9 +798,6 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) nfs_inc_stats(inode, NFSIOS_VFSLOCK); - if (fl->fl_flags & FL_RECLAIM) - return -ENOGRACE; - /* No mandatory locks over NFS */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) goto out_err; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 2ed8b6885b09..deecfb50dd7e 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -293,6 +293,8 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + dprintk("--> %s task->tk_status %d\n", __func__, 
task->tk_status); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs41_sequence_done(task, &hdr->res.seq_res); @@ -664,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, return -ENOMEM; xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), * num_fh (4) */ diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 86c3f7e69ec4..d913e818858f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err; xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* Get the stripe count (number of stripe index) */ p = xdr_inline_decode(&stream, 4); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a263bfec4244..e4f2820ba5a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* stripe unit and mirror_array_cnt */ rc = -EIO; @@ -1419,6 +1419,8 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c 
b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index bfa7202ca7be..1f12297109b4 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, INIT_LIST_HEAD(&dsaddrs); xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_page(&stream, scratch); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* multipath count */ p = xdr_inline_decode(&stream, 4); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index df5bee2f505c..f2248d9d4db5 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1536,7 +1536,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; - xdr_set_scratch_page(xdr, res->scratch); + xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); status = decode_compound_hdr(xdr, &hdr); if (status) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index d8fc5d72a161..afb617a4a7e4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2757,7 +2757,7 @@ again: goto again; nfs_put_client(clp); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4e5c6cb770ad..f1e599553f2b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6404,8 +6404,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; - if (res->acl_scratch != NULL) - xdr_set_scratch_page(xdr, res->acl_scratch); + if (res->acl_scratch != NULL) { + void *p = page_address(res->acl_scratch); + xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); + } status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d79a3b6cb070..17fef6eb490c 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -870,6 +870,9 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) 
struct nfs_pgio_header *hdr = calldata; struct inode *inode = hdr->inode; + dprintk("NFS: %s: %5u, (status %d)\n", __func__, + task->tk_pid, task->tk_status); + if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1ffce9076060..b3fcc27b9564 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -86,11 +86,9 @@ const struct super_operations nfs_sops = { }; EXPORT_SYMBOL_GPL(nfs_sops); -#ifdef CONFIG_NFS_V4_2 static const struct nfs_ssc_client_ops nfs_ssc_clnt_ops_tbl = { .sco_sb_deactive = nfs_sb_deactive, }; -#endif #if IS_ENABLED(CONFIG_NFS_V4) static int __init register_nfs4_fs(void) @@ -113,7 +111,6 @@ static void unregister_nfs4_fs(void) } #endif -#ifdef CONFIG_NFS_V4_2 static void nfs_ssc_register_ops(void) { nfs_ssc_register(&nfs_ssc_clnt_ops_tbl); @@ -123,7 +120,6 @@ static void nfs_ssc_unregister_ops(void) { nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl); } -#endif /* CONFIG_NFS_V4_2 */ static struct shrinker acl_shrinker = { .count_objects = nfs_access_cache_count, @@ -152,9 +148,7 @@ int __init register_nfs_fs(void) ret = register_shrinker(&acl_shrinker); if (ret < 0) goto error_3; -#ifdef CONFIG_NFS_V4_2 nfs_ssc_register_ops(); -#endif return 0; error_3: nfs_unregister_sysctl(); @@ -174,9 +168,7 @@ void __exit unregister_nfs_fs(void) unregister_shrinker(&acl_shrinker); nfs_unregister_sysctl(); unregister_nfs4_fs(); -#ifdef CONFIG_NFS_V4_2 nfs_ssc_unregister_ops(); -#endif unregister_filesystem(&nfs_fs_type); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2bde35921f2b..4cf060691979 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1809,6 +1809,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; + dprintk("NFS: %5u nfs_commit_done (status %d)\n", + task->tk_pid, task->tk_status); + /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); trace_nfs_commit_done(task, data); diff --git 
a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index 119c75ab9fd0..fa82f5aaa6d9 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o nfs_acl-objs := nfsacl.o obj-$(CONFIG_GRACE_PERIOD) += grace.o -obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o +obj-$(CONFIG_GRACE_PERIOD) += nfs_ssc.o diff --git a/fs/nfs_common/nfs_ssc.c b/fs/nfs_common/nfs_ssc.c index 7c1509e968c8..f43bbb373913 100644 --- a/fs/nfs_common/nfs_ssc.c +++ b/fs/nfs_common/nfs_ssc.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * fs/nfs_common/nfs_ssc_comm.c + * * Helper for knfsd's SSC to access ops in NFS client modules * * Author: Dai Ngo diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 5a5bd85d08f8..d056ad2fdefd 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -136,77 +136,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, } EXPORT_SYMBOL_GPL(nfsacl_encode); -/** - * nfs_stream_encode_acl - Encode an NFSv3 ACL - * - * @xdr: an xdr_stream positioned to receive an encoded ACL - * @inode: inode of file whose ACL this is - * @acl: posix_acl to encode - * @encode_entries: whether to encode ACEs as well - * @typeflag: ACL type: NFS_ACL_DEFAULT or zero - * - * Return values: - * %false: The ACL could not be encoded - * %true: @xdr is advanced to the next available position - */ -bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, - struct posix_acl *acl, int encode_entries, - int typeflag) -{ - const size_t elem_size = XDR_UNIT * 3; - u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; - struct nfsacl_encode_desc nfsacl_desc = { - .desc = { - .elem_size = elem_size, - .array_len = encode_entries ? 
entries : 0, - .xcode = xdr_nfsace_encode, - }, - .acl = acl, - .typeflag = typeflag, - .uid = inode->i_uid, - .gid = inode->i_gid, - }; - struct nfsacl_simple_acl aclbuf; - unsigned int base; - int err; - - if (entries > NFS_ACL_MAX_ENTRIES) - return false; - if (xdr_stream_encode_u32(xdr, entries) < 0) - return false; - - if (encode_entries && acl && acl->a_count == 3) { - struct posix_acl *acl2 = &aclbuf.acl; - - /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is - * invoked in contexts where a memory allocation failure is - * fatal. Fortunately this fake ACL is small enough to - * construct on the stack. */ - posix_acl_init(acl2, 4); - - /* Insert entries in canonical order: other orders seem - to confuse Solaris VxFS. */ - acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ - acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ - acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ - acl2->a_entries[2].e_tag = ACL_MASK; - acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ - nfsacl_desc.acl = acl2; - } - - base = xdr_stream_pos(xdr); - if (!xdr_reserve_space(xdr, XDR_UNIT + - elem_size * nfsacl_desc.desc.array_len)) - return false; - err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc); - if (err) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(nfs_stream_encode_acl); - - struct nfsacl_decode_desc { struct xdr_array2_desc desc; unsigned int count; @@ -366,55 +295,3 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, nfsacl_desc.desc.array_len; } EXPORT_SYMBOL_GPL(nfsacl_decode); - -/** - * nfs_stream_decode_acl - Decode an NFSv3 ACL - * - * @xdr: an xdr_stream positioned at an encoded ACL - * @aclcnt: OUT: count of ACEs in decoded posix_acl - * @pacl: OUT: a dynamically-allocated buffer containing the decoded posix_acl - * - * Return values: - * %false: The encoded ACL is not valid - * %true: @pacl contains a decoded ACL, and @xdr is advanced - * - * On a successful return, caller must 
release *pacl using posix_acl_release(). - */ -bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, - struct posix_acl **pacl) -{ - const size_t elem_size = XDR_UNIT * 3; - struct nfsacl_decode_desc nfsacl_desc = { - .desc = { - .elem_size = elem_size, - .xcode = pacl ? xdr_nfsace_decode : NULL, - }, - }; - unsigned int base; - u32 entries; - - if (xdr_stream_decode_u32(xdr, &entries) < 0) - return false; - if (entries > NFS_ACL_MAX_ENTRIES) - return false; - - base = xdr_stream_pos(xdr); - if (!xdr_inline_decode(xdr, XDR_UNIT + elem_size * entries)) - return false; - nfsacl_desc.desc.array_maxlen = entries; - if (xdr_decode_array2(xdr->buf, base, &nfsacl_desc.desc)) - return false; - - if (pacl) { - if (entries != nfsacl_desc.desc.array_len || - posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { - posix_acl_release(nfsacl_desc.acl); - return false; - } - *pacl = nfsacl_desc.acl; - } - if (aclcnt) - *aclcnt = entries; - return true; -} -EXPORT_SYMBOL_GPL(nfs_stream_decode_acl); diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 6d2d498a5957..248f1459c039 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -8,7 +8,6 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL - select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER help Choose Y here if you want to allow other computers to access @@ -27,29 +26,28 @@ config NFSD Below you can choose which versions of the NFS protocol are available to clients mounting the NFS server on this system. - Support for NFS version 3 (RFC 1813) is always available when + Support for NFS version 2 (RFC 1094) is always available when CONFIG_NFSD is selected. If unsure, say N. -config NFSD_V2 - bool "NFS server support for NFS version 2 (DEPRECATED)" - depends on NFSD - default n - help - NFSv2 (RFC 1094) was the first publicly-released version of NFS. - Unless you are hosting ancient (1990's era) NFS clients, you don't - need this. - - If unsure, say N. 
- config NFSD_V2_ACL - bool "NFS server support for the NFSv2 ACL protocol extension" - depends on NFSD_V2 + bool + depends on NFSD + +config NFSD_V3 + bool "NFS server support for NFS version 3" + depends on NFSD + help + This option enables support in your system's NFS server for + version 3 of the NFS protocol (RFC 1813). + + If unsure, say Y. config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" - depends on NFSD + depends on NFSD_V3 + select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that never became an official part of the NFS version 3 protocol. @@ -72,13 +70,13 @@ config NFSD_V3_ACL config NFSD_V4 bool "NFS server support for NFS version 4" depends on NFSD && PROC_FS + select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 select GRACE_PERIOD - select NFS_V4_2_SSC_HELPER if NFS_V4_2 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). @@ -100,7 +98,7 @@ config NFSD_BLOCKLAYOUT help This option enables support for the exporting pNFS block layouts in the kernel's NFS server. The pNFS block layout enables NFS - clients to directly perform I/O to block devices accessible to both + clients to directly perform I/O to block devices accesible to both the server and the clients. See RFC 5663 for more details. If unsure, say N. @@ -114,7 +112,7 @@ config NFSD_SCSILAYOUT help This option enables support for the exporting pNFS SCSI layouts in the kernel's NFS server. The pNFS SCSI layout enables NFS - clients to directly perform I/O to SCSI devices accessible to both + clients to directly perform I/O to SCSI devices accesible to both the server and the clients. See draft-ietf-nfsv4-scsi-layout for more details. @@ -128,7 +126,7 @@ config NFSD_FLEXFILELAYOUT This option enables support for the exporting pNFS Flex File layouts in the kernel's NFS server. 
The pNFS Flex File layout enables NFS clients to directly perform I/O to NFSv3 devices - accessible to both the server and the clients. See + accesible to both the server and the clients. See draft-ietf-nfsv4-flex-files for more details. Warning, this server implements the bare minimum functionality @@ -139,7 +137,7 @@ config NFSD_FLEXFILELAYOUT config NFSD_V4_2_INTER_SSC bool "NFSv4.2 inter server to server COPY" - depends on NFSD_V4 && NFS_V4_2 + depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 help This option enables support for NFSv4.2 inter server to server copy where the destination server calls the NFSv4.2 diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 6fffc8f03f74..3f0983e93a99 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -10,11 +10,11 @@ obj-$(CONFIG_NFSD) += nfsd.o # this one should be compiled first, as the tracing macros can easily blow up nfsd-y += trace.o -nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \ - export.o auth.o lockd.o nfscache.o \ - stats.o filecache.o nfs3proc.o nfs3xdr.o -nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o +nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o nfsxdr.o \ + stats.o filecache.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o +nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index 4b7324458a94..ba14d2f4b64f 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -38,8 +38,6 @@ struct nfs4_acl; struct svc_fh; struct svc_rqst; -struct nfsd_attrs; -enum nfs_ftype4; int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); @@ -47,7 +45,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); -__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, - struct nfsd_attrs 
*attr); +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index d91a686d2f31..a07c39c94bbd 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -16,7 +16,6 @@ #include "blocklayoutxdr.h" #include "pnfs.h" #include "filecache.h" -#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 1ed2f691ebb9..2455dc8be18a 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -9,7 +9,6 @@ #include "nfsd.h" #include "blocklayoutxdr.h" -#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index f21259ead64b..65c331f75e9c 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -84,6 +84,6 @@ int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); -int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); +int nfsd_reply_cache_stats_open(struct inode *, struct file *); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 7c863f2c21e0..21e404e7cb68 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -331,29 +331,12 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) fsloc->locations = NULL; } -static int export_stats_init(struct export_stats *stats) -{ - stats->start_time = ktime_get_seconds(); - return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM); -} - -static void export_stats_reset(struct export_stats *stats) -{ - nfsd_percpu_counters_reset(stats->counter, EXP_STATS_COUNTERS_NUM); -} - -static void export_stats_destroy(struct export_stats *stats) -{ - nfsd_percpu_counters_destroy(stats->counter, EXP_STATS_COUNTERS_NUM); -} - static void svc_export_put(struct kref *ref) { struct svc_export *exp = 
container_of(ref, struct svc_export, h.ref); path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); - export_stats_destroy(&exp->ex_stats); kfree(exp->ex_uuid); kfree_rcu(exp, ex_rcu); } @@ -425,12 +408,6 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid) return -EINVAL; } - if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK && - !(*flags & NFSEXP_NOSUBTREECHECK)) { - dprintk("%s: %s does not support subtree checking!\n", - __func__, inode->i_sb->s_type->name); - return -EINVAL; - } return 0; } @@ -709,47 +686,22 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs); static void show_secinfo(struct seq_file *m, struct svc_export *exp); -static int is_export_stats_file(struct seq_file *m) -{ - /* - * The export_stats file uses the same ops as the exports file. - * We use the file's name to determine the reported info per export. - * There is no rename in nsfdfs, so d_name.name is stable. 
- */ - return !strcmp(m->file->f_path.dentry->d_name.name, "export_stats"); -} - static int svc_export_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { - struct svc_export *exp; - bool export_stats = is_export_stats_file(m); + struct svc_export *exp ; - if (h == NULL) { - if (export_stats) - seq_puts(m, "#path domain start-time\n#\tstats\n"); - else - seq_puts(m, "#path domain(flags)\n"); + if (h ==NULL) { + seq_puts(m, "#path domain(flags)\n"); return 0; } exp = container_of(h, struct svc_export, h); seq_path(m, &exp->ex_path, " \t\n\\"); seq_putc(m, '\t'); seq_escape(m, exp->ex_client->name, " \t\n\\"); - if (export_stats) { - seq_printf(m, "\t%lld\n", exp->ex_stats.start_time); - seq_printf(m, "\tfh_stale: %lld\n", - percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_FH_STALE])); - seq_printf(m, "\tio_read: %lld\n", - percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_READ])); - seq_printf(m, "\tio_write: %lld\n", - percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_WRITE])); - seq_putc(m, '\n'); - return 0; - } seq_putc(m, '('); - if (test_bit(CACHE_VALID, &h->flags) && + if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { exp_flags(m, exp->ex_flags, exp->ex_fsid, exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs); @@ -790,7 +742,6 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_layout_types = 0; new->ex_uuid = NULL; new->cd = item->cd; - export_stats_reset(&new->ex_stats); } static void export_update(struct cache_head *cnew, struct cache_head *citem) @@ -823,15 +774,10 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) static struct cache_head *svc_export_alloc(void) { struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL); - if (!i) + if (i) + return &i->h; + else return NULL; - - if (export_stats_init(&i->ex_stats)) { - kfree(i); - return NULL; - } - - return &i->h; } static const struct 
cache_detail svc_export_cache_template = { @@ -1293,14 +1239,10 @@ static int e_show(struct seq_file *m, void *p) struct cache_head *cp = p; struct svc_export *exp = container_of(cp, struct svc_export, h); struct cache_detail *cd = m->private; - bool export_stats = is_export_stats_file(m); if (p == SEQ_START_TOKEN) { seq_puts(m, "# Version 1.1\n"); - if (export_stats) - seq_puts(m, "# Path Client Start-time\n#\tStats\n"); - else - seq_puts(m, "# Path Client(Flags) # IPs\n"); + seq_puts(m, "# Path Client(Flags) # IPs\n"); return 0; } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index d03f7f6a8642..e7daa1f246f0 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -6,7 +6,6 @@ #define NFSD_EXPORT_H #include -#include #include #include @@ -47,19 +46,6 @@ struct exp_flavor_info { u32 flags; }; -/* Per-export stats */ -enum { - EXP_STATS_FH_STALE, - EXP_STATS_IO_READ, - EXP_STATS_IO_WRITE, - EXP_STATS_COUNTERS_NUM -}; - -struct export_stats { - time64_t start_time; - struct percpu_counter counter[EXP_STATS_COUNTERS_NUM]; -}; - struct svc_export { struct cache_head h; struct auth_domain * ex_client; @@ -76,7 +62,6 @@ struct svc_export { struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; struct rcu_head ex_rcu; - struct export_stats ex_stats; }; /* an "export key" (expkey) maps a filehandlefragement to an @@ -115,6 +100,7 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *); int exp_rootfh(struct net *, struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); +__be32 nfserrno(int errno); static inline void exp_put(struct svc_export *exp) { diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 615ea8324911..e30e1ddc1ace 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1,32 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 /* - * The NFSD open file cache. + * Open file cache. 
* * (c) 2015 - Jeff Layton - * - * An nfsd_file object is a per-file collection of open state that binds - * together: - * - a struct file * - * - a user credential - * - a network namespace - * - a read-ahead context - * - monitoring for writeback errors - * - * nfsd_file objects are reference-counted. Consumers acquire a new - * object via the nfsd_file_acquire API. They manage their interest in - * the acquired object, and hence the object's reference count, via - * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file - * object: - * - * * non-garbage-collected: When a consumer wants to precisely control - * the lifetime of a file's open state, it acquires a non-garbage- - * collected nfsd_file. The final nfsd_file_put releases the open - * state immediately. - * - * * garbage-collected: When a consumer does not control the lifetime - * of open state, it acquires a garbage-collected nfsd_file. The - * final nfsd_file_put allows the open state to linger for a period - * during which it may be re-used. */ #include @@ -37,7 +12,6 @@ #include #include #include -#include #include "vfs.h" #include "nfsd.h" @@ -46,75 +20,63 @@ #include "filecache.h" #include "trace.h" +#define NFSDDBG_FACILITY NFSDDBG_FH + +/* FIXME: dynamically size this for the machine somehow? 
*/ +#define NFSD_FILE_HASH_BITS 12 +#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_CACHE_UP (0) +#define NFSD_FILE_SHUTDOWN (1) +#define NFSD_FILE_LRU_THRESHOLD (4096UL) +#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) +struct nfsd_fcache_bucket { + struct hlist_head nfb_head; + spinlock_t nfb_lock; + unsigned int nfb_count; + unsigned int nfb_maxcount; +}; + static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); -static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); -static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); -static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); -static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { + struct list_head list; struct work_struct work; + struct net *net; spinlock_t lock; struct list_head freeme; + struct rcu_head rcu; }; static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; +static struct nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; -static unsigned long nfsd_file_flags; +static long nfsd_file_lru_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; +static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; -static struct rhltable nfsd_file_rhltable - ____cacheline_aligned_in_smp; +static DEFINE_SPINLOCK(laundrette_lock); +static LIST_HEAD(laundrettes); -static bool -nfsd_match_cred(const struct cred *c1, const struct cred *c2) -{ - int i; - - if (!uid_eq(c1->fsuid, c2->fsuid)) - return false; - if (!gid_eq(c1->fsgid, c2->fsgid)) - return false; - if (c1->group_info == NULL || c2->group_info == NULL) - return c1->group_info == c2->group_info; - if (c1->group_info->ngroups != 
c2->group_info->ngroups) - return false; - for (i = 0; i < c1->group_info->ngroups; i++) { - if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) - return false; - } - return true; -} - -static const struct rhashtable_params nfsd_file_rhash_params = { - .key_len = sizeof_field(struct nfsd_file, nf_inode), - .key_offset = offsetof(struct nfsd_file, nf_inode), - .head_offset = offsetof(struct nfsd_file, nf_rlist), - - /* - * Start with a single page hash table to reduce resizing churn - * on light workloads. - */ - .min_size = 256, - .automatic_shrinking = true, -}; +static void nfsd_file_gc(void); static void nfsd_file_schedule_laundrette(void) { - if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, - NFSD_LAUNDRETTE_DELAY); + long count = atomic_long_read(&nfsd_filecache_count); + + if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) + return; + + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -153,21 +115,22 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) +nfsd_file_mark_find_or_create(struct nfsd_file *nf) { int err; struct fsnotify_mark *mark; struct nfsd_file_mark *nfm = NULL, *new; + struct inode *inode = nf->nf_inode; do { - fsnotify_group_lock(nfsd_file_fsnotify_group); + mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, nfm_mark)); - fsnotify_group_unlock(nfsd_file_fsnotify_group); + mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); if (nfm) { fsnotify_put_mark(mark); break; @@ -175,9 +138,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) /* Avoid soft lockup race with nfsd_file_mark_put() */ 
fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); fsnotify_put_mark(mark); - } else { - fsnotify_group_unlock(nfsd_file_fsnotify_group); - } + } else + mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); /* allocate a new nfm */ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); @@ -208,91 +170,51 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) } static struct nfsd_file * -nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, - bool want_gc) +nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, + struct net *net) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); - if (unlikely(!nf)) - return NULL; - - INIT_LIST_HEAD(&nf->nf_lru); - nf->nf_birthtime = ktime_get(); - nf->nf_file = NULL; - nf->nf_cred = get_current_cred(); - nf->nf_net = net; - nf->nf_flags = want_gc ? - BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : - BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); - nf->nf_inode = inode; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = need; - nf->nf_mark = NULL; + if (nf) { + INIT_HLIST_NODE(&nf->nf_node); + INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_file = NULL; + nf->nf_cred = get_current_cred(); + nf->nf_net = net; + nf->nf_flags = 0; + nf->nf_inode = inode; + nf->nf_hashval = hashval; + refcount_set(&nf->nf_ref, 1); + nf->nf_may = may & NFSD_FILE_MAY_MASK; + if (may & NFSD_MAY_NOT_BREAK_LEASE) { + if (may & NFSD_MAY_WRITE) + __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); + if (may & NFSD_MAY_READ) + __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); + } + nf->nf_mark = NULL; + trace_nfsd_file_alloc(nf); + } return nf; } -/** - * nfsd_file_check_write_error - check for writeback errors on a file - * @nf: nfsd_file to check for writeback errors - * - * Check whether a nfsd_file has an unseen error. Reset the write - * verifier if so. 
- */ -static void -nfsd_file_check_write_error(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - - if ((file->f_mode & FMODE_WRITE) && - filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err))) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); -} - -static void -nfsd_file_hash_remove(struct nfsd_file *nf) -{ - trace_nfsd_file_unhash(nf); - rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, - nfsd_file_rhash_params); -} - static bool -nfsd_file_unhash(struct nfsd_file *nf) -{ - if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_hash_remove(nf); - return true; - } - return false; -} - -static void nfsd_file_free(struct nfsd_file *nf) { - s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); + bool flush = false; - trace_nfsd_file_free(nf); - - this_cpu_inc(nfsd_file_releases); - this_cpu_add(nfsd_file_total_age, age); - - nfsd_file_unhash(nf); + trace_nfsd_file_put_final(nf); if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { - nfsd_file_check_write_error(nf); + get_file(nf->nf_file); filp_close(nf->nf_file, NULL); + fput(nf->nf_file); + flush = true; } - - /* - * If this item is still linked via nf_lru, that's a bug. - * WARN and leak it to preserve system stability. - */ - if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return; - call_rcu(&nf->nf_rcu, nfsd_file_slab_free); + return flush; } static bool @@ -301,140 +223,191 @@ nfsd_file_check_writeback(struct nfsd_file *nf) struct file *file = nf->nf_file; struct address_space *mapping; - /* File not open for write? */ - if (!(file->f_mode & FMODE_WRITE)) + if (!file || !(file->f_mode & FMODE_WRITE)) return false; - - /* - * Some filesystems (e.g. NFS) flush all dirty data on close. - * On others, there is no need to wait for writeback. 
- */ - if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) - return false; - mapping = file->f_mapping; return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } - -static bool nfsd_file_lru_add(struct nfsd_file *nf) +static int +nfsd_file_check_write_error(struct nfsd_file *nf) { - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { - trace_nfsd_file_lru_add(nf); + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return 0; + return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); +} + +static void +nfsd_file_do_unhash(struct nfsd_file *nf) +{ + lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + + trace_nfsd_file_unhash(nf); + + if (nfsd_file_check_write_error(nf)) + nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); + --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; + hlist_del_rcu(&nf->nf_node); + atomic_long_dec(&nfsd_filecache_count); +} + +static bool +nfsd_file_unhash(struct nfsd_file *nf) +{ + if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_do_unhash(nf); + if (!list_empty(&nf->nf_lru)) + list_lru_del(&nfsd_file_lru, &nf->nf_lru); return true; } return false; } -static bool nfsd_file_lru_remove(struct nfsd_file *nf) +/* + * Return true if the file was unhashed. 
+ */ +static bool +nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { - trace_nfsd_file_lru_del(nf); + lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + + trace_nfsd_file_unhash_and_release_locked(nf); + if (!nfsd_file_unhash(nf)) + return false; + /* keep final reference for nfsd_file_lru_dispose */ + if (refcount_dec_not_one(&nf->nf_ref)) return true; + + list_add(&nf->nf_lru, dispose); + return true; +} + +static void +nfsd_file_put_noref(struct nfsd_file *nf) +{ + trace_nfsd_file_put(nf); + + if (refcount_dec_and_test(&nf->nf_ref)) { + WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); + nfsd_file_free(nf); } - return false; +} + +void +nfsd_file_put(struct nfsd_file *nf) +{ + bool is_hashed; + + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { + nfsd_file_put_noref(nf); + return; + } + + filemap_flush(nf->nf_file->f_mapping); + is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; + nfsd_file_put_noref(nf); + if (is_hashed) + nfsd_file_schedule_laundrette(); + if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) + nfsd_file_gc(); } struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (nf && refcount_inc_not_zero(&nf->nf_ref)) + if (likely(refcount_inc_not_zero(&nf->nf_ref))) return nf; return NULL; } -/** - * nfsd_file_put - put the reference to a nfsd_file - * @nf: nfsd_file of which to put the reference - * - * Put a reference to a nfsd_file. In the non-GC case, we just put the - * reference immediately. In the GC case, if the reference would be - * the last one, the put it on the LRU instead to be cleaned up later. 
- */ -void -nfsd_file_put(struct nfsd_file *nf) -{ - might_sleep(); - trace_nfsd_file_put(nf); - - if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && - test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - /* - * If this is the last reference (nf_ref == 1), then try to - * transfer it to the LRU. - */ - if (refcount_dec_not_one(&nf->nf_ref)) - return; - - /* Try to add it to the LRU. If that fails, decrement. */ - if (nfsd_file_lru_add(nf)) { - /* If it's still hashed, we're done */ - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_schedule_laundrette(); - return; - } - - /* - * We're racing with unhashing, so try to remove it from - * the LRU. If removal fails, then someone else already - * has our reference. - */ - if (!nfsd_file_lru_remove(nf)) - return; - } - } - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); -} - static void nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; - while (!list_empty(dispose)) { + while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_free(nf); + list_del(&nf->nf_lru); + nfsd_file_put_noref(nf); + } +} + +static void +nfsd_file_dispose_list_sync(struct list_head *dispose) +{ + bool flush = false; + struct nfsd_file *nf; + + while(!list_empty(dispose)) { + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); + list_del(&nf->nf_lru); + if (!refcount_dec_and_test(&nf->nf_ref)) + continue; + if (nfsd_file_free(nf)) + flush = true; + } + if (flush) + flush_delayed_fput(); +} + +static void +nfsd_file_list_remove_disposal(struct list_head *dst, + struct nfsd_fcache_disposal *l) +{ + spin_lock(&l->lock); + list_splice_init(&l->freeme, dst); + spin_unlock(&l->lock); +} + +static void +nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net == net) { + spin_lock(&l->lock); + 
list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); + break; + } + } + rcu_read_unlock(); +} + +static void +nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, + struct net *net) +{ + struct nfsd_file *nf, *tmp; + + list_for_each_entry_safe(nf, tmp, src, nf_lru) { + if (nf->nf_net == net) + list_move_tail(&nf->nf_lru, dst); } } -/** - * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list - * @dispose: list of nfsd_files to be disposed - * - * Transfers each file to the "freeme" list for its nfsd_net, to eventually - * be disposed of by the per-net garbage collector. - */ static void nfsd_file_dispose_list_delayed(struct list_head *dispose) { - while(!list_empty(dispose)) { - struct nfsd_file *nf = list_first_entry(dispose, - struct nfsd_file, nf_lru); - struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); - struct nfsd_fcache_disposal *l = nn->fcache_disposal; + LIST_HEAD(list); + struct nfsd_file *nf; - spin_lock(&l->lock); - list_move_tail(&nf->nf_lru, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); + while(!list_empty(dispose)) { + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); + nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); + nfsd_file_list_add_disposal(&list, nf->nf_net); } } -/** - * nfsd_file_lru_cb - Examine an entry on the LRU list - * @item: LRU entry to examine - * @lru: controlling LRU - * @lock: LRU list lock (unused) - * @arg: dispose list - * - * Return values: - * %LRU_REMOVED: @item was removed from the LRU - * %LRU_ROTATE: @item is to be moved to the LRU tail - * %LRU_SKIP: @item cannot be evicted +/* + * Note this can deadlock with nfsd_file_cache_purge. 
*/ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, @@ -445,60 +418,72 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* We should only be dealing with GC entries here */ - WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); + /* + * Do a lockless refcount check. The hashtable holds one reference, so + * we look to see if anything else has a reference, or if any have + * been put since the shrinker last ran. Those don't get unhashed and + * released. + * + * Note that in the put path, we set the flag and then decrement the + * counter. Here we check the counter and then test and clear the flag. + * That order is deliberate to ensure that we can do this locklessly. + */ + if (refcount_read(&nf->nf_ref) > 1) + goto out_skip; /* * Don't throw out files that are still undergoing I/O or * that have uncleared errors pending. */ - if (nfsd_file_check_writeback(nf)) { - trace_nfsd_file_gc_writeback(nf); - return LRU_SKIP; - } + if (nfsd_file_check_writeback(nf)) + goto out_skip; - /* If it was recently added to the list, skip it */ - if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { - trace_nfsd_file_gc_referenced(nf); - return LRU_ROTATE; - } + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) + goto out_skip; - /* - * Put the reference held on behalf of the LRU. If it wasn't the last - * one, then just remove it from the LRU and ignore it. - */ - if (!refcount_dec_and_test(&nf->nf_ref)) { - trace_nfsd_file_gc_in_use(nf); - list_lru_isolate(lru, &nf->nf_lru); - return LRU_REMOVED; - } + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) + goto out_skip; - /* Refcount went to zero. 
Unhash it and queue it to the dispose list */ - nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); - this_cpu_inc(nfsd_file_evictions); - trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; +out_skip: + return LRU_SKIP; +} + +static unsigned long +nfsd_file_lru_walk_list(struct shrink_control *sc) +{ + LIST_HEAD(head); + struct nfsd_file *nf; + unsigned long ret; + + if (sc) + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &head); + else + ret = list_lru_walk(&nfsd_file_lru, + nfsd_file_lru_cb, + &head, LONG_MAX); + list_for_each_entry(nf, &head, nf_lru) { + spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + nfsd_file_do_unhash(nf); + spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + } + nfsd_file_dispose_list_delayed(&head); + return ret; } static void nfsd_file_gc(void) { - LIST_HEAD(dispose); - unsigned long ret; - - ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, list_lru_count(&nfsd_file_lru)); - trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_dispose_list_delayed(&dispose); + nfsd_file_lru_walk_list(NULL); } static void nfsd_file_gc_worker(struct work_struct *work) { nfsd_file_gc(); - if (list_lru_count(&nfsd_file_lru)) - nfsd_file_schedule_laundrette(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -510,14 +495,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - LIST_HEAD(dispose); - unsigned long ret; - - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, - nfsd_file_lru_cb, &dispose); - trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_dispose_list_delayed(&dispose); - return ret; + return nfsd_file_lru_walk_list(sc); } static struct shrinker nfsd_file_shrinker = { @@ -526,123 +504,70 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/** - * nfsd_file_cond_queue - conditionally unhash and 
queue a nfsd_file - * @nf: nfsd_file to attempt to queue - * @dispose: private list to queue successfully-put objects - * - * Unhash an nfsd_file, try to get a reference to it, and then put that - * reference. If it's the last reference, queue it to the dispose list. - */ static void -nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) - __must_hold(RCU) +__nfsd_file_close_inode(struct inode *inode, unsigned int hashval, + struct list_head *dispose) { - int decrement = 1; + struct nfsd_file *nf; + struct hlist_node *tmp; - /* If we raced with someone else unhashing, ignore it */ - if (!nfsd_file_unhash(nf)) - return; - - /* If we can't get a reference, ignore it */ - if (!nfsd_file_get(nf)) - return; - - /* Extra decrement if we remove from the LRU */ - if (nfsd_file_lru_remove(nf)) - ++decrement; - - /* If refcount goes to 0, then put on the dispose list */ - if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); - trace_nfsd_file_closing(nf); + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); + hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { + if (inode == nf->nf_inode) + nfsd_file_unhash_and_release_locked(nf, dispose); } -} - -/** - * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode - * @inode: inode on which to close out nfsd_files - * @dispose: list on which to gather nfsd_files to close out - * - * An nfsd_file represents a struct file being held open on behalf of nfsd. - * An open file however can block other activity (such as leases), or cause - * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). - * - * This function is intended to find open nfsd_files when this sort of - * conflicting access occurs and then attempt to close those files out. - * - * Populates the dispose list with entries that have already had their - * refcounts go to zero. 
The actual free of an nfsd_file can be expensive, - * so we leave it up to the caller whether it wants to wait or not. - */ -static void -nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) -{ - struct rhlist_head *tmp, *list; - struct nfsd_file *nf; - - rcu_read_lock(); - list = rhltable_lookup(&nfsd_file_rhltable, &inode, - nfsd_file_rhash_params); - rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { - if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) - continue; - nfsd_file_cond_queue(nf, dispose); - } - rcu_read_unlock(); -} - -/** - * nfsd_file_close_inode - attempt a delayed close of a nfsd_file - * @inode: inode of the file to attempt to remove - * - * Close out any open nfsd_files that can be reaped for @inode. The - * actual freeing is deferred to the dispose_list_delayed infrastructure. - * - * This is used by the fsnotify callbacks and setlease notifier. - */ -static void -nfsd_file_close_inode(struct inode *inode) -{ - LIST_HEAD(dispose); - - nfsd_file_queue_for_close(inode, &dispose); - nfsd_file_dispose_list_delayed(&dispose); + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); } /** * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Close out any open nfsd_files that can be reaped for @inode. The - * nfsd_files are closed out synchronously. - * - * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames - * when reexporting NFS. + * Walk the whole hash bucket, looking for any files that correspond to "inode". + * If any do, then unhash them and put the hashtable reference to them and + * destroy any that had their last reference put. Also ensure that any of the + * fputs also have their final __fput done as well. 
*/ void nfsd_file_close_inode_sync(struct inode *inode) { - struct nfsd_file *nf; + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, + NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); - trace_nfsd_file_close(inode); + __nfsd_file_close_inode(inode, hashval, &dispose); + trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); + nfsd_file_dispose_list_sync(&dispose); +} - nfsd_file_queue_for_close(inode, &dispose); - while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_free(nf); - } - flush_delayed_fput(); +/** + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * @inode: inode of the file to attempt to remove + * + * Walk the whole hash bucket, looking for any files that correspond to "inode". + * If any do, then unhash them and put the hashtable reference to them and + * destroy any that had their last reference put. + */ +static void +nfsd_file_close_inode(struct inode *inode) +{ + unsigned int hashval = (unsigned int)hash_long(inode->i_ino, + NFSD_FILE_HASH_BITS); + LIST_HEAD(dispose); + + __nfsd_file_close_inode(inode, hashval, &dispose); + trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); + nfsd_file_dispose_list_delayed(&dispose); } /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Scrape the freeme list for this nfsd_net, and then dispose of them - * all. + * Walk the LRU list and close any entries that have not been used since + * the last scan. + * + * Note this can deadlock with nfsd_file_cache_purge. 
*/ static void nfsd_file_delayed_close(struct work_struct *work) @@ -651,10 +576,7 @@ nfsd_file_delayed_close(struct work_struct *work) struct nfsd_fcache_disposal *l = container_of(work, struct nfsd_fcache_disposal, work); - spin_lock(&l->lock); - list_splice_init(&l->freeme, &head); - spin_unlock(&l->lock); - + nfsd_file_list_remove_disposal(&head, l); nfsd_file_dispose_list(&head); } @@ -666,7 +588,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode(file_inode(fl->fl_file)); + nfsd_file_close_inode_sync(file_inode(fl->fl_file)); return 0; } @@ -679,9 +601,6 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { - if (WARN_ON_ONCE(!inode)) - return 0; - trace_nfsd_file_fsnotify_handle_event(inode, mask); /* Should be no marks on non-regular files */ @@ -709,21 +628,25 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = { int nfsd_file_cache_init(void) { - int ret; + int ret = -ENOMEM; + unsigned int i; - lockdep_assert_held(&nfsd_mutex); - if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) + clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); + + if (nfsd_file_hashtbl) return 0; - ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); - if (ret) - return ret; - - ret = -ENOMEM; nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); if (!nfsd_filecache_wq) goto out; + nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE, + sizeof(*nfsd_file_hashtbl), GFP_KERNEL); + if (!nfsd_file_hashtbl) { + pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); + goto out_err; + } + nfsd_file_slab = kmem_cache_create("nfsd_file", sizeof(struct nfsd_file), 0, 0, NULL); if (!nfsd_file_slab) { @@ -757,16 +680,19 @@ nfsd_file_cache_init(void) goto out_shrinker; } - nfsd_file_fsnotify_group = 
fsnotify_alloc_group(&nfsd_file_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); - ret = PTR_ERR(nfsd_file_fsnotify_group); nfsd_file_fsnotify_group = NULL; goto out_notifier; } + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { + INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); + spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); + } + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; @@ -781,47 +707,50 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; + kvfree(nfsd_file_hashtbl); + nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhltable_destroy(&nfsd_file_rhltable); goto out; } -/** - * __nfsd_file_cache_purge: clean out the cache for shutdown - * @net: net-namespace to shut down the cache (may be NULL) - * - * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, - * then close out everything. Called when an nfsd instance is being shut down, - * and when the exports table is flushed. +/* + * Note this can deadlock with nfsd_file_lru_cb. 
*/ -static void -__nfsd_file_cache_purge(struct net *net) +void +nfsd_file_cache_purge(struct net *net) { - struct rhashtable_iter iter; - struct nfsd_file *nf; + unsigned int i; + struct nfsd_file *nf; + struct hlist_node *next; LIST_HEAD(dispose); + bool del; - rhltable_walk_enter(&nfsd_file_rhltable, &iter); - do { - rhashtable_walk_start(&iter); + if (!nfsd_file_hashtbl) + return; - nf = rhashtable_walk_next(&iter); - while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_cond_queue(nf, &dispose); - nf = rhashtable_walk_next(&iter); + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { + struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; + + spin_lock(&nfb->nfb_lock); + hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { + if (net && nf->nf_net != net) + continue; + del = nfsd_file_unhash_and_release_locked(nf, &dispose); + + /* + * Deadlock detected! Something marked this entry as + * unhased, but hasn't removed it from the hash list. + */ + WARN_ON_ONCE(!del); } - - rhashtable_walk_stop(&iter); - } while (nf == ERR_PTR(-EAGAIN)); - rhashtable_walk_exit(&iter); - - nfsd_file_dispose_list(&dispose); + spin_unlock(&nfb->nfb_lock); + nfsd_file_dispose_list(&dispose); + } } static struct nfsd_fcache_disposal * -nfsd_alloc_fcache_disposal(void) +nfsd_alloc_fcache_disposal(struct net *net) { struct nfsd_fcache_disposal *l; @@ -829,6 +758,7 @@ nfsd_alloc_fcache_disposal(void) if (!l) return NULL; INIT_WORK(&l->work, nfsd_file_delayed_close); + l->net = net; spin_lock_init(&l->lock); INIT_LIST_HEAD(&l->freeme); return l; @@ -837,40 +767,61 @@ nfsd_alloc_fcache_disposal(void) static void nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) { + rcu_assign_pointer(l->net, NULL); cancel_work_sync(&l->work); nfsd_file_dispose_list(&l->freeme); - kfree(l); + kfree_rcu(l, rcu); +} + +static void +nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_add_tail_rcu(&l->list, &laundrettes); + 
spin_unlock(&laundrette_lock); +} + +static void +nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_del_rcu(&l->list); + spin_unlock(&laundrette_lock); +} + +static int +nfsd_alloc_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = nfsd_alloc_fcache_disposal(net); + if (!l) + return -ENOMEM; + nfsd_add_fcache_disposal(l); + return 0; } static void nfsd_free_fcache_disposal_net(struct net *net) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd_fcache_disposal *l = nn->fcache_disposal; + struct nfsd_fcache_disposal *l; - nfsd_free_fcache_disposal(l); + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net != net) + continue; + nfsd_del_fcache_disposal(l); + rcu_read_unlock(); + nfsd_free_fcache_disposal(l); + return; + } + rcu_read_unlock(); } int nfsd_file_cache_start_net(struct net *net) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - nn->fcache_disposal = nfsd_alloc_fcache_disposal(); - return nn->fcache_disposal ? 
0 : -ENOMEM; -} - -/** - * nfsd_file_cache_purge - Remove all cache items associated with @net - * @net: target net namespace - * - */ -void -nfsd_file_cache_purge(struct net *net) -{ - lockdep_assert_held(&nfsd_mutex); - if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) - __nfsd_file_cache_purge(net); + return nfsd_alloc_fcache_disposal_net(net); } void @@ -883,11 +834,7 @@ nfsd_file_cache_shutdown_net(struct net *net) void nfsd_file_cache_shutdown(void) { - int i; - - lockdep_assert_held(&nfsd_mutex); - if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) - return; + set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); lease_unregister_notifier(&nfsd_file_lease_notifier); unregister_shrinker(&nfsd_file_shrinker); @@ -896,7 +843,7 @@ nfsd_file_cache_shutdown(void) * calling nfsd_file_cache_purge */ cancel_delayed_work_sync(&nfsd_filecache_laundrette); - __nfsd_file_cache_purge(NULL); + nfsd_file_cache_purge(NULL); list_lru_destroy(&nfsd_file_lru); rcu_barrier(); fsnotify_put_group(nfsd_file_fsnotify_group); @@ -906,332 +853,240 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; + kvfree(nfsd_file_hashtbl); + nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; - rhltable_destroy(&nfsd_file_rhltable); +} - for_each_possible_cpu(i) { - per_cpu(nfsd_file_cache_hits, i) = 0; - per_cpu(nfsd_file_acquisitions, i) = 0; - per_cpu(nfsd_file_releases, i) = 0; - per_cpu(nfsd_file_total_age, i) = 0; - per_cpu(nfsd_file_evictions, i) = 0; +static bool +nfsd_match_cred(const struct cred *c1, const struct cred *c2) +{ + int i; + + if (!uid_eq(c1->fsuid, c2->fsuid)) + return false; + if (!gid_eq(c1->fsgid, c2->fsgid)) + return false; + if (c1->group_info == NULL || c2->group_info == NULL) + return c1->group_info == c2->group_info; + if (c1->group_info->ngroups != c2->group_info->ngroups) + return false; + for (i = 0; i < 
c1->group_info->ngroups; i++) { + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) + return false; } + return true; } static struct nfsd_file * -nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, - struct inode *inode, unsigned char need, - bool want_gc) +nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, + unsigned int hashval, struct net *net) { - struct rhlist_head *tmp, *list; struct nfsd_file *nf; + unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - list = rhltable_lookup(&nfsd_file_rhltable, &inode, - nfsd_file_rhash_params); - rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, + nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) { if (nf->nf_may != need) continue; + if (nf->nf_inode != inode) + continue; if (nf->nf_net != net) continue; - if (!nfsd_match_cred(nf->nf_cred, cred)) + if (!nfsd_match_cred(nf->nf_cred, current_cred())) continue; - if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) continue; - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) - continue; - - if (!nfsd_file_get(nf)) - continue; - return nf; + if (nfsd_file_get(nf) != NULL) + return nf; } return NULL; } /** - * nfsd_file_is_cached - are there any cached open files for this inode? - * @inode: inode to check + * nfsd_file_is_cached - are there any cached open files for this fh? + * @inode: inode of the file to check * - * The lookup matches inodes in all net namespaces and is atomic wrt - * nfsd_file_acquire(). - * - * Return values: - * %true: filecache contains at least one file matching this inode - * %false: filecache contains no files matching this inode + * Scan the hashtable for open files that match this fh. Returns true if there + * are any, and false if not. 
*/ bool nfsd_file_is_cached(struct inode *inode) { - struct rhlist_head *tmp, *list; - struct nfsd_file *nf; - bool ret = false; + bool ret = false; + struct nfsd_file *nf; + unsigned int hashval; + + hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); rcu_read_lock(); - list = rhltable_lookup(&nfsd_file_rhltable, &inode, - nfsd_file_rhash_params); - rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { + hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, + nf_node) { + if (inode == nf->nf_inode) { ret = true; break; } + } rcu_read_unlock(); - - trace_nfsd_file_is_cached(inode, (int)ret); + trace_nfsd_file_is_cached(inode, hashval, (int)ret); return ret; } -static __be32 -nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct file *file, - struct nfsd_file **pnf, bool want_gc) +__be32 +nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) { - unsigned char need = may_flags & NFSD_FILE_MAY_MASK; + __be32 status; struct net *net = SVC_NET(rqstp); - struct nfsd_file *new, *nf; - const struct cred *cred; - bool open_retry = true; + struct nfsd_file *nf, *new; struct inode *inode; - __be32 status; - int ret; + unsigned int hashval; + bool retry = true; + /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; - inode = d_inode(fhp->fh_dentry); - cred = get_current_cred(); + inode = d_inode(fhp->fh_dentry); + hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); retry: rcu_read_lock(); - nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + nf = nfsd_file_find_locked(inode, may_flags, hashval, net); rcu_read_unlock(); - - if (nf) { - /* - * If the nf is on the LRU then it holds an extra reference - * that must be put if it's removed. 
It had better not be - * the last one however, since we should hold another. - */ - if (nfsd_file_lru_remove(nf)) - WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); + if (nf) goto wait_for_construction; - } - new = nfsd_file_alloc(net, inode, need, want_gc); + new = nfsd_file_alloc(inode, may_flags, hashval, net); if (!new) { - status = nfserr_jukebox; - goto out; + trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, + NULL, nfserr_jukebox); + return nfserr_jukebox; } - rcu_read_lock(); - spin_lock(&inode->i_lock); - nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); - if (unlikely(nf)) { - spin_unlock(&inode->i_lock); - rcu_read_unlock(); - nfsd_file_slab_free(&new->nf_rcu); - goto wait_for_construction; - } - nf = new; - ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, - nfsd_file_rhash_params); - spin_unlock(&inode->i_lock); - rcu_read_unlock(); - if (likely(ret == 0)) + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); + nf = nfsd_file_find_locked(inode, may_flags, hashval, net); + if (nf == NULL) goto open_file; - - if (ret == -EEXIST) - goto retry; - trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); - status = nfserr_jukebox; - goto construction_err; + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + nfsd_file_slab_free(&new->nf_rcu); wait_for_construction: wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); /* Did construction of this file fail? 
*/ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); - if (!open_retry) { + if (!retry) { status = nfserr_jukebox; - goto construction_err; + goto out; } - open_retry = false; + retry = false; + nfsd_file_put_noref(nf); goto retry; } + this_cpu_inc(nfsd_file_cache_hits); - status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); - if (status != nfs_ok) { + if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { + bool write = (may_flags & NFSD_MAY_WRITE); + + if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || + (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { + status = nfserrno(nfsd_open_break_lease( + file_inode(nf->nf_file), may_flags)); + if (status == nfs_ok) { + clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); + if (write) + clear_bit(NFSD_FILE_BREAK_WRITE, + &nf->nf_flags); + } + } + } +out: + if (status == nfs_ok) { + *pnf = nf; + } else { nfsd_file_put(nf); nf = NULL; } -out: - if (status == nfs_ok) { - this_cpu_inc(nfsd_file_acquisitions); - nfsd_file_check_write_error(nf); - *pnf = nf; - } - put_cred(cred); - trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); return status; - open_file: - trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); - if (nf->nf_mark) { - if (file) { - get_file(file); - nf->nf_file = file; - status = nfs_ok; - trace_nfsd_file_opened(nf, status); - } else { - status = nfsd_open_verified(rqstp, fhp, may_flags, - &nf->nf_file); - trace_nfsd_file_open(nf, status); - } - } else + nf = new; + /* Take reference for the hashtable */ + refcount_inc(&nf->nf_ref); + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + list_lru_add(&nfsd_file_lru, &nf->nf_lru); + hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); + ++nfsd_file_hashtbl[hashval].nfb_count; + 
nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, + nfsd_file_hashtbl[hashval].nfb_count); + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) + nfsd_file_gc(); + + nf->nf_mark = nfsd_file_mark_find_or_create(nf); + if (nf->nf_mark) + status = nfsd_open_verified(rqstp, fhp, S_IFREG, + may_flags, &nf->nf_file); + else status = nfserr_jukebox; /* * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || inode->i_nlink == 0) - nfsd_file_unhash(nf); - clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); - if (status == nfs_ok) - goto out; - -construction_err: - if (refcount_dec_and_test(&nf->nf_ref)) - nfsd_file_free(nf); - nf = NULL; + if (status != nfs_ok || inode->i_nlink == 0) { + bool do_free; + spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); + do_free = nfsd_file_unhash(nf); + spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + if (do_free) + nfsd_file_put_noref(nf); + } + clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); + smp_mb__after_atomic(); + wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; } -/** - * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file - * @rqstp: the RPC transaction being executed - * @fhp: the NFS filehandle of the file to be opened - * @may_flags: NFSD_MAY_ settings for the file - * @pnf: OUT: new or found "struct nfsd_file" object - * - * The nfsd_file object returned by this API is reference-counted - * and garbage-collected. The object is retained for a few - * seconds after the final nfsd_file_put() in case the caller - * wants to re-use it. - * - * Return values: - * %nfs_ok - @pnf points to an nfsd_file with its reference - * count boosted. - * - * On error, an nfsstat value in network byte order is returned. 
- */ -__be32 -nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) -{ - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); -} - -/** - * nfsd_file_acquire - Get a struct nfsd_file with an open file - * @rqstp: the RPC transaction being executed - * @fhp: the NFS filehandle of the file to be opened - * @may_flags: NFSD_MAY_ settings for the file - * @pnf: OUT: new or found "struct nfsd_file" object - * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is unhashed after the - * final nfsd_file_put(). - * - * Return values: - * %nfs_ok - @pnf points to an nfsd_file with its reference - * count boosted. - * - * On error, an nfsstat value in network byte order is returned. - */ -__be32 -nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) -{ - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); -} - -/** - * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file - * @rqstp: the RPC transaction being executed - * @fhp: the NFS filehandle of the file just created - * @may_flags: NFSD_MAY_ settings for the file - * @file: cached, already-open file (may be NULL) - * @pnf: OUT: new or found "struct nfsd_file" object - * - * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, - * and @file is non-NULL, use it to instantiate a new nfsd_file instead of - * opening a new one. - * - * Return values: - * %nfs_ok - @pnf points to an nfsd_file with its reference - * count boosted. - * - * On error, an nfsstat value in network byte order is returned. 
- */ -__be32 -nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct file *file, - struct nfsd_file **pnf) -{ - return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); -} - /* * Note that fields may be added, removed or reordered in the future. Programs * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -int nfsd_file_cache_stats_show(struct seq_file *m, void *v) +static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long releases = 0, evictions = 0; - unsigned long hits = 0, acquisitions = 0; - unsigned int i, count = 0, buckets = 0; - unsigned long lru = 0, total_age = 0; + unsigned int i, count = 0, longest = 0; + unsigned long hits = 0; - /* Serialize with server shutdown */ + /* + * No need for spinlocks here since we're not terribly interested in + * accuracy. We do take the nfsd_mutex simply to ensure that we + * don't end up racing with server shutdown + */ mutex_lock(&nfsd_mutex); - if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { - struct bucket_table *tbl; - struct rhashtable *ht; - - lru = list_lru_count(&nfsd_file_lru); - - rcu_read_lock(); - ht = &nfsd_file_rhltable.ht; - count = atomic_read(&ht->nelems); - tbl = rht_dereference_rcu(ht->tbl, ht); - buckets = tbl->size; - rcu_read_unlock(); + if (nfsd_file_hashtbl) { + for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { + count += nfsd_file_hashtbl[i].nfb_count; + longest = max(longest, nfsd_file_hashtbl[i].nfb_count); + } } mutex_unlock(&nfsd_mutex); - for_each_possible_cpu(i) { + for_each_possible_cpu(i) hits += per_cpu(nfsd_file_cache_hits, i); - acquisitions += per_cpu(nfsd_file_acquisitions, i); - releases += per_cpu(nfsd_file_releases, i); - total_age += per_cpu(nfsd_file_total_age, i); - evictions += per_cpu(nfsd_file_evictions, i); - } - seq_printf(m, "total inodes: %u\n", count); - seq_printf(m, "hash buckets: %u\n", buckets); - seq_printf(m, "lru 
entries: %lu\n", lru); + seq_printf(m, "total entries: %u\n", count); + seq_printf(m, "longest chain: %u\n", longest); seq_printf(m, "cache hits: %lu\n", hits); - seq_printf(m, "acquisitions: %lu\n", acquisitions); - seq_printf(m, "releases: %lu\n", releases); - seq_printf(m, "evictions: %lu\n", evictions); - if (releases) - seq_printf(m, "mean age (ms): %ld\n", total_age / releases); - else - seq_printf(m, "mean age (ms): -\n"); return 0; } + +int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_file_cache_stats_show, NULL); +} diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index e54165a3224f..435ceab27897 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,23 +29,23 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { - struct rhlist_head nf_rlist; - void *nf_inode; + struct hlist_node nf_node; + struct list_head nf_lru; + struct rcu_head nf_rcu; struct file *nf_file; const struct cred *nf_cred; struct net *nf_net; #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) -#define NFSD_FILE_REFERENCED (2) -#define NFSD_FILE_GC (3) +#define NFSD_FILE_BREAK_READ (2) +#define NFSD_FILE_BREAK_WRITE (3) +#define NFSD_FILE_REFERENCED (4) unsigned long nf_flags; + struct inode *nf_inode; + unsigned int nf_hashval; refcount_t nf_ref; unsigned char nf_may; - struct nfsd_file_mark *nf_mark; - struct list_head nf_lru; - struct rcu_head nf_rcu; - ktime_t nf_birthtime; }; int nfsd_file_cache_init(void); @@ -57,12 +57,7 @@ void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); -__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); 
-__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct file *file, - struct nfsd_file **nfp); -int nfsd_file_cache_stats_show(struct seq_file *m, void *v); +int nfsd_file_cache_stats_open(struct inode *, struct file *); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index fabc21ed68ce..db7ef07ae50c 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -15,7 +15,6 @@ #include "flexfilelayoutxdr.h" #include "pnfs.h" -#include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PNFS @@ -62,7 +61,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, goto out_error; fl->fh.size = fhp->fh_handle.fh_size; - memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size); + memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size); /* Give whole file layout segments */ seg->offset = 0; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 46a7f9b813e5..3f5b3d7b62b7 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -25,22 +25,18 @@ * Note: we hold the dentry use count while the file is open. */ static __be32 -nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, - int mode) +nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) { __be32 nfserr; - int access; struct svc_fh fh; /* must initialize before using! but maxsize doesn't matter */ fh_init(&fh,0); fh.fh_handle.fh_size = f->size; - memcpy(&fh.fh_handle.fh_raw, f->data, f->size); + memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); fh.fh_export = NULL; - access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; - access |= NFSD_MAY_LOCK; - nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp); + nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. 
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 51a4b7885cae..02d3d2f0e616 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -10,8 +10,6 @@ #include #include -#include -#include /* Hash tables for nfs4_clientid state */ #define CLIENT_HASH_BITS 4 @@ -23,14 +21,6 @@ struct cld_net; struct nfsd4_client_tracking_ops; -enum { - /* cache misses due only to checksum comparison failures */ - NFSD_NET_PAYLOAD_MISSES, - /* amount of memory (in bytes) currently consumed by the DRC */ - NFSD_NET_DRC_MEM_USAGE, - NFSD_NET_COUNTERS_NUM -}; - /* * Represents a nfsd "container". With respect to nfsv4 state tracking, the * fields of interest are the *_id_hashtbls and the *_name_tree. These track @@ -109,8 +99,9 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; - seqlock_t writeverf_lock; - unsigned char writeverf[8]; + /* Time of server startup */ + struct timespec64 nfssvc_boot; + seqlock_t boot_lock; /* * Max number of connections this nfsd container will allow. Defaults @@ -123,13 +114,12 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; - /* When a listening socket is added to nfsd, keep_active is set - * and this justifies a reference on nfsd_serv. This stops - * nfsd_serv from being freed. When the number of threads is - * set, keep_active is cleared and the reference is dropped. So - * when the last thread exits, the service will be destroyed. - */ - int keep_active; + + wait_queue_head_t ntf_wq; + atomic_t ntf_refcnt; + + /* Allow umount to wait for nfsd state cleanup */ + struct completion nfsd_shutdown_complete; /* * clientid and stateid data for construction of net unique COPY @@ -159,16 +149,20 @@ struct nfsd_net { /* * Stats and other tracking of on the duplicate reply cache. - * The longest_chain* fields are modified with only the per-bucket - * cache lock, which isn't really safe and should be fixed if we want - * these statistics to be completely accurate. 
+ * These fields and the "rc" fields in nfsdstats are modified + * with only the per-bucket cache lock, which isn't really safe + * and should be fixed if we want the statistics to be + * completely accurate. */ /* total number of entries */ atomic_t num_drc_entries; - /* Per-netns stats counters */ - struct percpu_counter counter[NFSD_NET_COUNTERS_NUM]; + /* cache misses due only to checksum comparison failures */ + unsigned int payload_misses; + + /* amount of memory (in bytes) currently consumed by the DRC */ + unsigned int drc_mem_usage; /* longest hash chain seen */ unsigned int longest_chain; @@ -177,25 +171,8 @@ struct nfsd_net { unsigned int longest_chain_cachesize; struct shrinker nfsd_reply_cache_shrinker; - - /* tracking server-to-server copy mounts */ - spinlock_t nfsd_ssc_lock; - struct list_head nfsd_ssc_mount_list; - wait_queue_head_t nfsd_ssc_waitq; - /* utsname taken from the process that starts the server */ char nfsd_name[UNX_MAXNODENAME+1]; - - struct nfsd_fcache_disposal *fcache_disposal; - - siphash_key_t siphash_key; - - atomic_t nfs4_client_count; - int nfs4_max_clients; - - atomic_t nfsd_courtesy_clients; - struct shrinker nfsd_client_shrinker; - struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ @@ -205,6 +182,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn); extern unsigned int nfsd_net_id; -void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); -void nfsd_reset_write_verifier(struct nfsd_net *nn); +void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn); +void nfsd_reset_boot_verifier(struct nfsd_net *nn); #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 9adf672dedbd..6a900f770dd2 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -111,7 +111,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - inode_lock(inode); + fh_lock(fh); error = set_posix_acl(inode, 
ACL_TYPE_ACCESS, argp->acl_access); if (error) @@ -120,7 +120,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_drop_lock; - inode_unlock(inode); + fh_unlock(fh); fh_drop_write(fh); @@ -134,7 +134,7 @@ out: return rpc_success; out_drop_lock: - inode_unlock(inode); + fh_unlock(fh); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); @@ -185,106 +185,161 @@ out: /* * XDR decode functions */ +static int nfsaclsvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +{ + return 1; +} -static bool -nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_getaclargs *argp = rqstp->rq_argp; - if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return false; + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) + return 0; + argp->mask = ntohl(*p); p++; - return true; + return xdr_argsize_check(rqstp, p); } -static bool -nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) + +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_setaclargs *argp = rqstp->rq_argp; + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; - if (!svcxdr_decode_fhandle(xdr, &argp->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return false; - if (argp->mask & ~NFS_ACL_MASK) - return false; - if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? - &argp->acl_access : NULL)) - return false; - if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? 
- &argp->acl_default : NULL)) - return false; + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) + return 0; + argp->mask = ntohl(*p++); + if (argp->mask & ~NFS_ACL_MASK || + !xdr_argsize_check(rqstp, p)) + return 0; - return true; + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL); + return (n > 0); } -static bool -nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) { - struct nfsd3_accessargs *args = rqstp->rq_argp; + struct nfsd_fhandle *argp = rqstp->rq_argp; - if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return false; + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) + return 0; + return xdr_argsize_check(rqstp, p); +} - return true; +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +{ + struct nfsd3_accessargs *argp = rqstp->rq_argp; + + p = nfs2svc_decode_fh(p, &argp->fh); + if (!p) + return 0; + argp->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); } /* * XDR encode functions */ +/* + * There must be an encoding function for void results so svc_process + * will work properly. 
+ */ +static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_ressize_check(rqstp, p); +} + /* GETACL */ -static bool -nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + int w; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; + *p++ = resp->status; + if (resp->status != nfs_ok) + return xdr_ressize_check(rqstp, p); + /* + * Since this is version 2, the check for nfserr in + * nfsd_dispatch actually ensures the following cannot happen. + * However, it seems fragile to depend on that. + */ if (dentry == NULL || d_really_is_negative(dentry)) - return true; + return 0; inode = d_inode(dentry); - if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return false; - if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return false; + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; - if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, - resp->mask & NFS_ACL, 0)) - return false; - if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, - resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) - return false; + rqstp->rq_res.page_len = w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); + while (w > 0) { + if (!*(rqstp->rq_next_page++)) + return 0; + w -= PAGE_SIZE; + } - return true; + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + return (n > 0); +} + +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) +{ + struct nfsd_attrstat *resp = rqstp->rq_resp; + + *p++ = resp->status; + if (resp->status != nfs_ok) + goto out; + + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); +out: + return xdr_ressize_check(rqstp, p); } /* ACCESS */ -static bool -nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_accessres *resp = rqstp->rq_resp; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return false; - if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return false; - break; - } + *p++ = resp->status; + if (resp->status != nfs_ok) + goto out; - return true; + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); + *p++ = htonl(resp->access); +out: + return xdr_ressize_check(rqstp, p); } /* @@ -299,6 +354,13 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } +static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp) +{ + struct nfsd_attrstat *resp = rqstp->rq_resp; + + fh_put(&resp->fh); +} + static void nfsaclsvc_release_access(struct svc_rqst *rqstp) { struct nfsd3_accessres *resp = rqstp->rq_resp; @@ -316,14 +378,12 @@ struct nfsd3_voidargs { int dummy; }; static const struct svc_procedure nfsd_acl_procedures2[5] = { [ACLPROC2_NULL] = { .pc_func = nfsacld_proc_null, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = 
nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfsaclsvc_decode_voidarg, + .pc_encode = nfsaclsvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidargs), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, - .pc_name = "NULL", }, [ACLPROC2_GETACL] = { .pc_func = nfsacld_proc_getacl, @@ -331,35 +391,29 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { .pc_encode = nfsaclsvc_encode_getaclres, .pc_release = nfsaclsvc_release_getacl, .pc_argsize = sizeof(struct nfsd3_getaclargs), - .pc_argzero = sizeof(struct nfsd3_getaclargs), .pc_ressize = sizeof(struct nfsd3_getaclres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+2*(1+ACL), - .pc_name = "GETACL", }, [ACLPROC2_SETACL] = { .pc_func = nfsacld_proc_setacl, .pc_decode = nfsaclsvc_decode_setaclargs, - .pc_encode = nfssvc_encode_attrstatres, - .pc_release = nfssvc_release_attrstat, + .pc_encode = nfsaclsvc_encode_attrstatres, + .pc_release = nfsaclsvc_release_attrstat, .pc_argsize = sizeof(struct nfsd3_setaclargs), - .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, - .pc_name = "SETACL", }, [ACLPROC2_GETATTR] = { .pc_func = nfsacld_proc_getattr, - .pc_decode = nfssvc_decode_fhandleargs, - .pc_encode = nfssvc_encode_attrstatres, - .pc_release = nfssvc_release_attrstat, + .pc_decode = nfsaclsvc_decode_fhandleargs, + .pc_encode = nfsaclsvc_encode_attrstatres, + .pc_release = nfsaclsvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), - .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, - .pc_name = "GETATTR", }, [ACLPROC2_ACCESS] = { .pc_func = nfsacld_proc_access, @@ -367,11 +421,9 @@ static const struct svc_procedure 
nfsd_acl_procedures2[5] = { .pc_encode = nfsaclsvc_encode_accessres, .pc_release = nfsaclsvc_release_access, .pc_argsize = sizeof(struct nfsd3_accessargs), - .pc_argzero = sizeof(struct nfsd3_accessargs), .pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1, - .pc_name = "SETATTR", }, }; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 161f831b3a1b..34a394e50e1d 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -101,7 +101,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - inode_lock(inode); + fh_lock(fh); error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); if (error) @@ -109,7 +109,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); out_drop_lock: - inode_unlock(inode); + fh_unlock(fh); fh_drop_write(fh); out_errno: resp->status = nfserrno(error); @@ -124,39 +124,43 @@ out: /* * XDR decode functions */ - -static bool -nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_getaclargs *args = rqstp->rq_argp; - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &args->mask) < 0) - return false; + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) + return 0; + args->mask = ntohl(*p); p++; - return true; + return xdr_argsize_check(rqstp, p); } -static bool -nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) + +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) { - struct nfsd3_setaclargs *argp = rqstp->rq_argp; + struct nfsd3_setaclargs *args = rqstp->rq_argp; + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; - if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) - return false; - if (argp->mask & ~NFS_ACL_MASK) - 
return false; - if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? - &argp->acl_access : NULL)) - return false; - if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? - &argp->acl_default : NULL)) - return false; + p = nfs3svc_decode_fh(p, &args->fh); + if (!p) + return 0; + args->mask = ntohl(*p++); + if (args->mask & ~NFS_ACL_MASK || + !xdr_argsize_check(rqstp, p)) + return 0; - return true; + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (args->mask & NFS_ACL) ? + &args->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (args->mask & NFS_DFACL) ? + &args->acl_default : NULL); + return (n > 0); } /* @@ -164,47 +168,59 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) */ /* GETACL */ -static bool -nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; - struct inode *inode; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - inode = d_inode(dentry); - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - if (xdr_stream_encode_u32(xdr, resp->mask) < 0) - return false; + *p++ = resp->status; + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0 && dentry && d_really_is_positive(dentry)) { + struct inode *inode = d_inode(dentry); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + int w; - if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, - resp->mask & NFS_ACL, 0)) - return false; - if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, - resp->mask & NFS_DFACL, - NFS_ACL_DEFAULT)) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - } + *p++ = htonl(resp->mask); 
+ if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; - return true; + rqstp->rq_res.page_len = w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + while (w > 0) { + if (!*(rqstp->rq_next_page++)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + } else + if (!xdr_ressize_check(rqstp, p)) + return 0; + + return 1; } /* SETACL */ -static bool -nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_attrstat *resp = rqstp->rq_resp; - return svcxdr_encode_nfsstat3(xdr, resp->status) && - svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh); + *p++ = resp->status; + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); } /* @@ -229,14 +245,12 @@ struct nfsd3_voidargs { int dummy; }; static const struct svc_procedure nfsd_acl_procedures3[3] = { [ACLPROC3_NULL] = { .pc_func = nfsd3_proc_null, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfs3svc_decode_voidarg, + .pc_encode = nfs3svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidargs), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, - .pc_name = "NULL", }, [ACLPROC3_GETACL] = { .pc_func = nfsd3_proc_getacl, @@ -244,11 +258,9 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_encode = nfs3svc_encode_getaclres, .pc_release = nfs3svc_release_getacl, .pc_argsize = sizeof(struct 
nfsd3_getaclargs), - .pc_argzero = sizeof(struct nfsd3_getaclargs), .pc_ressize = sizeof(struct nfsd3_getaclres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+2*(1+ACL), - .pc_name = "GETACL", }, [ACLPROC3_SETACL] = { .pc_func = nfsd3_proc_setacl, @@ -256,11 +268,9 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { .pc_encode = nfs3svc_encode_setaclres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_setaclargs), - .pc_argzero = sizeof(struct nfsd3_setaclargs), .pc_ressize = sizeof(struct nfsd3_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT, - .pc_name = "SETACL", }, }; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 19cf583096d9..981a4e4c9a3c 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -8,12 +8,10 @@ #include #include #include -#include #include "cache.h" #include "xdr3.h" #include "vfs.h" -#include "filecache.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -68,15 +66,12 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) { struct nfsd3_sattrargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - struct nfsd_attrs attrs = { - .na_iattr = &argp->attrs, - }; dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, + resp->status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, argp->check_guard, argp->guardtime); return rpc_success; } @@ -129,7 +124,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp) static __be32 nfsd3_proc_readlink(struct svc_rqst *rqstp) { - struct nfsd_fhandle *argp = rqstp->rq_argp; + struct nfsd3_readlinkargs *argp = rqstp->rq_argp; struct nfsd3_readlinkres *resp = rqstp->rq_resp; dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); @@ -137,9 +132,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp) /* Read the symlink. 
*/ fh_copy(&resp->fh, &argp->fh); resp->len = NFS3_MAXPATHLEN; - resp->pages = rqstp->rq_next_page++; - resp->status = nfsd_readlink(rqstp, &resp->fh, - page_address(*resp->pages), &resp->len); + resp->status = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len); return rpc_success; } @@ -151,43 +144,25 @@ nfsd3_proc_read(struct svc_rqst *rqstp) { struct nfsd3_readargs *argp = rqstp->rq_argp; struct nfsd3_readres *resp = rqstp->rq_resp; - unsigned int len; - int v; + u32 max_blocksize = svc_max_payload(rqstp); + unsigned long cnt = min(argp->count, max_blocksize); dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), (unsigned long) argp->count, (unsigned long long) argp->offset); - argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); - argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); - if (argp->offset > (u64)OFFSET_MAX) - argp->offset = (u64)OFFSET_MAX; - if (argp->offset + argp->count > (u64)OFFSET_MAX) - argp->count = (u64)OFFSET_MAX - argp->offset; - - v = 0; - len = argp->count; - resp->pages = rqstp->rq_next_page; - while (len > 0) { - struct page *page = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(page); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } - /* Obtain buffer pointer for payload. 
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - resp->count = argp->count; + resp->count = cnt; svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, v, &resp->count, &resp->eof); + rqstp->rq_vec, argp->vlen, &resp->count, + &resp->eof); return rpc_success; } @@ -215,147 +190,32 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - + nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, + &argp->first, cnt); + if (!nvecs) { + resp->status = nfserr_io; + goto out; + } resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, nvecs, &cnt, resp->committed, resp->verf); resp->count = cnt; +out: return rpc_success; } /* - * Implement NFSv3's unchecked, guarded, and exclusive CREATE - * semantics for regular files. Except for the created file, - * this operation is stateless on the server. - * - * Upon return, caller must release @fhp and @resfhp. + * With NFSv3, CREATE processing is a lot easier than with NFSv2. + * At least in theory; we'll see how it fares in practice when the + * first reports about SunOS compatibility problems start to pour in... 
*/ -static __be32 -nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct nfsd3_createargs *argp) -{ - struct iattr *iap = &argp->attrs; - struct dentry *parent, *child; - struct nfsd_attrs attrs = { - .na_iattr = iap, - }; - __u32 v_mtime, v_atime; - struct inode *inode; - __be32 status; - int host_err; - - if (isdotent(argp->name, argp->len)) - return nfserr_exist; - if (!(iap->ia_valid & ATTR_MODE)) - iap->ia_mode = 0; - - status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (status != nfs_ok) - return status; - - parent = fhp->fh_dentry; - inode = d_inode(parent); - - host_err = fh_want_write(fhp); - if (host_err) - return nfserrno(host_err); - - inode_lock_nested(inode, I_MUTEX_PARENT); - - child = lookup_one_len(argp->name, parent, argp->len); - if (IS_ERR(child)) { - status = nfserrno(PTR_ERR(child)); - goto out; - } - - if (d_really_is_negative(child)) { - status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); - if (status != nfs_ok) - goto out; - } - - status = fh_compose(resfhp, fhp->fh_export, child, fhp); - if (status != nfs_ok) - goto out; - - v_mtime = 0; - v_atime = 0; - if (argp->createmode == NFS3_CREATE_EXCLUSIVE) { - u32 *verifier = (u32 *)argp->verf; - - /* - * Solaris 7 gets confused (bugid 4218508) if these have - * the high bit set, as do xfs filesystems without the - * "bigtime" feature. So just clear the high bits. 
- */ - v_mtime = verifier[0] & 0x7fffffff; - v_atime = verifier[1] & 0x7fffffff; - } - - if (d_really_is_positive(child)) { - status = nfs_ok; - - switch (argp->createmode) { - case NFS3_CREATE_UNCHECKED: - if (!d_is_reg(child)) - break; - iap->ia_valid &= ATTR_SIZE; - goto set_attr; - case NFS3_CREATE_GUARDED: - status = nfserr_exist; - break; - case NFS3_CREATE_EXCLUSIVE: - if (d_inode(child)->i_mtime.tv_sec == v_mtime && - d_inode(child)->i_atime.tv_sec == v_atime && - d_inode(child)->i_size == 0) { - break; - } - status = nfserr_exist; - } - goto out; - } - - if (!IS_POSIXACL(inode)) - iap->ia_mode &= ~current_umask(); - - fh_fill_pre_attrs(fhp); - host_err = vfs_create(inode, child, iap->ia_mode, true); - if (host_err < 0) { - status = nfserrno(host_err); - goto out; - } - fh_fill_post_attrs(fhp); - - /* A newly created file already has a file size of zero. */ - if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) - iap->ia_valid &= ~ATTR_SIZE; - if (argp->createmode == NFS3_CREATE_EXCLUSIVE) { - iap->ia_valid = ATTR_MTIME | ATTR_ATIME | - ATTR_MTIME_SET | ATTR_ATIME_SET; - iap->ia_mtime.tv_sec = v_mtime; - iap->ia_atime.tv_sec = v_atime; - iap->ia_mtime.tv_nsec = 0; - iap->ia_atime.tv_nsec = 0; - } - -set_attr: - status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); - -out: - inode_unlock(inode); - if (child && !IS_ERR(child)) - dput(child); - fh_drop_write(fhp); - return status; -} - static __be32 nfsd3_proc_create(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - svc_fh *dirfhp, *newfhp; + svc_fh *dirfhp, *newfhp = NULL; + struct iattr *attr; dprintk("nfsd: CREATE(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -364,8 +224,21 @@ nfsd3_proc_create(struct svc_rqst *rqstp) dirfhp = fh_copy(&resp->dirfh, &argp->fh); newfhp = fh_init(&resp->fh, NFS3_FHSIZE); + attr = &argp->attrs; - resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp); + /* Unfudge the mode bits */ + attr->ia_mode &= 
~S_IFMT; + if (!(attr->ia_valid & ATTR_MODE)) { + attr->ia_valid |= ATTR_MODE; + attr->ia_mode = S_IFREG; + } else { + attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG; + } + + /* Now create the file and set attributes */ + resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, + attr, newfhp, argp->createmode, + (u32 *)argp->verf, NULL, NULL); return rpc_success; } @@ -377,9 +250,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - struct nfsd_attrs attrs = { - .na_iattr = &argp->attrs, - }; dprintk("nfsd: MKDIR(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -390,7 +260,8 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &attrs, S_IFDIR, 0, &resp->fh); + &argp->attrs, S_IFDIR, 0, &resp->fh); + fh_unlock(&resp->dirfh); return rpc_success; } @@ -399,9 +270,6 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) { struct nfsd3_symlinkargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - struct nfsd_attrs attrs = { - .na_iattr = &argp->attrs, - }; if (argp->tlen == 0) { resp->status = nfserr_inval; @@ -428,7 +296,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, - argp->flen, argp->tname, &attrs, &resp->fh); + argp->flen, argp->tname, &resp->fh); kfree(argp->tname); out: return rpc_success; @@ -442,9 +310,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) { struct nfsd3_mknodargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - struct nfsd_attrs attrs = { - .na_iattr = &argp->attrs, - }; int type; dev_t rdev = 0; @@ -470,7 +335,8 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) type = nfs3_ftypes[argp->ftype]; resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - 
&attrs, type, rdev, &resp->fh); + &argp->attrs, type, rdev, &resp->fh); + fh_unlock(&resp->dirfh); out: return rpc_success; } @@ -493,6 +359,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); return rpc_success; } @@ -513,6 +380,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + fh_unlock(&resp->fh); return rpc_success; } @@ -558,26 +426,6 @@ nfsd3_proc_link(struct svc_rqst *rqstp) return rpc_success; } -static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, - struct nfsd3_readdirres *resp, - u32 count) -{ - struct xdr_buf *buf = &resp->dirlist; - struct xdr_stream *xdr = &resp->xdr; - unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, - svc_max_payload(rqstp)); - - memset(buf, 0, sizeof(*buf)); - - /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); - buf->buflen -= XDR_UNIT * 2; - buf->pages = rqstp->rq_next_page; - rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; - - xdr_init_encode_pages(xdr, buf, buf->pages, NULL); -} - /* * Read a portion of a directory. 
*/ @@ -586,26 +434,53 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - loff_t offset; + int count = 0; + struct page **p; + caddr_t page_addr = NULL; dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, (u32) argp->cookie); - nfsd3_init_dirlist_pages(rqstp, resp, argp->count); + /* Make sure we've room for the NULL ptr & eof flag, and shrink to + * client read size */ + count = (argp->count >> 2) - 2; + /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - resp->common.err = nfs_ok; - resp->cookie_offset = 0; - resp->rqstp = rqstp; - offset = argp->cookie; - resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, - &resp->common, nfs3svc_encode_entry3); - memcpy(resp->verf, argp->verf, 8); - nfs3svc_encode_cookie3(resp, offset); - /* Recycle only pages that were part of the reply */ - rqstp->rq_next_page = resp->xdr.page_ptr + 1; + resp->buflen = count; + resp->common.err = nfs_ok; + resp->buffer = argp->buffer; + resp->rqstp = rqstp; + resp->status = nfsd_readdir(rqstp, &resp->fh, (loff_t *)&argp->cookie, + &resp->common, nfs3svc_encode_entry); + memcpy(resp->verf, argp->verf, 8); + count = 0; + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); + + if (((caddr_t)resp->buffer >= page_addr) && + ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { + count += (caddr_t)resp->buffer - page_addr; + break; + } + count += PAGE_SIZE; + } + resp->count = count >> 2; + if (resp->offset) { + loff_t offset = argp->cookie; + + if (unlikely(resp->offset1)) { + /* we ended up with offset on a page boundary */ + *resp->offset = htonl(offset >> 32); + *resp->offset1 = htonl(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + resp->offset = NULL; + } return rpc_success; } @@ -619,17 +494,25 @@ nfsd3_proc_readdirplus(struct svc_rqst 
*rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; + int count = 0; loff_t offset; + struct page **p; + caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, (u32) argp->cookie); - nfsd3_init_dirlist_pages(rqstp, resp, argp->count); + /* Convert byte count to number of words (i.e. >> 2), + * and reserve room for the NULL ptr & eof flag (-2 words) */ + resp->count = (argp->count >> 2) - 2; + /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); + resp->common.err = nfs_ok; - resp->cookie_offset = 0; + resp->buffer = argp->buffer; + resp->buflen = resp->count; resp->rqstp = rqstp; offset = argp->cookie; @@ -643,12 +526,30 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) } resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, - &resp->common, nfs3svc_encode_entryplus3); + &resp->common, nfs3svc_encode_entry_plus); memcpy(resp->verf, argp->verf, 8); - nfs3svc_encode_cookie3(resp, offset); + for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { + page_addr = page_address(*p); - /* Recycle only pages that were part of the reply */ - rqstp->rq_next_page = resp->xdr.page_ptr + 1; + if (((caddr_t)resp->buffer >= page_addr) && + ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { + count += (caddr_t)resp->buffer - page_addr; + break; + } + count += PAGE_SIZE; + } + resp->count = count >> 2; + if (resp->offset) { + if (unlikely(resp->offset1)) { + /* we ended up with offset on a page boundary */ + *resp->offset = htonl(offset >> 32); + *resp->offset1 = htonl(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + resp->offset = NULL; + } out: return rpc_success; @@ -764,21 +665,20 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) { struct nfsd3_commitargs *argp = rqstp->rq_argp; struct nfsd3_commitres *resp = rqstp->rq_resp; - struct nfsd_file *nf; dprintk("nfsd: COMMIT(3) %s 
%u@%Lu\n", SVCFH_fmt(&argp->fh), argp->count, (unsigned long long) argp->offset); - fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | - NFSD_MAY_NOT_BREAK_LEASE, &nf); - if (resp->status) + if (argp->offset > NFS_OFFSET_MAX) { + resp->status = nfserr_inval; goto out; - resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, + } + + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, resp->verf); - nfsd_file_put(nf); out: return rpc_success; } @@ -788,14 +688,18 @@ out: * NFSv3 Server procedures. * Only the results of non-idempotent operations are cached. */ +#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat #define nfsd3_mkdirargs nfsd3_createargs #define nfsd3_readdirplusargs nfsd3_readdirargs #define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_fhandleres nfsd3_attrstat #define nfsd3_attrstatres nfsd3_attrstat #define nfsd3_wccstatres nfsd3_attrstat #define nfsd3_createres nfsd3_diropres +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; #define ST 1 /* status*/ #define FH 17 /* filehandle with length */ @@ -806,26 +710,22 @@ out: static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_NULL] = { .pc_func = nfsd3_proc_null, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfs3svc_decode_voidarg, + .pc_encode = nfs3svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, - .pc_name = "NULL", }, [NFS3PROC_GETATTR] = { .pc_func = nfsd3_proc_getattr, .pc_decode = nfs3svc_decode_fhandleargs, - .pc_encode = 
nfs3svc_encode_getattrres, + .pc_encode = nfs3svc_encode_attrstatres, .pc_release = nfs3svc_release_fhandle, - .pc_argsize = sizeof(struct nfsd_fhandle), - .pc_argzero = sizeof(struct nfsd_fhandle), + .pc_argsize = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_attrstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, - .pc_name = "GETATTR", }, [NFS3PROC_SETATTR] = { .pc_func = nfsd3_proc_setattr, @@ -833,23 +733,19 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_sattrargs), - .pc_argzero = sizeof(struct nfsd3_sattrargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, - .pc_name = "SETATTR", }, [NFS3PROC_LOOKUP] = { .pc_func = nfsd3_proc_lookup, .pc_decode = nfs3svc_decode_diropargs, - .pc_encode = nfs3svc_encode_lookupres, + .pc_encode = nfs3svc_encode_diropres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_diropargs), - .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+pAT+pAT, - .pc_name = "LOOKUP", }, [NFS3PROC_ACCESS] = { .pc_func = nfsd3_proc_access, @@ -857,23 +753,19 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_accessres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_accessargs), - .pc_argzero = sizeof(struct nfsd3_accessargs), .pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1, - .pc_name = "ACCESS", }, [NFS3PROC_READLINK] = { .pc_func = nfsd3_proc_readlink, - .pc_decode = nfs3svc_decode_fhandleargs, + .pc_decode = nfs3svc_decode_readlinkargs, .pc_encode = nfs3svc_encode_readlinkres, .pc_release = nfs3svc_release_fhandle, - .pc_argsize = sizeof(struct nfsd_fhandle), - .pc_argzero = sizeof(struct nfsd_fhandle), + 
.pc_argsize = sizeof(struct nfsd3_readlinkargs), .pc_ressize = sizeof(struct nfsd3_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, - .pc_name = "READLINK", }, [NFS3PROC_READ] = { .pc_func = nfsd3_proc_read, @@ -881,11 +773,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readargs), - .pc_argzero = sizeof(struct nfsd3_readargs), .pc_ressize = sizeof(struct nfsd3_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, - .pc_name = "READ", }, [NFS3PROC_WRITE] = { .pc_func = nfsd3_proc_write, @@ -893,11 +783,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_writeres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_writeargs), - .pc_argzero = sizeof(struct nfsd3_writeargs), .pc_ressize = sizeof(struct nfsd3_writeres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+4, - .pc_name = "WRITE", }, [NFS3PROC_CREATE] = { .pc_func = nfsd3_proc_create, @@ -905,11 +793,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_createargs), - .pc_argzero = sizeof(struct nfsd3_createargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, - .pc_name = "CREATE", }, [NFS3PROC_MKDIR] = { .pc_func = nfsd3_proc_mkdir, @@ -917,11 +803,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mkdirargs), - .pc_argzero = sizeof(struct nfsd3_mkdirargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, - .pc_name = "MKDIR", }, [NFS3PROC_SYMLINK] = { .pc_func = nfsd3_proc_symlink, 
@@ -929,11 +813,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_symlinkargs), - .pc_argzero = sizeof(struct nfsd3_symlinkargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, - .pc_name = "SYMLINK", }, [NFS3PROC_MKNOD] = { .pc_func = nfsd3_proc_mknod, @@ -941,11 +823,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mknodargs), - .pc_argzero = sizeof(struct nfsd3_mknodargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, - .pc_name = "MKNOD", }, [NFS3PROC_REMOVE] = { .pc_func = nfsd3_proc_remove, @@ -953,11 +833,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), - .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, - .pc_name = "REMOVE", }, [NFS3PROC_RMDIR] = { .pc_func = nfsd3_proc_rmdir, @@ -965,11 +843,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), - .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, - .pc_name = "RMDIR", }, [NFS3PROC_RENAME] = { .pc_func = nfsd3_proc_rename, @@ -977,11 +853,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_renameres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_renameargs), - .pc_argzero = sizeof(struct nfsd3_renameargs), 
.pc_ressize = sizeof(struct nfsd3_renameres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+WC, - .pc_name = "RENAME", }, [NFS3PROC_LINK] = { .pc_func = nfsd3_proc_link, @@ -989,11 +863,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_linkres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_linkargs), - .pc_argzero = sizeof(struct nfsd3_linkargs), .pc_ressize = sizeof(struct nfsd3_linkres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+pAT+WC, - .pc_name = "LINK", }, [NFS3PROC_READDIR] = { .pc_func = nfsd3_proc_readdir, @@ -1001,10 +873,8 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirargs), - .pc_argzero = sizeof(struct nfsd3_readdirargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, - .pc_name = "READDIR", }, [NFS3PROC_READDIRPLUS] = { .pc_func = nfsd3_proc_readdirplus, @@ -1012,43 +882,35 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirplusargs), - .pc_argzero = sizeof(struct nfsd3_readdirplusargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, - .pc_name = "READDIRPLUS", }, [NFS3PROC_FSSTAT] = { .pc_func = nfsd3_proc_fsstat, .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsstatres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), - .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+2*6+1, - .pc_name = "FSSTAT", }, [NFS3PROC_FSINFO] = { .pc_func = nfsd3_proc_fsinfo, .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsinfores, .pc_argsize = sizeof(struct nfsd3_fhandleargs), - .pc_argzero = sizeof(struct nfsd3_fhandleargs), 
.pc_ressize = sizeof(struct nfsd3_fsinfores), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+12, - .pc_name = "FSINFO", }, [NFS3PROC_PATHCONF] = { .pc_func = nfsd3_proc_pathconf, .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_pathconfres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), - .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_pathconfres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+6, - .pc_name = "PATHCONF", }, [NFS3PROC_COMMIT] = { .pc_func = nfsd3_proc_commit, @@ -1056,11 +918,9 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_commitres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_commitargs), - .pc_argzero = sizeof(struct nfsd3_commitargs), .pc_ressize = sizeof(struct nfsd3_commitres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+WC+2, - .pc_name = "COMMIT", }, }; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 3308dd671ef0..716566da400e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -14,26 +14,13 @@ #include "netns.h" #include "vfs.h" -/* - * Force construction of an empty post-op attr - */ -static const struct svc_fh nfs3svc_null_fh = { - .fh_no_wcc = true, -}; +#define NFSDDBG_FACILITY NFSDDBG_XDR -/* - * time_delta. {1, 0} means the server is accurate only - * to the nearest second. 
- */ -static const struct timespec64 nfs3svc_time_delta = { - .tv_sec = 1, - .tv_nsec = 0, -}; /* * Mapping of S_IF* types to NFS file types */ -static const u32 nfs3_ftypes[] = { +static u32 nfs3_ftypes[] = { NF3NON, NF3FIFO, NF3CHR, NF3BAD, NF3DIR, NF3BAD, NF3BLK, NF3BAD, NF3REG, NF3BAD, NF3LNK, NF3BAD, @@ -42,938 +29,824 @@ static const u32 nfs3_ftypes[] = { /* - * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6) + * XDR functions for basic NFS types */ +static __be32 * +encode_time3(__be32 *p, struct timespec64 *time) +{ + *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); + return p; +} static __be32 * -encode_nfstime3(__be32 *p, const struct timespec64 *time) +decode_time3(__be32 *p, struct timespec64 *time) { - *p++ = cpu_to_be32((u32)time->tv_sec); - *p++ = cpu_to_be32(time->tv_nsec); + time->tv_sec = ntohl(*p++); + time->tv_nsec = ntohl(*p++); + return p; +} + +static __be32 * +decode_fh(__be32 *p, struct svc_fh *fhp) +{ + unsigned int size; + fh_init(fhp, NFS3_FHSIZE); + size = ntohl(*p++); + if (size > NFS3_FHSIZE) + return NULL; + + memcpy(&fhp->fh_handle.fh_base, p, size); + fhp->fh_handle.fh_size = size; + return p + XDR_QUADLEN(size); +} + +/* Helper function for NFSv3 ACL code */ +__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + +static __be32 * +encode_fh(__be32 *p, struct svc_fh *fhp) +{ + unsigned int size = fhp->fh_handle.fh_size; + *p++ = htonl(size); + if (size) p[XDR_QUADLEN(size)-1]=0; + memcpy(p, &fhp->fh_handle.fh_base, size); + return p + XDR_QUADLEN(size); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. 
+ */ +static __be32 * +decode_filename(__be32 *p, char **namp, unsigned int *lenp) +{ + char *name; + unsigned int i; + + if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + } return p; } -static bool -svcxdr_decode_nfstime3(struct xdr_stream *xdr, struct timespec64 *timep) +static __be32 * +decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns) { - __be32 *p; - - p = xdr_inline_decode(xdr, XDR_UNIT * 2); - if (!p) - return false; - timep->tv_sec = be32_to_cpup(p++); - timep->tv_nsec = be32_to_cpup(p); - - return true; -} - -/** - * svcxdr_decode_nfs_fh3 - Decode an NFSv3 file handle - * @xdr: XDR stream positioned at an undecoded NFSv3 FH - * @fhp: OUT: filled-in server file handle - * - * Return values: - * %false: The encoded file handle was not valid - * %true: @fhp has been initialized - */ -bool -svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) -{ - __be32 *p; - u32 size; - - if (xdr_stream_decode_u32(xdr, &size) < 0) - return false; - if (size == 0 || size > NFS3_FHSIZE) - return false; - p = xdr_inline_decode(xdr, size); - if (!p) - return false; - fh_init(fhp, NFS3_FHSIZE); - fhp->fh_handle.fh_size = size; - memcpy(&fhp->fh_handle.fh_raw, p, size); - - return true; -} - -/** - * svcxdr_encode_nfsstat3 - Encode an NFSv3 status code - * @xdr: XDR stream - * @status: status value to encode - * - * Return values: - * %false: Send buffer space was exhausted - * %true: Success - */ -bool -svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, sizeof(status)); - if (!p) - return false; - *p = status; - - return true; -} - -static bool -svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) -{ - u32 size = fhp->fh_handle.fh_size; - __be32 *p; - - p = xdr_reserve_space(xdr, XDR_UNIT + size); - if (!p) - return false; - *p++ 
= cpu_to_be32(size); - if (size) - p[XDR_QUADLEN(size) - 1] = 0; - memcpy(p, &fhp->fh_handle.fh_raw, size); - - return true; -} - -static bool -svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) -{ - if (xdr_stream_encode_item_present(xdr) < 0) - return false; - if (!svcxdr_encode_nfs_fh3(xdr, fhp)) - return false; - - return true; -} - -static bool -svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, NFS3_COOKIEVERFSIZE); - if (!p) - return false; - memcpy(p, verf, NFS3_COOKIEVERFSIZE); - - return true; -} - -static bool -svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, NFS3_WRITEVERFSIZE); - if (!p) - return false; - memcpy(p, verf, NFS3_WRITEVERFSIZE); - - return true; -} - -static bool -svcxdr_decode_filename3(struct xdr_stream *xdr, char **name, unsigned int *len) -{ - u32 size, i; - __be32 *p; - char *c; - - if (xdr_stream_decode_u32(xdr, &size) < 0) - return false; - if (size == 0 || size > NFS3_MAXNAMLEN) - return false; - p = xdr_inline_decode(xdr, size); - if (!p) - return false; - - *len = size; - *name = (char *)p; - for (i = 0, c = *name; i < size; i++, c++) { - if (*c == '\0' || *c == '/') - return false; - } - - return true; -} - -static bool -svcxdr_decode_diropargs3(struct xdr_stream *xdr, struct svc_fh *fhp, - char **name, unsigned int *len) -{ - return svcxdr_decode_nfs_fh3(xdr, fhp) && - svcxdr_decode_filename3(xdr, name, len); -} - -static bool -svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, - struct iattr *iap) -{ - u32 set_it; + u32 tmp; iap->ia_valid = 0; - if (xdr_stream_decode_bool(xdr, &set_it) < 0) - return false; - if (set_it) { - u32 mode; - - if (xdr_stream_decode_u32(xdr, &mode) < 0) - return false; + if (*p++) { iap->ia_valid |= ATTR_MODE; - iap->ia_mode = mode; + iap->ia_mode = ntohl(*p++); } - if (xdr_stream_decode_bool(xdr, &set_it) < 0) - return false; 
- if (set_it) { - u32 uid; - - if (xdr_stream_decode_u32(xdr, &uid) < 0) - return false; - iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), uid); + if (*p++) { + iap->ia_uid = make_kuid(userns, ntohl(*p++)); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } - if (xdr_stream_decode_bool(xdr, &set_it) < 0) - return false; - if (set_it) { - u32 gid; - - if (xdr_stream_decode_u32(xdr, &gid) < 0) - return false; - iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), gid); + if (*p++) { + iap->ia_gid = make_kgid(userns, ntohl(*p++)); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } - if (xdr_stream_decode_bool(xdr, &set_it) < 0) - return false; - if (set_it) { - u64 newsize; + if (*p++) { + u64 newsize; - if (xdr_stream_decode_u64(xdr, &newsize) < 0) - return false; iap->ia_valid |= ATTR_SIZE; - iap->ia_size = newsize; + p = xdr_decode_hyper(p, &newsize); + iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); } - if (xdr_stream_decode_u32(xdr, &set_it) < 0) - return false; - switch (set_it) { - case DONT_CHANGE: - break; - case SET_TO_SERVER_TIME: + if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ iap->ia_valid |= ATTR_ATIME; - break; - case SET_TO_CLIENT_TIME: - if (!svcxdr_decode_nfstime3(xdr, &iap->ia_atime)) - return false; + } else if (tmp == 2) { /* set to client time */ iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; - break; - default: - return false; + iap->ia_atime.tv_sec = ntohl(*p++); + iap->ia_atime.tv_nsec = ntohl(*p++); } - if (xdr_stream_decode_u32(xdr, &set_it) < 0) - return false; - switch (set_it) { - case DONT_CHANGE: - break; - case SET_TO_SERVER_TIME: + if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ iap->ia_valid |= ATTR_MTIME; - break; - case SET_TO_CLIENT_TIME: - if (!svcxdr_decode_nfstime3(xdr, &iap->ia_mtime)) - return false; + } else if (tmp == 2) { /* set to client time */ iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; - break; - default: - return false; + iap->ia_mtime.tv_sec = ntohl(*p++); + 
iap->ia_mtime.tv_nsec = ntohl(*p++); } - - return true; + return p; } -static bool -svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args) +static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) { - __be32 *p; - u32 check; - - if (xdr_stream_decode_bool(xdr, &check) < 0) - return false; - if (check) { - p = xdr_inline_decode(xdr, XDR_UNIT * 2); - if (!p) - return false; - args->check_guard = 1; - args->guardtime = be32_to_cpup(p); - } else - args->check_guard = 0; - - return true; -} - -static bool -svcxdr_decode_specdata3(struct xdr_stream *xdr, struct nfsd3_mknodargs *args) -{ - __be32 *p; - - p = xdr_inline_decode(xdr, XDR_UNIT * 2); - if (!p) - return false; - args->major = be32_to_cpup(p++); - args->minor = be32_to_cpup(p); - - return true; -} - -static bool -svcxdr_decode_devicedata3(struct svc_rqst *rqstp, struct xdr_stream *xdr, - struct nfsd3_mknodargs *args) -{ - return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) && - svcxdr_decode_specdata3(xdr, args); -} - -static bool -svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, - const struct svc_fh *fhp, const struct kstat *stat) -{ - struct user_namespace *userns = nfsd_user_namespace(rqstp); - __be32 *p; - u64 fsid; - - p = xdr_reserve_space(xdr, XDR_UNIT * 21); - if (!p) - return false; - - *p++ = cpu_to_be32(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = cpu_to_be32((u32)(stat->mode & S_IALLUGO)); - *p++ = cpu_to_be32((u32)stat->nlink); - *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); - *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) - p = xdr_encode_hyper(p, (u64)NFS3_MAXPATHLEN); - else - p = xdr_encode_hyper(p, (u64)stat->size); - - /* used */ - p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); - - /* rdev */ - *p++ = cpu_to_be32((u32)MAJOR(stat->rdev)); - *p++ = cpu_to_be32((u32)MINOR(stat->rdev)); - + u64 f; switch(fsid_source(fhp)) { + default: + case 
FSIDSOURCE_DEV: + p = xdr_encode_hyper(p, (u64)huge_encode_dev + (fhp->fh_dentry->d_sb->s_dev)); + break; case FSIDSOURCE_FSID: - fsid = (u64)fhp->fh_export->ex_fsid; + p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); break; case FSIDSOURCE_UUID: - fsid = ((u64 *)fhp->fh_export->ex_uuid)[0]; - fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1]; + f = ((u64*)fhp->fh_export->ex_uuid)[0]; + f ^= ((u64*)fhp->fh_export->ex_uuid)[1]; + p = xdr_encode_hyper(p, f); break; - default: - fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev); } - p = xdr_encode_hyper(p, fsid); + return p; +} - /* fileid */ +static __be32 * +encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, + struct kstat *stat) +{ + struct user_namespace *userns = nfsd_user_namespace(rqstp); + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) (stat->mode & S_IALLUGO)); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); + *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { + p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); + } else { + p = xdr_encode_hyper(p, (u64) stat->size); + } + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + *p++ = htonl((u32) MAJOR(stat->rdev)); + *p++ = htonl((u32) MINOR(stat->rdev)); + p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, stat->ino); + p = encode_time3(p, &stat->atime); + p = encode_time3(p, &stat->mtime); + p = encode_time3(p, &stat->ctime); - p = encode_nfstime3(p, &stat->atime); - p = encode_nfstime3(p, &stat->mtime); - encode_nfstime3(p, &stat->ctime); - - return true; + return p; } -static bool -svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) +static __be32 * +encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { - __be32 *p; - - p = xdr_reserve_space(xdr, XDR_UNIT * 6); - if (!p) - return false; - p = xdr_encode_hyper(p, (u64)fhp->fh_pre_size); - p = 
encode_nfstime3(p, &fhp->fh_pre_mtime); - encode_nfstime3(p, &fhp->fh_pre_ctime); - - return true; -} - -static bool -svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) -{ - if (!fhp->fh_pre_saved) { - if (xdr_stream_encode_item_absent(xdr) < 0) - return false; - return true; - } - - if (xdr_stream_encode_item_present(xdr) < 0) - return false; - return svcxdr_encode_wcc_attr(xdr, fhp); -} - -/** - * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes - * @rqstp: Context of a completed RPC transaction - * @xdr: XDR stream - * @fhp: File handle to encode - * - * Return values: - * %false: Send buffer space was exhausted - * %true: Success - */ -bool -svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, - const struct svc_fh *fhp) -{ - struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; - - /* - * The inode may be NULL if the call failed because of a - * stale file handle. In this case, no attributes are - * returned. - */ - if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry)) - goto no_post_op_attrs; - if (fh_getattr(fhp, &stat) != nfs_ok) - goto no_post_op_attrs; - - if (xdr_stream_encode_item_present(xdr) < 0) - return false; - lease_get_mtime(d_inode(dentry), &stat.mtime); - if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &stat)) - return false; - - return true; - -no_post_op_attrs: - return xdr_stream_encode_item_absent(xdr) > 0; + /* Attributes to follow */ + *p++ = xdr_one; + return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); } /* - * Encode weak cache consistency data + * Encode post-operation attributes. + * The inode may be NULL if the call failed because of a stale file + * handle. In this case, no attributes are returned. 
*/ -static bool -svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr, - const struct svc_fh *fhp) +static __be32 * +encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; + if (dentry && d_really_is_positive(dentry)) { + __be32 err; + struct kstat stat; - if (!dentry || !d_really_is_positive(dentry) || !fhp->fh_post_saved) - goto neither; + err = fh_getattr(fhp, &stat); + if (!err) { + *p++ = xdr_one; /* attributes follow */ + lease_get_mtime(d_inode(dentry), &stat.mtime); + return encode_fattr3(rqstp, p, fhp, &stat); + } + } + *p++ = xdr_zero; + return p; +} - /* before */ - if (!svcxdr_encode_pre_op_attr(xdr, fhp)) - return false; +/* Helper for NFSv3 ACLs */ +__be32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +{ + return encode_post_op_attr(rqstp, p, fhp); +} - /* after */ - if (xdr_stream_encode_item_present(xdr) < 0) - return false; - if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &fhp->fh_post_attr)) - return false; +/* + * Enocde weak cache consistency data + */ +static __be32 * +encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; - return true; + if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) { + if (fhp->fh_pre_saved) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size); + p = encode_time3(p, &fhp->fh_pre_mtime); + p = encode_time3(p, &fhp->fh_pre_ctime); + } else { + *p++ = xdr_zero; + } + return encode_saved_post_attr(rqstp, p, fhp); + } + /* no pre- or post-attrs */ + *p++ = xdr_zero; + return encode_post_op_attr(rqstp, p, fhp); +} -neither: - if (xdr_stream_encode_item_absent(xdr) < 0) - return false; - if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp)) - return false; +/* + * Fill in the pre_op attr for the wcc data + */ +void fill_pre_wcc(struct svc_fh *fhp) +{ + struct inode *inode; + struct kstat stat; + __be32 err; - return true; + if 
(fhp->fh_pre_saved) + return; + + inode = d_inode(fhp->fh_dentry); + err = fh_getattr(fhp, &stat); + if (err) { + /* Grab the times from inode anyway */ + stat.mtime = inode->i_mtime; + stat.ctime = inode->i_ctime; + stat.size = inode->i_size; + } + + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_size = stat.size; + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + fhp->fh_pre_saved = true; +} + +/* + * Fill in the post_op attr for the wcc data + */ +void fill_post_wcc(struct svc_fh *fhp) +{ + __be32 err; + + if (fhp->fh_post_saved) + printk("nfsd: inode locked twice during operation.\n"); + + err = fh_getattr(fhp, &fhp->fh_post_attr); + fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, + d_inode(fhp->fh_dentry)); + if (err) { + fhp->fh_post_saved = false; + /* Grab the ctime anyway - set_change_info might use it */ + fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; + } else + fhp->fh_post_saved = true; } /* * XDR decode functions */ +int +nfs3svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +{ + return 1; +} -bool -nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_fhandle *args = rqstp->rq_argp; - return svcxdr_decode_nfs_fh3(xdr, &args->fh); + p = decode_fh(p, &args->fh); + if (!p) + return 0; + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_sattrargs *args = rqstp->rq_argp; - return svcxdr_decode_nfs_fh3(xdr, &args->fh) && - svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) && - svcxdr_decode_sattrguard3(xdr, args); + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); + + if ((args->check_guard = ntohl(*p++)) != 0) { + struct timespec64 time; + p = decode_time3(p, &time); + 
args->guardtime = time.tv_sec; + } + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_diropargs *args = rqstp->rq_argp; - return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len); + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_accessargs *args = rqstp->rq_argp; - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &args->access) < 0) - return false; + p = decode_fh(p, &args->fh); + if (!p) + return 0; + args->access = ntohl(*p++); - return true; + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readargs *args = rqstp->rq_argp; - - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; - - return true; -} - -bool -nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - struct nfsd3_writeargs *args = rqstp->rq_argp; + unsigned int len; + int v; u32 max_blocksize = svc_max_payload(rqstp); - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->stable) < 0) - return false; + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = xdr_decode_hyper(p, &args->offset); - /* opaque data */ - if (xdr_stream_decode_u32(xdr, 
&args->len) < 0) - return false; + args->count = ntohl(*p++); + len = min(args->count, max_blocksize); - /* request sanity */ + /* set up the kvec */ + v=0; + while (len > 0) { + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); + len -= rqstp->rq_vec[v].iov_len; + v++; + } + args->vlen = v; + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +{ + struct nfsd3_writeargs *args = rqstp->rq_argp; + unsigned int len, hdr, dlen; + u32 max_blocksize = svc_max_payload(rqstp); + struct kvec *head = rqstp->rq_arg.head; + struct kvec *tail = rqstp->rq_arg.tail; + + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = xdr_decode_hyper(p, &args->offset); + + args->count = ntohl(*p++); + args->stable = ntohl(*p++); + len = args->len = ntohl(*p++); + if ((void *)p > head->iov_base + head->iov_len) + return 0; + /* + * The count must equal the amount of data passed. + */ if (args->count != args->len) - return false; + return 0; + + /* + * Check to make sure that we got the right number of + * bytes. + */ + hdr = (void*)p - head->iov_base; + dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr; + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + * Note that when RPCSEC/GSS (for example) is used, the + * data buffer can be padded so dlen might be larger + * than required. It must never be smaller. 
+ */ + if (dlen < XDR_QUADLEN(len)*4) + return 0; + if (args->count > max_blocksize) { args->count = max_blocksize; - args->len = max_blocksize; + len = args->len = max_blocksize; } - return xdr_stream_subsegment(xdr, &args->payload, args->count); + args->first.iov_base = (void *)p; + args->first.iov_len = head->iov_len - hdr; + return 1; } -bool -nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_createargs *args = rqstp->rq_argp; - if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return false; - if (xdr_stream_decode_u32(xdr, &args->createmode) < 0) - return false; - switch (args->createmode) { + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + switch (args->createmode = ntohl(*p++)) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: - return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); + break; case NFS3_CREATE_EXCLUSIVE: - args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE); - if (!args->verf) - return false; + args->verf = p; + p += 2; break; default: - return false; + return 0; } - return true; + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_createargs *args = rqstp->rq_argp; - return svcxdr_decode_diropargs3(xdr, &args->fh, - &args->name, &args->len) && - svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); + if (!(p = decode_fh(p, &args->fh)) || + !(p = decode_filename(p, &args->name, &args->len))) + return 0; + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_symlinkargs(struct svc_rqst 
*rqstp, __be32 *p) { struct nfsd3_symlinkargs *args = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; + char *base = (char *)p; + size_t dlen; - if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen)) - return false; - if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs)) - return false; - if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return false; + if (!(p = decode_fh(p, &args->ffh)) || + !(p = decode_filename(p, &args->fname, &args->flen))) + return 0; + p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); - /* symlink_data */ - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - args->first.iov_base = xdr_inline_decode(xdr, args->tlen); - return args->first.iov_base != NULL; + args->tlen = ntohl(*p++); + + args->first.iov_base = p; + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; + args->first.iov_len -= (char *)p - base; + + dlen = args->first.iov_len + rqstp->rq_arg.page_len + + rqstp->rq_arg.tail[0].iov_len; + if (dlen < XDR_QUADLEN(args->tlen) << 2) + return 0; + return 1; } -bool -nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_mknodargs *args = rqstp->rq_argp; - if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) - return false; - if (xdr_stream_decode_u32(xdr, &args->ftype) < 0) - return false; - switch (args->ftype) { - case NF3CHR: - case NF3BLK: - return svcxdr_decode_devicedata3(rqstp, xdr, args); - case NF3SOCK: - case NF3FIFO: - return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); - case NF3REG: - case NF3DIR: - case NF3LNK: - /* Valid XDR but illegal file types */ - break; - default: - return false; + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + args->ftype = ntohl(*p++); + + if (args->ftype == NF3BLK || args->ftype == NF3CHR + || args->ftype == NF3SOCK || args->ftype == NF3FIFO) + p = decode_sattr3(p, 
&args->attrs, nfsd_user_namespace(rqstp)); + + if (args->ftype == NF3BLK || args->ftype == NF3CHR) { + args->major = ntohl(*p++); + args->minor = ntohl(*p++); } - return true; + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_renameargs *args = rqstp->rq_argp; - return svcxdr_decode_diropargs3(xdr, &args->ffh, - &args->fname, &args->flen) && - svcxdr_decode_diropargs3(xdr, &args->tfh, - &args->tname, &args->tlen); + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p) +{ + struct nfsd3_readlinkargs *args = rqstp->rq_argp; + + p = decode_fh(p, &args->fh); + if (!p) + return 0; + args->buffer = page_address(*(rqstp->rq_next_page++)); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_linkargs *args = rqstp->rq_argp; - return svcxdr_decode_nfs_fh3(xdr, &args->ffh) && - svcxdr_decode_diropargs3(xdr, &args->tfh, - &args->tname, &args->tlen); + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readdirargs *args = rqstp->rq_argp; + int len; + u32 max_blocksize = svc_max_payload(rqstp); - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return false; - 
args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); - if (!args->verf) - return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = xdr_decode_hyper(p, &args->cookie); + args->verf = p; p += 2; + args->dircount = ~0; + args->count = ntohl(*p++); + len = args->count = min_t(u32, args->count, max_blocksize); - return true; + while (len > 0) { + struct page *p = *(rqstp->rq_next_page++); + if (!args->buffer) + args->buffer = page_address(p); + len -= PAGE_SIZE; + } + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readdirargs *args = rqstp->rq_argp; - u32 dircount; + int len; + u32 max_blocksize = svc_max_payload(rqstp); - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) - return false; - args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); - if (!args->verf) - return false; - /* dircount is ignored */ - if (xdr_stream_decode_u32(xdr, &dircount) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = xdr_decode_hyper(p, &args->cookie); + args->verf = p; p += 2; + args->dircount = ntohl(*p++); + args->count = ntohl(*p++); - return true; + len = args->count = min(args->count, max_blocksize); + while (len > 0) { + struct page *p = *(rqstp->rq_next_page++); + if (!args->buffer) + args->buffer = page_address(p); + len -= PAGE_SIZE; + } + + return xdr_argsize_check(rqstp, p); } -bool -nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_commitargs *args = rqstp->rq_argp; + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = xdr_decode_hyper(p, &args->offset); + 
args->count = ntohl(*p++); - if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u64(xdr, &args->offset) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; - - return true; + return xdr_argsize_check(rqstp, p); } /* * XDR encode functions */ +int +nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_ressize_check(rqstp, p); +} + /* GETATTR */ -bool -nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_attrstat *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); - if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) - return false; - break; + *p++ = resp->status; + if (resp->status == 0) { + lease_get_mtime(d_inode(resp->fh.fh_dentry), + &resp->stat.mtime); + p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); } - - return true; + return xdr_ressize_check(rqstp, p); } /* SETATTR, REMOVE, RMDIR */ -bool -nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_attrstat *resp = rqstp->rq_resp; - return svcxdr_encode_nfsstat3(xdr, resp->status) && - svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh); + *p++ = resp->status; + p = encode_wcc_data(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); } /* LOOKUP */ -bool -nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_diropres *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) - return false; - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return 
false; - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) - return false; + *p++ = resp->status; + if (resp->status == 0) { + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, &resp->fh); } - - return true; + p = encode_post_op_attr(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); } /* ACCESS */ -bool -nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_accessres *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - if (xdr_stream_encode_u32(xdr, resp->access) < 0) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - } - - return true; + *p++ = resp->status; + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); } /* READLINK */ -bool -nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readlinkres *resp = rqstp->rq_resp; - struct kvec *head = rqstp->rq_res.head; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return false; - xdr_write_pages(xdr, resp->pages, 0, resp->len); - if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - } - - return true; + *p++ = resp->status; + p = encode_post_op_attr(rqstp, p, &resp->fh); + if 
(resp->status == 0) { + *p++ = htonl(resp->len); + xdr_ressize_check(rqstp, p); + rqstp->rq_res.page_len = resp->len; + if (resp->len & 3) { + /* need to pad the tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); + } + return 1; + } else + return xdr_ressize_check(rqstp, p); } /* READ */ -bool -nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readres *resp = rqstp->rq_resp; - struct kvec *head = rqstp->rq_res.head; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return false; - if (xdr_stream_encode_bool(xdr, resp->eof) < 0) - return false; - if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return false; - xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, - resp->count); - if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - } - - return true; + *p++ = resp->status; + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->eof); + *p++ = htonl(resp->count); /* xdr opaque count */ + xdr_ressize_check(rqstp, p); + /* now update rqstp->rq_res to reflect data as well */ + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to pad the tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); + } + return 1; + } else + return xdr_ressize_check(rqstp, p); } /* WRITE */ -bool -nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_writeres *resp = 
rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return false; - if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return false; - if (xdr_stream_encode_u32(xdr, resp->committed) < 0) - return false; - if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return false; - break; - default: - if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return false; + *p++ = resp->status; + p = encode_wcc_data(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->committed); + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } - - return true; + return xdr_ressize_check(rqstp, p); } /* CREATE, MKDIR, SYMLINK, MKNOD */ -bool -nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_diropres *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) - return false; - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return false; - break; - default: - if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) - return false; + *p++ = resp->status; + if (resp->status == 0) { + *p++ = xdr_one; + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, &resp->fh); } - - return true; + p = encode_wcc_data(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); } /* RENAME */ -bool -nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_renameres *resp = rqstp->rq_resp; - return svcxdr_encode_nfsstat3(xdr, resp->status) && - svcxdr_encode_wcc_data(rqstp, xdr, &resp->ffh) && - svcxdr_encode_wcc_data(rqstp, xdr, 
&resp->tfh); + *p++ = resp->status; + p = encode_wcc_data(rqstp, p, &resp->ffh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); } /* LINK */ -bool -nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_linkres *resp = rqstp->rq_resp; - return svcxdr_encode_nfsstat3(xdr, resp->status) && - svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh) && - svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); + *p++ = resp->status; + p = encode_post_op_attr(rqstp, p, &resp->fh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); } /* READDIR */ -bool -nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_readdirres *resp = rqstp->rq_resp; - struct xdr_buf *dirlist = &resp->dirlist; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) - return false; - xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); - /* no more entries */ - if (xdr_stream_encode_item_absent(xdr) < 0) - return false; - if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) - return false; - } + *p++ = resp->status; + p = encode_post_op_attr(rqstp, p, &resp->fh); - return true; + if (resp->status == 0) { + /* stupid readdir cookie */ + memcpy(p, resp->verf, 8); p += 2; + xdr_ressize_check(rqstp, p); + if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE) + return 1; /*No room for trailer */ + rqstp->rq_res.page_len = (resp->count) << 2; + + /* add the 'tail' to the end of the 'head' page - page 0. 
*/ + rqstp->rq_res.tail[0].iov_base = p; + *p++ = 0; /* no more entries */ + *p++ = htonl(resp->common.err == nfserr_eof); + rqstp->rq_res.tail[0].iov_len = 2<<2; + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +static __be32 * +encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, + int namlen, u64 ino) +{ + *p++ = xdr_one; /* mark entry present */ + p = xdr_encode_hyper(p, ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ + + cd->offset = p; /* remember pointer */ + p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */ + + return p; } static __be32 @@ -1014,323 +887,267 @@ out: return rv; } -/** - * nfs3svc_encode_cookie3 - Encode a directory offset cookie - * @resp: readdir result context - * @offset: offset cookie to encode - * - * The buffer space for the offset cookie has already been reserved - * by svcxdr_encode_entry3_common(). - */ -void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset) +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino) { - __be64 cookie = cpu_to_be64(offset); + struct svc_fh *fh = &cd->scratch; + __be32 err; - if (!resp->cookie_offset) - return; - write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, - sizeof(cookie)); - resp->cookie_offset = 0; -} - -static bool -svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, - int namlen, loff_t offset, u64 ino) -{ - struct xdr_buf *dirlist = &resp->dirlist; - struct xdr_stream *xdr = &resp->xdr; - - if (xdr_stream_encode_item_present(xdr) < 0) - return false; - /* fileid */ - if (xdr_stream_encode_u64(xdr, ino) < 0) - return false; - /* name */ - if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS3_MAXNAMLEN)) < 0) - return false; - /* cookie */ - resp->cookie_offset = dirlist->len; - if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0) - return false; - - return true; -} - -/** - * 
nfs3svc_encode_entry3 - encode one NFSv3 READDIR entry - * @data: directory context - * @name: name of the object to be encoded - * @namlen: length of that name, in bytes - * @offset: the offset of the previous entry - * @ino: the fileid of this entry - * @d_type: unused - * - * Return values: - * %0: Entry was successfully encoded. - * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err - * - * On exit, the following fields are updated: - * - resp->xdr - * - resp->common.err - * - resp->cookie_offset - */ -int nfs3svc_encode_entry3(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct readdir_cd *ccd = data; - struct nfsd3_readdirres *resp = container_of(ccd, - struct nfsd3_readdirres, - common); - unsigned int starting_length = resp->dirlist.len; - - /* The offset cookie for the previous entry */ - nfs3svc_encode_cookie3(resp, offset); - - if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) - goto out_toosmall; - - xdr_commit_encode(&resp->xdr); - resp->common.err = nfs_ok; - return 0; - -out_toosmall: - resp->cookie_offset = 0; - resp->common.err = nfserr_toosmall; - resp->dirlist.len = starting_length; - return -EINVAL; -} - -static bool -svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name, - int namlen, u64 ino) -{ - struct xdr_stream *xdr = &resp->xdr; - struct svc_fh *fhp = &resp->scratch; - bool result; - - result = false; - fh_init(fhp, NFS3_FHSIZE); - if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok) - goto out_noattrs; - - if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp)) + fh_init(fh, NFS3_FHSIZE); + err = compose_entry_fh(cd, fh, name, namlen, ino); + if (err) { + *p++ = 0; + *p++ = 0; goto out; - if (!svcxdr_encode_post_op_fh3(xdr, fhp)) - goto out; - result = true; - + } + p = encode_post_op_attr(cd->rqstp, p, fh); + *p++ = xdr_one; /* yes, a file handle follows */ + p = encode_fh(p, fh); out: - fh_put(fhp); - return 
result; - -out_noattrs: - if (xdr_stream_encode_item_absent(xdr) < 0) - return false; - if (xdr_stream_encode_item_absent(xdr) < 0) - return false; - return true; + fh_put(fh); + return p; } -/** - * nfs3svc_encode_entryplus3 - encode one NFSv3 READDIRPLUS entry - * @data: directory context - * @name: name of the object to be encoded - * @namlen: length of that name, in bytes - * @offset: the offset of the previous entry - * @ino: the fileid of this entry - * @d_type: unused - * - * Return values: - * %0: Entry was successfully encoded. - * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err - * - * On exit, the following fields are updated: - * - resp->xdr - * - resp->common.err - * - resp->cookie_offset +/* + * Encode a directory entry. This one works for both normal readdir + * and readdirplus. + * The normal readdir reply requires 2 (fileid) + 1 (stringlen) + * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen. + * + * The readdirplus baggage is 1+21 words for post_op_attr, plus the + * file handle. 
*/ -int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) + +#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) +#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) +static int +encode_entry(struct readdir_cd *ccd, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type, int plus) { - struct readdir_cd *ccd = data; - struct nfsd3_readdirres *resp = container_of(ccd, - struct nfsd3_readdirres, - common); - unsigned int starting_length = resp->dirlist.len; + struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, + common); + __be32 *p = cd->buffer; + caddr_t curr_page_addr = NULL; + struct page ** page; + int slen; /* string (name) length */ + int elen; /* estimated entry length in words */ + int num_entry_words = 0; /* actual number of words */ - /* The offset cookie for the previous entry */ - nfs3svc_encode_cookie3(resp, offset); + if (cd->offset) { + u64 offset64 = offset; - if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) - goto out_toosmall; - if (!svcxdr_encode_entry3_plus(resp, name, namlen, ino)) - goto out_toosmall; + if (unlikely(cd->offset1)) { + /* we ended up with offset on a page boundary */ + *cd->offset = htonl(offset64 >> 32); + *cd->offset1 = htonl(offset64 & 0xffffffff); + cd->offset1 = NULL; + } else { + xdr_encode_hyper(cd->offset, offset64); + } + cd->offset = NULL; + } - xdr_commit_encode(&resp->xdr); - resp->common.err = nfs_ok; + /* + dprintk("encode_entry(%.*s @%ld%s)\n", + namlen, name, (long) offset, plus? " plus" : ""); + */ + + /* truncate filename if too long */ + namlen = min(namlen, NFS3_MAXNAMLEN); + + slen = XDR_QUADLEN(namlen); + elen = slen + NFS3_ENTRY_BAGGAGE + + (plus? 
NFS3_ENTRYPLUS_BAGGAGE : 0); + + if (cd->buflen < elen) { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + + /* determine which page in rq_respages[] we are currently filling */ + for (page = cd->rqstp->rq_respages + 1; + page < cd->rqstp->rq_next_page; page++) { + curr_page_addr = page_address(*page); + + if (((caddr_t)cd->buffer >= curr_page_addr) && + ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) + break; + } + + if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) { + /* encode entry in current page */ + + p = encode_entry_baggage(cd, p, name, namlen, ino); + + if (plus) + p = encode_entryplus_baggage(cd, p, name, namlen, ino); + num_entry_words = p - cd->buffer; + } else if (*(page+1) != NULL) { + /* temporarily encode entry into next page, then move back to + * current and next page in rq_respages[] */ + __be32 *p1, *tmp; + int len1, len2; + + /* grab next page for temporary storage of entry */ + p1 = tmp = page_address(*(page+1)); + + p1 = encode_entry_baggage(cd, p1, name, namlen, ino); + + if (plus) + p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino); + + /* determine entry word length and lengths to go in pages */ + num_entry_words = p1 - tmp; + len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer; + if ((num_entry_words << 2) < len1) { + /* the actual number of words in the entry is less + * than elen and can still fit in the current page + */ + memmove(p, tmp, num_entry_words << 2); + p += num_entry_words; + + /* update offset */ + cd->offset = cd->buffer + (cd->offset - tmp); + } else { + unsigned int offset_r = (cd->offset - tmp) << 2; + + /* update pointer to offset location. 
+ * This is a 64bit quantity, so we need to + * deal with 3 cases: + * - entirely in first page + * - entirely in second page + * - 4 bytes in each page + */ + if (offset_r + 8 <= len1) { + cd->offset = p + (cd->offset - tmp); + } else if (offset_r >= len1) { + cd->offset -= len1 >> 2; + } else { + /* sitting on the fence */ + BUG_ON(offset_r != len1 - 4); + cd->offset = p + (cd->offset - tmp); + cd->offset1 = tmp; + } + + len2 = (num_entry_words << 2) - len1; + + /* move from temp page to current and next pages */ + memmove(p, tmp, len1); + memmove(tmp, (caddr_t)tmp+len1, len2); + + p = tmp + (len2 >> 2); + } + } + else { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + + cd->buflen -= num_entry_words; + cd->buffer = p; + cd->common.err = nfs_ok; return 0; -out_toosmall: - resp->cookie_offset = 0; - resp->common.err = nfserr_toosmall; - resp->dirlist.len = starting_length; - return -EINVAL; } -static bool -svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, - const struct nfsd3_fsstatres *resp) +int +nfs3svc_encode_entry(void *cd, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) { - const struct kstatfs *s = &resp->stats; - u64 bs = s->f_bsize; - __be32 *p; + return encode_entry(cd, name, namlen, offset, ino, d_type, 0); +} - p = xdr_reserve_space(xdr, XDR_UNIT * 13); - if (!p) - return false; - p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ - p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ - p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ - p = xdr_encode_hyper(p, s->f_files); /* total inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ - *p = cpu_to_be32(resp->invarsec); /* mean unchanged time */ - - return true; +int +nfs3svc_encode_entry_plus(void *cd, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) +{ + return encode_entry(cd, name, namlen, offset, ino, d_type, 1); 
} /* FSSTAT */ -bool -nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_fsstatres *resp = rqstp->rq_resp; + struct kstatfs *s = &resp->stats; + u64 bs = s->f_bsize; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return false; - if (!svcxdr_encode_fsstat3resok(xdr, resp)) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return false; + *p++ = resp->status; + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ + p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ + p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ + p = xdr_encode_hyper(p, s->f_files); /* total inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ + *p++ = htonl(resp->invarsec); /* mean unchanged time */ } - - return true; -} - -static bool -svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, - const struct nfsd3_fsinfores *resp) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, XDR_UNIT * 12); - if (!p) - return false; - *p++ = cpu_to_be32(resp->f_rtmax); - *p++ = cpu_to_be32(resp->f_rtpref); - *p++ = cpu_to_be32(resp->f_rtmult); - *p++ = cpu_to_be32(resp->f_wtmax); - *p++ = cpu_to_be32(resp->f_wtpref); - *p++ = cpu_to_be32(resp->f_wtmult); - *p++ = cpu_to_be32(resp->f_dtpref); - p = xdr_encode_hyper(p, resp->f_maxfilesize); - p = encode_nfstime3(p, &nfs3svc_time_delta); - *p = cpu_to_be32(resp->f_properties); - - return true; + return xdr_ressize_check(rqstp, p); } /* FSINFO */ -bool -nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) { struct 
nfsd3_fsinfores *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return false; - if (!svcxdr_encode_fsinfo3resok(xdr, resp)) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return false; + *p++ = resp->status; + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->f_rtmax); + *p++ = htonl(resp->f_rtpref); + *p++ = htonl(resp->f_rtmult); + *p++ = htonl(resp->f_wtmax); + *p++ = htonl(resp->f_wtpref); + *p++ = htonl(resp->f_wtmult); + *p++ = htonl(resp->f_dtpref); + p = xdr_encode_hyper(p, resp->f_maxfilesize); + *p++ = xdr_one; + *p++ = xdr_zero; + *p++ = htonl(resp->f_properties); } - return true; -} - -static bool -svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, - const struct nfsd3_pathconfres *resp) -{ - __be32 *p; - - p = xdr_reserve_space(xdr, XDR_UNIT * 6); - if (!p) - return false; - *p++ = cpu_to_be32(resp->p_link_max); - *p++ = cpu_to_be32(resp->p_name_max); - p = xdr_encode_bool(p, resp->p_no_trunc); - p = xdr_encode_bool(p, resp->p_chown_restricted); - p = xdr_encode_bool(p, resp->p_case_insensitive); - xdr_encode_bool(p, resp->p_case_preserving); - - return true; + return xdr_ressize_check(rqstp, p); } /* PATHCONF */ -bool -nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_pathconfres *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return false; - if (!svcxdr_encode_pathconf3resok(xdr, resp)) - return false; - break; - default: - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) - return false; + *p++ = resp->status; + *p++ = xdr_zero; /* no 
post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->p_link_max); + *p++ = htonl(resp->p_name_max); + *p++ = htonl(resp->p_no_trunc); + *p++ = htonl(resp->p_chown_restricted); + *p++ = htonl(resp->p_case_insensitive); + *p++ = htonl(resp->p_case_preserving); } - return true; + return xdr_ressize_check(rqstp, p); } /* COMMIT */ -bool -nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_commitres *resp = rqstp->rq_resp; - if (!svcxdr_encode_nfsstat3(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return false; - if (!svcxdr_encode_writeverf3(xdr, resp->verf)) - return false; - break; - default: - if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) - return false; + *p++ = resp->status; + p = encode_wcc_data(rqstp, p, &resp->fh); + /* Write verifier */ + if (resp->status == 0) { + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } - - return true; + return xdr_ressize_check(rqstp, p); } /* diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index bb8e2f6d7d03..71292a0d6f09 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -751,26 +751,57 @@ out_estate: return ret; } -__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, - struct nfsd_attrs *attr) +__be32 +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) { + __be32 error; int host_error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - if (!acl) - return nfs_ok; + /* Get inode */ + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); + if (error) + return error; - if (type == NF4DIR) + dentry = fhp->fh_dentry; + inode = d_inode(dentry); + + if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl, - &attr->na_dpacl, flags); + host_error = 
nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); if (host_error == -EINVAL) return nfserr_attrnotsupp; + if (host_error < 0) + goto out_nfserr; + + fh_lock(fhp); + + host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl); + if (host_error < 0) + goto out_drop_lock; + + if (S_ISDIR(inode->i_mode)) { + host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl); + } + +out_drop_lock: + fh_unlock(fhp); + + posix_acl_release(pacl); + posix_acl_release(dpacl); +out_nfserr: + if (host_error == -EOPNOTSUPP) + return nfserr_attrnotsupp; else return nfserrno(host_error); } + static short ace2type(struct nfs4_ace *ace) { diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 4eae2c5af2ed..f5b7ad0847f2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -76,17 +76,6 @@ static __be32 *xdr_encode_empty_array(__be32 *p) * 1 Protocol" */ -static void encode_uint32(struct xdr_stream *xdr, u32 n) -{ - WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); -} - -static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, - size_t len) -{ - WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); -} - /* * nfs_cb_opnum4 * @@ -132,7 +121,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) BUG_ON(length > NFS4_FHSIZE); p = xdr_reserve_space(xdr, 4 + length); - xdr_encode_opaque(p, &fh->fh_raw, length); + xdr_encode_opaque(p, &fh->fh_base, length); } /* @@ -339,24 +328,6 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, hdr->nops++; } -/* - * CB_RECALLANY4args - * - * struct CB_RECALLANY4args { - * uint32_t craa_objects_to_keep; - * bitmap4 craa_type_mask; - * }; - */ -static void -encode_cb_recallany4args(struct xdr_stream *xdr, - struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra) -{ - encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY); - encode_uint32(xdr, ra->ra_keep); - encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval)); - hdr->nops++; -} - /* * CB_SEQUENCE4args * @@ -511,26 
+482,6 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, encode_cb_nops(&hdr); } -/* - * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects - */ -static void -nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req, - struct xdr_stream *xdr, const void *data) -{ - const struct nfsd4_callback *cb = data; - struct nfsd4_cb_recall_any *ra; - struct nfs4_cb_compound_hdr hdr = { - .ident = cb->cb_clp->cl_cb_ident, - .minorversion = cb->cb_clp->cl_minorversion, - }; - - ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb); - encode_cb_compound4args(xdr, &hdr); - encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_recallany4args(xdr, &hdr, ra); - encode_cb_nops(&hdr); -} /* * NFSv4.0 and NFSv4.1 XDR decode functions @@ -569,28 +520,6 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } -/* - * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects - */ -static int -nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp, - struct xdr_stream *xdr, - void *data) -{ - struct nfsd4_callback *cb = data; - struct nfs4_cb_compound_hdr hdr; - int status; - - status = decode_cb_compound4res(xdr, &hdr); - if (unlikely(status)) - return status; - status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status || cb->cb_seq_status)) - return status; - status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status); - return status; -} - #ifdef CONFIG_NFSD_PNFS /* * CB_LAYOUTRECALL4args @@ -750,7 +679,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * case NFS4_OK: * write_response4 coa_resok4; * default: - * length4 coa_bytes_copied; + * length4 coa_bytes_copied; * }; * struct CB_OFFLOAD4args { * nfs_fh4 coa_fh; @@ -759,22 +688,21 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * }; */ static void encode_offload_info4(struct xdr_stream *xdr, - const struct nfsd4_cb_offload *cbo) + __be32 nfserr, + const struct nfsd4_copy 
*cp) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p = cbo->co_nfserr; - switch (cbo->co_nfserr) { - case nfs_ok: + *p++ = nfserr; + if (!nfserr) { p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); p = xdr_encode_empty_array(p); - p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written); - *p++ = cpu_to_be32(cbo->co_res.wr_stable_how); - p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data, + p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); + *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, NFS4_VERIFIER_SIZE); - break; - default: + } else { p = xdr_reserve_space(xdr, 8); /* We always return success if bytes were written */ p = xdr_encode_hyper(p, 0); @@ -782,16 +710,18 @@ static void encode_offload_info4(struct xdr_stream *xdr, } static void encode_cb_offload4args(struct xdr_stream *xdr, - const struct nfsd4_cb_offload *cbo, + __be32 nfserr, + const struct knfsd_fh *fh, + const struct nfsd4_copy *cp, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p = cpu_to_be32(OP_CB_OFFLOAD); - encode_nfs_fh4(xdr, &cbo->co_fh); - encode_stateid4(xdr, &cbo->co_res.cb_stateid); - encode_offload_info4(xdr, cbo); + *p++ = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, fh); + encode_stateid4(xdr, &cp->cp_res.cb_stateid); + encode_offload_info4(xdr, nfserr, cp); hdr->nops++; } @@ -801,8 +731,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, const void *data) { const struct nfsd4_callback *cb = data; - const struct nfsd4_cb_offload *cbo = - container_of(cb, struct nfsd4_cb_offload, co_cb); + const struct nfsd4_copy *cp = + container_of(cb, struct nfsd4_copy, cp_cb); struct nfs4_cb_compound_hdr hdr = { .ident = 0, .minorversion = cb->cb_clp->cl_minorversion, @@ -810,7 +740,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, encode_cb_compound4args(xdr, &hdr); encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_offload4args(xdr, cbo, &hdr); + 
encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); encode_cb_nops(&hdr); } @@ -854,7 +784,6 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), - PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; @@ -1012,43 +941,37 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; - rcu_read_lock(); - trace_nfsd_cb_setup(clp, rpc_peeraddr2str(client, RPC_DISPLAY_NETID), - args.authflavor); - rcu_read_unlock(); + trace_nfsd_cb_setup(clp); return 0; } -static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate) -{ - if (clp->cl_cb_state != newstate) { - clp->cl_cb_state = newstate; - trace_nfsd_cb_state(clp); - } -} - static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; - nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN); + clp->cl_cb_state = NFSD4_CB_DOWN; + trace_nfsd_cb_state(clp); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; - nfsd4_mark_cb_state(clp, NFSD4_CB_FAULT); + clp->cl_cb_state = NFSD4_CB_FAULT; + trace_nfsd_cb_state(clp); } static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + trace_nfsd_cb_done(clp, task->tk_status); if (task->tk_status) nfsd4_mark_cb_down(clp, task->tk_status); - else - nfsd4_mark_cb_state(clp, NFSD4_CB_UP); + else { + clp->cl_cb_state = NFSD4_CB_UP; + trace_nfsd_cb_state(clp); + } } static void nfsd4_cb_probe_release(void *calldata) @@ -1072,8 +995,8 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { */ void 
nfsd4_probe_callback(struct nfs4_client *clp) { - trace_nfsd_cb_probe(clp); - nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + trace_nfsd_cb_state(clp); set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); nfsd4_run_cb(&clp->cl_cb_null); } @@ -1086,10 +1009,11 @@ void nfsd4_probe_callback_sync(struct nfs4_client *clp) void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { - nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); spin_unlock(&clp->cl_lock); + trace_nfsd_cb_state(clp); } /* @@ -1246,6 +1170,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; + trace_nfsd_cb_done(clp, task->tk_status); + if (!nfsd4_cb_sequence_done(task, cb)) return; @@ -1305,9 +1231,6 @@ void nfsd4_destroy_callback_queue(void) /* must be called under the state lock */ void nfsd4_shutdown_callback(struct nfs4_client *clp) { - if (clp->cl_cb_state != NFSD4_CB_UNKNOWN) - trace_nfsd_cb_shutdown(clp); - set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't actually result in a null callback; @@ -1353,6 +1276,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) * kill the old client: */ if (clp->cl_cb_client) { + trace_nfsd_cb_shutdown(clp); rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; put_cred(clp->cl_cb_cred); @@ -1398,6 +1322,8 @@ nfsd4_run_cb_work(struct work_struct *work) struct rpc_clnt *clnt; int flags; + trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name); + if (cb->cb_need_restart) { cb->cb_need_restart = false; } else { @@ -1419,7 +1345,7 @@ nfsd4_run_cb_work(struct work_struct *work) * Don't send probe messages for 4.1 or later. 
*/ if (!cb->cb_ops && clp->cl_minorversion) { - nfsd4_mark_cb_state(clp, NFSD4_CB_UP); + clp->cl_cb_state = NFSD4_CB_UP; nfsd41_destroy_cb(cb); return; } @@ -1445,21 +1371,11 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_holds_slot = false; } -/** - * nfsd4_run_cb - queue up a callback job to run - * @cb: callback to queue - * - * Kick off a callback to do its thing. Returns false if it was already - * on a queue, true otherwise. - */ -bool nfsd4_run_cb(struct nfsd4_callback *cb) +void nfsd4_run_cb(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - bool queued; nfsd41_cb_inflight_begin(clp); - queued = nfsd4_queue_cb(cb); - if (!queued) + if (!nfsd4_queue_cb(cb)) nfsd41_cb_inflight_end(clp); - return queued; } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5e9809aff37e..f92161ce1f97 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,7 +41,6 @@ #include "idmap.h" #include "nfsd.h" #include "netns.h" -#include "vfs.h" /* * Turn off idmapping when using AUTH_SYS. 
@@ -83,8 +82,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->id = itm->id; new->type = itm->type; - strscpy(new->name, itm->name, sizeof(new->name)); - strscpy(new->authname, itm->authname, sizeof(new->authname)); + strlcpy(new->name, itm->name, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->authname)); } static void @@ -549,7 +548,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; - strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item); if (ret == -ENOENT) return nfserr_badowner; @@ -585,7 +584,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) return encode_ascii_id(xdr, id); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index e4e23b2a3e65..2673019d30ec 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -421,7 +421,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL); if (!new) return nfserr_jukebox; - memcpy(&new->lo_seg, seg, sizeof(new->lo_seg)); + memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg)); new->lo_state = ls; spin_lock(&fp->fi_lock); @@ -657,7 +657,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ktime_t now, cutoff; const struct nfsd4_layout_ops *ops; - trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task); + switch (task->tk_status) { case 0: case -NFS4ERR_DELAY: diff --git 
a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2c0de247083a..e84996c3867c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -37,9 +37,6 @@ #include #include #include -#include -#include - #include #include @@ -53,16 +50,34 @@ #include "pnfs.h" #include "trace.h" -static bool inter_copy_offload_enable; -module_param(inter_copy_offload_enable, bool, 0644); -MODULE_PARM_DESC(inter_copy_offload_enable, - "Enable inter server to server copy offload. Default: false"); +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include -#ifdef CONFIG_NFSD_V4_2_INTER_SSC -static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ -module_param(nfsd4_ssc_umount_timeout, int, 0644); -MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, - "idle msecs before unmount export from source server"); +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = d_inode(resfh->fh_dentry); + int status; + + inode_lock(inode); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + inode_unlock(inode); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } #endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -129,6 +144,26 @@ is_create_with_attrs(struct nfsd4_open *open) || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); } +/* + * if error occurs when setting the acl, just clear the acl bit + * in the returned attr bitmap. 
+ */ +static void +do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl, u32 *bmval) +{ + __be32 status; + + status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); + if (status) + /* + * We should probably fail the whole open at this point, + * but we've already created the file, so it's too late; + * So this seems the least of evils: + */ + bmval[0] &= ~FATTR4_WORD0_ACL; +} + static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -142,6 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) static __be32 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode) { + __be32 status; if (open->op_truncate && !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) @@ -156,7 +192,9 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs if (open->op_share_deny & NFS4_SHARE_DENY_READ) accmode |= NFSD_MAY_WRITE; - return fh_verify(rqstp, current_fh, S_IFREG, accmode); + status = fh_verify(rqstp, current_fh, S_IFREG, accmode); + + return status; } static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) @@ -185,202 +223,6 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate &resfh->fh_handle); } -static inline bool nfsd4_create_is_exclusive(int createmode) -{ - return createmode == NFS4_CREATE_EXCLUSIVE || - createmode == NFS4_CREATE_EXCLUSIVE4_1; -} - -static __be32 -nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child, - struct nfsd4_open *open) -{ - struct file *filp; - struct path path; - int oflags; - - oflags = O_CREAT | O_LARGEFILE; - switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) { - case NFS4_SHARE_ACCESS_WRITE: - oflags |= O_WRONLY; - break; - case NFS4_SHARE_ACCESS_BOTH: - oflags |= O_RDWR; - break; - default: - oflags |= O_RDONLY; - } - - path.mnt = fhp->fh_export->ex_path.mnt; - path.dentry = child; - filp = dentry_create(&path, oflags, open->op_iattr.ia_mode, - current_cred()); - if (IS_ERR(filp)) - 
return nfserrno(PTR_ERR(filp)); - - open->op_filp = filp; - return nfs_ok; -} - -/* - * Implement NFSv4's unchecked, guarded, and exclusive create - * semantics for regular files. Open state for this new file is - * subsequently fabricated in nfsd4_process_open2(). - * - * Upon return, caller must release @fhp and @resfhp. - */ -static __be32 -nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct nfsd4_open *open) -{ - struct iattr *iap = &open->op_iattr; - struct nfsd_attrs attrs = { - .na_iattr = iap, - .na_seclabel = &open->op_label, - }; - struct dentry *parent, *child; - __u32 v_mtime, v_atime; - struct inode *inode; - __be32 status; - int host_err; - - if (isdotent(open->op_fname, open->op_fnamelen)) - return nfserr_exist; - if (!(iap->ia_valid & ATTR_MODE)) - iap->ia_mode = 0; - - status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (status != nfs_ok) - return status; - parent = fhp->fh_dentry; - inode = d_inode(parent); - - host_err = fh_want_write(fhp); - if (host_err) - return nfserrno(host_err); - - if (is_create_with_attrs(open)) - nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); - - inode_lock_nested(inode, I_MUTEX_PARENT); - - child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); - if (IS_ERR(child)) { - status = nfserrno(PTR_ERR(child)); - goto out; - } - - if (d_really_is_negative(child)) { - status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); - if (status != nfs_ok) - goto out; - } - - status = fh_compose(resfhp, fhp->fh_export, child, fhp); - if (status != nfs_ok) - goto out; - - v_mtime = 0; - v_atime = 0; - if (nfsd4_create_is_exclusive(open->op_createmode)) { - u32 *verifier = (u32 *)open->op_verf.data; - - /* - * Solaris 7 gets confused (bugid 4218508) if these have - * the high bit set, as do xfs filesystems without the - * "bigtime" feature. So just clear the high bits. 
If this - * is ever changed to use different attrs for storing the - * verifier, then do_open_lookup() will also need to be - * fixed accordingly. - */ - v_mtime = verifier[0] & 0x7fffffff; - v_atime = verifier[1] & 0x7fffffff; - } - - if (d_really_is_positive(child)) { - status = nfs_ok; - - /* NFSv4 protocol requires change attributes even though - * no change happened. - */ - fh_fill_both_attrs(fhp); - - switch (open->op_createmode) { - case NFS4_CREATE_UNCHECKED: - if (!d_is_reg(child)) - break; - - /* - * In NFSv4, we don't want to truncate the file - * now. This would be wrong if the OPEN fails for - * some other reason. Furthermore, if the size is - * nonzero, we should ignore it according to spec! - */ - open->op_truncate = (iap->ia_valid & ATTR_SIZE) && - !iap->ia_size; - break; - case NFS4_CREATE_GUARDED: - status = nfserr_exist; - break; - case NFS4_CREATE_EXCLUSIVE: - if (d_inode(child)->i_mtime.tv_sec == v_mtime && - d_inode(child)->i_atime.tv_sec == v_atime && - d_inode(child)->i_size == 0) { - open->op_created = true; - break; /* subtle */ - } - status = nfserr_exist; - break; - case NFS4_CREATE_EXCLUSIVE4_1: - if (d_inode(child)->i_mtime.tv_sec == v_mtime && - d_inode(child)->i_atime.tv_sec == v_atime && - d_inode(child)->i_size == 0) { - open->op_created = true; - goto set_attr; /* subtle */ - } - status = nfserr_exist; - } - goto out; - } - - if (!IS_POSIXACL(inode)) - iap->ia_mode &= ~current_umask(); - - fh_fill_pre_attrs(fhp); - status = nfsd4_vfs_create(fhp, child, open); - if (status != nfs_ok) - goto out; - open->op_created = true; - fh_fill_post_attrs(fhp); - - /* A newly created file already has a file size of zero. 
*/ - if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) - iap->ia_valid &= ~ATTR_SIZE; - if (nfsd4_create_is_exclusive(open->op_createmode)) { - iap->ia_valid = ATTR_MTIME | ATTR_ATIME | - ATTR_MTIME_SET|ATTR_ATIME_SET; - iap->ia_mtime.tv_sec = v_mtime; - iap->ia_atime.tv_sec = v_atime; - iap->ia_mtime.tv_nsec = 0; - iap->ia_atime.tv_nsec = 0; - } - -set_attr: - status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); - - if (attrs.na_labelerr) - open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - if (attrs.na_aclerr) - open->op_bmval[0] &= ~FATTR4_WORD0_ACL; -out: - inode_unlock(inode); - nfsd_attrs_free(&attrs); - if (child && !IS_ERR(child)) - dput(child); - fh_drop_write(fhp); - return status; -} - static __be32 do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { @@ -410,33 +252,47 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * yes | yes | GUARDED4 | GUARDED4 */ + /* + * Note: create modes (UNCHECKED,GUARDED...) are the same + * in NFSv4 as in v3 except EXCLUSIVE4_1. + */ current->fs->umask = open->op_umask; - status = nfsd4_create_file(rqstp, current_fh, *resfh, open); + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, + open->op_fname.len, &open->op_iattr, + *resfh, open->op_createmode, + (u32 *)open->op_verf.data, + &open->op_truncate, &open->op_created); current->fs->umask = 0; + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); + /* * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 * use the returned bitmask to indicate which attributes * we used to store the verifier: */ - if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0) + if (nfsd_create_is_exclusive(open->op_createmode) && status == 0) open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); - } else { + } else + /* + * Note this may exit with the parent still locked. 
+ * We will hold the lock until nfsd4_open's final + * lookup, to prevent renames or unlinks until we've had + * a chance to an acquire a delegation if appropriate. + */ status = nfsd_lookup(rqstp, current_fh, - open->op_fname, open->op_fnamelen, *resfh); - if (!status) - /* NFSv4 protocol requires change attributes even though - * no change happened. - */ - fh_fill_both_attrs(current_fh); - } + open->op_fname.data, open->op_fname.len, *resfh); if (status) goto out; status = nfsd_check_obj_isreg(*resfh); if (status) goto out; + if (is_create_with_attrs(open) && open->op_acl != NULL) + do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); + nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; if (open->op_created || @@ -452,6 +308,7 @@ static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { struct svc_fh *current_fh = &cstate->current_fh; + __be32 status; int accmode = 0; /* We don't know the target directory, and therefore can not @@ -476,7 +333,9 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) accmode = NFSD_MAY_OWNER_OVERRIDE; - return do_open_permission(rqstp, current_fh, open, accmode); + status = do_open_permission(rqstp, current_fh, open, accmode); + + return status; } static void @@ -501,12 +360,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, bool reclaim = false; dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", - (int)open->op_fnamelen, open->op_fname, + (int)open->op_fname.len, open->op_fname.data, open->op_openowner); - open->op_filp = NULL; - open->op_rqstp = rqstp; - /* This check required by spec. 
*/ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; @@ -517,7 +373,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * Before RECLAIM_COMPLETE done, server should deny new lock */ if (nfsd4_has_session(cstate) && - !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags) && + !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) return nfserr_grace; @@ -559,46 +416,51 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; switch (open->op_claim_type) { - case NFS4_OPEN_CLAIM_DELEGATE_CUR: - case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, cstate, open, &resfh); - if (status) + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_NULL: + status = do_open_lookup(rqstp, cstate, open, &resfh); + if (status) + goto out; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + status = nfs4_check_open_reclaim(&open->op_clientid, + cstate, nn); + if (status) + goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + reclaim = true; + fallthrough; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + status = do_open_fhandle(rqstp, cstate, open); + if (status) + goto out; + resfh = &cstate->current_fh; + break; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + dprintk("NFSD: unsupported OPEN claim type %d\n", + open->op_claim_type); + status = nfserr_notsupp; goto out; - break; - case NFS4_OPEN_CLAIM_PREVIOUS: - status = nfs4_check_open_reclaim(cstate->clp); - if (status) + default: + dprintk("NFSD: Invalid OPEN claim type %d\n", + open->op_claim_type); + status = nfserr_inval; goto out; - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - reclaim = true; - fallthrough; - case NFS4_OPEN_CLAIM_FH: - case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, cstate, open); - if (status) - goto out; - resfh = &cstate->current_fh; - 
break; - case NFS4_OPEN_CLAIM_DELEG_PREV_FH: - case NFS4_OPEN_CLAIM_DELEGATE_PREV: - status = nfserr_notsupp; - goto out; - default: - status = nfserr_inval; - goto out; } - + /* + * nfsd4_process_open2() does the actual opening of the file. If + * successful, it (1) truncates the file if open->op_truncate was + * set, (2) sets open->op_stateid, (3) sets open->op_delegation. + */ status = nfsd4_process_open2(rqstp, resfh, open); - if (status && open->op_created) - pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n", - be32_to_cpu(status)); + WARN(status && open->op_created, + "nfsd4_process_open2 failed to open newly-created file! status=%u\n", + be32_to_cpu(status)); if (reclaim && !status) nn->somebody_reclaimed = true; out: - if (open->op_filp) { - fput(open->op_filp); - open->op_filp = NULL; - } if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); fh_put(resfh); @@ -647,7 +509,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; - memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval, + memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); #ifdef CONFIG_NFSD_V4_2_INTER_SSC @@ -663,9 +525,11 @@ static __be32 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - fh_put(&cstate->current_fh); + __be32 status; - return exp_pseudoroot(rqstp, &cstate->current_fh); + fh_put(&cstate->current_fh); + status = exp_pseudoroot(rqstp, &cstate->current_fh); + return status; } static __be32 @@ -724,7 +588,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data)); - nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id)); + nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id)); } 
static __be32 @@ -732,19 +596,10 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_commit *commit = &u->commit; - struct nfsd_file *nf; - __be32 status; - status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE | - NFSD_MAY_NOT_BREAK_LEASE, &nf); - if (status != nfs_ok) - return status; - - status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset, + return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count, (__be32 *)commit->co_verf.data); - nfsd_file_put(nf); - return status; } static __be32 @@ -752,10 +607,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_create *create = &u->create; - struct nfsd_attrs attrs = { - .na_iattr = &create->cr_iattr, - .na_seclabel = &create->cr_label, - }; struct svc_fh resfh; __be32 status; dev_t rdev; @@ -771,13 +622,12 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; - status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs); current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_data, &attrs, &resfh); + create->cr_data, &resfh); break; case NF4BLK: @@ -788,7 +638,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &attrs, S_IFBLK, rdev, &resfh); + &create->cr_iattr, S_IFBLK, rdev, &resfh); break; case NF4CHR: @@ -799,26 +649,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &attrs, S_IFCHR, rdev, &resfh); + &create->cr_iattr,S_IFCHR, rdev, &resfh); break; case NF4SOCK: status = nfsd_create(rqstp, 
&cstate->current_fh, create->cr_name, create->cr_namelen, - &attrs, S_IFSOCK, 0, &resfh); + &create->cr_iattr, S_IFSOCK, 0, &resfh); break; case NF4FIFO: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &attrs, S_IFIFO, 0, &resfh); + &create->cr_iattr, S_IFIFO, 0, &resfh); break; case NF4DIR: create->cr_iattr.ia_valid &= ~ATTR_SIZE; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &attrs, S_IFDIR, 0, &resfh); + &create->cr_iattr, S_IFDIR, 0, &resfh); break; default: @@ -828,17 +678,20 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (attrs.na_labelerr) - create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - if (attrs.na_aclerr) - create->cr_bmval[0] &= ~FATTR4_WORD0_ACL; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + + if (create->cr_acl != NULL) + do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, + create->cr_bmval); + + fh_unlock(&cstate->current_fh); set_change_info(&create->cr_cinfo, &cstate->current_fh); fh_dup2(&cstate->current_fh, &resfh); out: fh_put(&resfh); out_umask: current->fs->umask = 0; - nfsd_attrs_free(&attrs); return status; } @@ -919,16 +772,12 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; read->rd_nf = NULL; + if (read->rd_offset >= OFFSET_MAX) + return nfserr_inval; trace_nfsd_read_start(rqstp, &cstate->current_fh, read->rd_offset, read->rd_length); - read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp)); - if (read->rd_offset > (u64)OFFSET_MAX) - read->rd_offset = (u64)OFFSET_MAX; - if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX) - read->rd_length = (u64)OFFSET_MAX - read->rd_offset; - /* * If we do a zero copy read, then a client will see read data * that reflects the state of the file *after* performing the @@ -944,7 +793,12 @@ nfsd4_read(struct svc_rqst *rqstp, struct 
nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, &read->rd_nf, NULL); - + if (status) { + dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); + goto out; + } + status = nfs_ok; +out: read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -1006,8 +860,10 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (!status) + if (!status) { + fh_unlock(&cstate->current_fh); set_change_info(&remove->rm_cinfo, &cstate->current_fh); + } return status; } @@ -1047,6 +903,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; + fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; @@ -1101,21 +958,17 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setattr *setattr = &u->setattr; - struct nfsd_attrs attrs = { - .na_iattr = &setattr->sa_iattr, - .na_seclabel = &setattr->sa_label, - }; - struct inode *inode; __be32 status = nfs_ok; - bool save_no_wcc; int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, WR_STATE, NULL, NULL); - if (status) + if (status) { + dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; + } } err = fh_want_write(&cstate->current_fh); if (err) @@ -1127,23 +980,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - inode = cstate->current_fh.fh_dentry->d_inode; - status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? 
NF4DIR : NF4REG, - setattr->sa_acl, &attrs); - + if (setattr->sa_acl != NULL) + status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, + setattr->sa_acl); if (status) goto out; - save_no_wcc = cstate->current_fh.fh_no_wcc; - cstate->current_fh.fh_no_wcc = true; - status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; + status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time64_t)0); - cstate->current_fh.fh_no_wcc = save_no_wcc; - if (!status) - status = nfserrno(attrs.na_labelerr); - if (!status) - status = nfserrno(attrs.na_aclerr); out: - nfsd_attrs_free(&attrs); fh_drop_write(&cstate->current_fh); return status; } @@ -1168,12 +1017,15 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, stateid, WR_STATE, &nf, NULL); - if (status) + if (status) { + dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; + } write->wr_how_written = write->wr_stable_how; - nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); + nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, + &write->wr_head, write->wr_buflen); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, @@ -1200,13 +1052,17 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); - if (status) + if (status) { + dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); goto out; + } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, dst_stateid, WR_STATE, dst, NULL); - if (status) + if (status) { + dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); goto out_put_src; + } /* fix up for NFS-specific 
error code */ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) || @@ -1239,7 +1095,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos, + status = nfsd4_clone_file_range(src, clone->cl_src_pos, dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); @@ -1249,17 +1105,30 @@ out: return status; } -static void nfs4_put_copy(struct nfsd4_copy *copy) +void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(&copy->refcount)) return; - kfree(copy->cp_src); kfree(copy); } +static bool +check_and_set_stop_copy(struct nfsd4_copy *copy) +{ + bool value; + + spin_lock(&copy->cp_clp->async_lock); + value = copy->stopped; + if (!copy->stopped) + copy->stopped = true; + spin_unlock(&copy->cp_clp->async_lock); + return value; +} + static void nfsd4_stop_copy(struct nfsd4_copy *copy) { - if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) + /* only 1 thread should stop the copy */ + if (!check_and_set_stop_copy(copy)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); } @@ -1296,88 +1165,12 @@ extern void nfs_sb_deactive(struct super_block *sb); #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" -/* - * setup a work entry in the ssc delayed unmount list.
- */ -static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, - struct nfsd4_ssc_umount_item **nsui) -{ - struct nfsd4_ssc_umount_item *ni = NULL; - struct nfsd4_ssc_umount_item *work = NULL; - struct nfsd4_ssc_umount_item *tmp; - DEFINE_WAIT(wait); - __be32 status = 0; - - *nsui = NULL; - work = kzalloc(sizeof(*work), GFP_KERNEL); -try_again: - spin_lock(&nn->nfsd_ssc_lock); - list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { - if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr))) - continue; - /* found a match */ - if (ni->nsui_busy) { - /* wait - and try again */ - prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE); - spin_unlock(&nn->nfsd_ssc_lock); - - /* allow 20secs for mount/unmount for now - revisit */ - if (kthread_should_stop() || - (freezable_schedule_timeout(20*HZ) == 0)) { - finish_wait(&nn->nfsd_ssc_waitq, &wait); - kfree(work); - return nfserr_eagain; - } - finish_wait(&nn->nfsd_ssc_waitq, &wait); - goto try_again; - } - *nsui = ni; - refcount_inc(&ni->nsui_refcnt); - spin_unlock(&nn->nfsd_ssc_lock); - kfree(work); - - /* return vfsmount in (*nsui)->nsui_vfsmount */ - return 0; - } - if (work) { - strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); - refcount_set(&work->nsui_refcnt, 2); - work->nsui_busy = true; - list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); - *nsui = work; - } else - status = nfserr_resource; - spin_unlock(&nn->nfsd_ssc_lock); - return status; -} - -static void nfsd4_ssc_update_dul(struct nfsd_net *nn, - struct nfsd4_ssc_umount_item *nsui, - struct vfsmount *ss_mnt) -{ - spin_lock(&nn->nfsd_ssc_lock); - nsui->nsui_vfsmount = ss_mnt; - nsui->nsui_busy = false; - wake_up_all(&nn->nfsd_ssc_waitq); - spin_unlock(&nn->nfsd_ssc_lock); -} - -static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn, - struct nfsd4_ssc_umount_item *nsui) -{ - spin_lock(&nn->nfsd_ssc_lock); - list_del(&nsui->nsui_list); - wake_up_all(&nn->nfsd_ssc_waitq); - 
spin_unlock(&nn->nfsd_ssc_lock); - kfree(nsui); -} - /* * Support one copy source server for now. */ static __be32 nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, - struct nfsd4_ssc_umount_item **nsui) + struct vfsmount **mount) { struct file_system_type *type; struct vfsmount *ss_mnt; @@ -1388,14 +1181,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, char *ipaddr, *dev_name, *raw_data; int len, raw_len; __be32 status = nfserr_inval; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); naddr = &nss->u.nl4_addr; tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, naddr->addr_len, (struct sockaddr *)&tmp_addr, sizeof(tmp_addr)); - *nsui = NULL; if (tmp_addrlen == 0) goto out_err; @@ -1438,23 +1229,14 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); - status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui); - if (status) - goto out_free_devname; - if ((*nsui)->nsui_vfsmount) - goto out_done; - /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); module_put(type->owner); - if (IS_ERR(ss_mnt)) { - status = nfserr_nodev; - nfsd4_ssc_cancel_dul(nn, *nsui); + if (IS_ERR(ss_mnt)) goto out_free_devname; - } - nfsd4_ssc_update_dul(nn, *nsui, ss_mnt); -out_done: + status = 0; + *mount = ss_mnt; out_free_devname: kfree(dev_name); @@ -1478,7 +1260,7 @@ out_err: static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_copy *copy) + struct nfsd4_copy *copy, struct vfsmount **mount) { struct svc_fh *s_fh = NULL; stateid_t *s_stid = &copy->cp_src_stateid; @@ -1491,14 +1273,14 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, if (status) goto out; - status = nfsd4_interssc_connect(copy->cp_src, rqstp, &copy->ss_nsui); + status = nfsd4_interssc_connect(&copy->cp_src, rqstp, mount); if (status)
goto out; s_fh = &cstate->save_fh; copy->c_fh.size = s_fh->fh_handle.fh_size; - memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size); + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); @@ -1509,26 +1291,13 @@ out: } static void -nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { - struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); - long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); - - nfs42_ssc_close(filp); - fput(filp); - - spin_lock(&nn->nfsd_ssc_lock); - list_del(&nsui->nsui_list); - /* - * vfsmount can be shared by multiple exports, - * decrement refcnt. If the count drops to 1 it - * will be unmounted when nsui_expire expires. - */ - refcount_dec(&nsui->nsui_refcnt); - nsui->nsui_expire = jiffies + timeout; - list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list); - spin_unlock(&nn->nfsd_ssc_lock); + nfs42_ssc_close(src->nf_file); + fput(src->nf_file); + nfsd_file_put(dst); + mntput(ss_mnt); } #else /* CONFIG_NFSD_V4_2_INTER_SSC */ @@ -1536,13 +1305,15 @@ nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_copy *copy) + struct nfsd4_copy *copy, + struct vfsmount **mount) { + *mount = NULL; return nfserr_inval; } static void -nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { } @@ -1565,21 +1336,23 @@ nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, &copy->nf_dst); } +static void +nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) +{ + nfsd_file_put(src); +
nfsd_file_put(dst); +} + static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { - struct nfsd4_cb_offload *cbo = - container_of(cb, struct nfsd4_cb_offload, co_cb); + struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); - kfree(cbo); + nfs4_put_copy(copy); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, struct rpc_task *task) { - struct nfsd4_cb_offload *cbo = - container_of(cb, struct nfsd4_cb_offload, co_cb); - - trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); return 1; } @@ -1590,28 +1363,20 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) { - copy->cp_res.wr_stable_how = - test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ? - NFS_FILE_SYNC : NFS_UNSTABLE; - nfsd4_copy_set_sync(copy, sync); + copy->cp_res.wr_stable_how = NFS_UNSTABLE; + copy->cp_synchronous = sync; gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net); } -static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, - struct file *dst, - struct file *src) +static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) { - errseq_t since; + struct file *dst = copy->nf_dst->nf_file; + struct file *src = copy->nf_src->nf_file; ssize_t bytes_copied = 0; - u64 bytes_total = copy->cp_count; + size_t bytes_total = copy->cp_count; u64 src_pos = copy->cp_src_pos; u64 dst_pos = copy->cp_dst_pos; - int status; - loff_t end; - /* See RFC 7862 p.67: */ - if (bytes_total == 0) - bytes_total = ULLONG_MAX; do { if (kthread_should_stop()) break; @@ -1623,29 +1388,16 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, copy->cp_res.wr_bytes_written += bytes_copied; src_pos += bytes_copied; dst_pos += bytes_copied; - } while (bytes_total > 0 && nfsd4_copy_is_async(copy)); - /* for a non-zero asynchronous copy do a commit of data */ - if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) { - since = READ_ONCE(dst->f_wb_err); - end = copy->cp_dst_pos +
copy->cp_res.wr_bytes_written - 1; - status = vfs_fsync_range(dst, copy->cp_dst_pos, end, 0); - if (!status) - status = filemap_check_wb_err(dst->f_mapping, since); - if (!status) - set_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags); - } + } while (bytes_total > 0 && !copy->cp_synchronous); return bytes_copied; } -static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, - struct file *src, struct file *dst, - bool sync) +static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) { __be32 status; ssize_t bytes; - bytes = _nfsd_copy_file_range(copy, dst, src); - + bytes = _nfsd_copy_file_range(copy); /* for async copy, we ignore the error, client can always retry * to get the error */ @@ -1655,6 +1407,13 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, nfsd4_init_copy_res(copy, sync); status = nfs_ok; } + + if (!copy->cp_intra) /* Inter server SSC */ + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, + copy->nf_dst); + else + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); + return status; } @@ -1663,100 +1422,71 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; dst->cp_count = src->cp_count; - dst->cp_flags = src->cp_flags; + dst->cp_synchronous = src->cp_synchronous; memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - /* for inter, nf_src doesn't exist yet */ - if (!nfsd4_ssc_is_inter(src)) + dst->cp_intra = src->cp_intra; + if (src->cp_intra) /* for inter, file_src doesn't exist yet */ dst->nf_src = nfsd_file_get(src->nf_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); - memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); + memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); -
dst->ss_nsui = src->ss_nsui; -} - -static void release_copy_files(struct nfsd4_copy *copy) -{ - if (copy->nf_src) - nfsd_file_put(copy->nf_src); - if (copy->nf_dst) - nfsd_file_put(copy->nf_dst); + dst->ss_mnt = src->ss_mnt; } static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); - release_copy_files(copy); - if (copy->cp_clp) { - spin_lock(&copy->cp_clp->async_lock); - if (!list_empty(&copy->copies)) - list_del_init(&copy->copies); - spin_unlock(&copy->cp_clp->async_lock); - } + nfsd_file_put(copy->nf_dst); + if (copy->cp_intra) + nfsd_file_put(copy->nf_src); + spin_lock(&copy->cp_clp->async_lock); + list_del(&copy->copies); + spin_unlock(&copy->cp_clp->async_lock); nfs4_put_copy(copy); } -static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) -{ - struct nfsd4_cb_offload *cbo; - - cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); - if (!cbo) - return; - - memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res)); - memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh)); - cbo->co_nfserr = nfserr; - - nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, - NFSPROC4_CLNT_CB_OFFLOAD); - trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, - &cbo->co_fh, copy->cp_count, nfserr); - nfsd4_run_cb(&cbo->co_cb); -} - -/** - * nfsd4_do_async_copy - kthread function for background server-side COPY - * @data: arguments for COPY operation - * - * Return values: - * %0: Copy operation is done.
- */ static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - __be32 nfserr; + struct nfsd4_copy *cb_copy; - if (nfsd4_ssc_is_inter(copy)) { - struct file *filp; - - filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount, - &copy->c_fh, &copy->stateid); - if (IS_ERR(filp)) { - switch (PTR_ERR(filp)) { - case -EBADF: - nfserr = nfserr_wrong_type; - break; - default: - nfserr = nfserr_offload_denied; - } + if (!copy->cp_intra) { /* Inter server SSC */ + copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); + if (!copy->nf_src) { + copy->nfserr = nfserr_serverfault; + /* ss_mnt will be unmounted by the laundromat */ + goto do_callback; + } + copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh, + &copy->stateid); + if (IS_ERR(copy->nf_src->nf_file)) { + copy->nfserr = nfserr_offload_denied; /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } - nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, - false); - nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst); - } else { - nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, - copy->nf_dst->nf_file, false); } + copy->nfserr = nfsd4_do_copy(copy, 0); do_callback: - nfsd4_send_cb_offload(copy, nfserr); + cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); + if (!cb_copy) + goto out; + refcount_set(&cb_copy->refcount, 1); + memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res)); + cb_copy->cp_clp = copy->cp_clp; + cb_copy->nfserr = copy->nfserr; + memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh)); + nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, + &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); + nfsd4_run_cb(&cb_copy->cp_cb); +out: + if (!copy->cp_intra) + kfree(copy->nf_src); cleanup_async_copy(copy); return 0; } @@ -1769,12 +1499,13 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy *async_copy = NULL; - if (nfsd4_ssc_is_inter(copy)) { - if (!inter_copy_offload_enable ||
nfsd4_copy_is_sync(copy)) { + if (!copy->cp_intra) { /* Inter server SSC */ + if (!inter_copy_offload_enable || copy->cp_synchronous) { status = nfserr_notsupp; goto out; } - status = nfsd4_setup_inter_ssc(rqstp, cstate, copy); + status = nfsd4_setup_inter_ssc(rqstp, cstate, copy, + &copy->ss_mnt); if (status) return nfserr_offload_denied; } else { @@ -1786,21 +1517,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy->cp_clp = cstate->clp; memcpy(&copy->fh, &cstate->current_fh.fh_handle, sizeof(struct knfsd_fh)); - if (nfsd4_copy_is_async(copy)) { + if (!copy->cp_synchronous) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; - INIT_LIST_HEAD(&async_copy->copies); - refcount_set(&async_copy->refcount, 1); - async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); - if (!async_copy->cp_src) - goto out_err; if (!nfs4_init_copy_state(nn, copy)) goto out_err; - memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid, + refcount_set(&async_copy->refcount, 1); + memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.stid, sizeof(copy->cp_res.cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, @@ -1814,24 +1541,18 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, wake_up_process(async_copy->copy_task); status = nfs_ok; } else { - status = nfsd4_do_copy(copy, copy->nf_src->nf_file, - copy->nf_dst->nf_file, true); + status = nfsd4_do_copy(copy, 1); } out: - release_copy_files(copy); return status; out_err: - if (nfsd4_ssc_is_inter(copy)) { - /* - * Source's vfsmount of inter-copy will be unmounted - * by the laundromat. Use copy instead of async_copy - * since async_copy->ss_nsui might not be set yet.
- */ - refcount_dec(&copy->ss_nsui->nsui_refcnt); - } if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); + /* + * source's vfsmount of inter-copy will be unmounted + * by the laundromat + */ goto out; } @@ -1842,7 +1563,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(&copy->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(&copy->refcount); spin_unlock(&clp->async_lock); @@ -1896,16 +1617,16 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cps = nfs4_alloc_init_cpntf_state(nn, stid); if (!cps) goto out; - memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t)); + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); /* For now, only return one server address in cpn_src, the * address used by the client to connect to this server.
*/ - cn->cpn_src->nl4_type = NL4_NETADDR; + cn->cpn_src.nl4_type = NL4_NETADDR; status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, - &cn->cpn_src->u.nl4_addr); + &cn->cpn_src.u.nl4_addr); WARN_ON_ONCE(status); if (status) { nfs4_put_cpntf_state(nn, cps); @@ -1926,8 +1647,10 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, WR_STATE, &nf, NULL); - if (status != nfs_ok) + if (status != nfs_ok) { + dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); return status; + } status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file, fallocate->falloc_offset, @@ -1983,8 +1706,10 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, RD_STATE, &nf, NULL); - if (status) + if (status) { + dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); return status; + } switch (seek->seek_whence) { case NFS4_CONTENT_DATA: @@ -2152,7 +1877,7 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, nfserr = nfs_ok; if (gdp->gd_maxcount != 0) { nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, - rqstp, cstate->clp, gdp); + rqstp, cstate->session->se_client, gdp); } gdp->gd_notify_types &= ops->notify_types; @@ -2438,7 +2163,7 @@ nfsd4_proc_null(struct svc_rqst *rqstp) static inline void nfsd4_increment_op_stats(u32 opnum) { if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]); + nfsdstats.nfs4_opcount[opnum]++; } static const struct nfsd4_operation nfsd4_ops[]; @@ -2528,6 +2253,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = 
&rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->iov = head; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - rqstp->rq_auth_slack; +} + #ifdef CONFIG_NFSD_V4_2_INTER_SSC static void check_if_stalefh_allowed(struct nfsd4_compoundargs *args) @@ -2555,7 +2299,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args) return; } putfh = (struct nfsd4_putfh *)&saved_op->u; - if (nfsd4_ssc_is_inter(copy)) + if (!copy->cp_intra) putfh->no_verify = true; } } @@ -2582,14 +2326,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; - resp->xdr = &rqstp->rq_res_stream; - resp->statusp = resp->xdr->p; - - /* reserve space for: NFS status code */ - xdr_reserve_space(resp->xdr, XDR_UNIT); - + svcxdr_init_encode(rqstp, resp); + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; resp->rqstp = rqstp; @@ -2608,6 +2348,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = nfserr_minor_vers_mismatch; if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) goto out; + status = nfserr_resource; + if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) + goto out; status = nfs41_check_op_ordering(args); if (status) { @@ -2620,20 +2363,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->client_opcnt); + trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; - if 
(unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { - /* If there are still more operations to process, - * stop here and report NFS4ERR_RESOURCE. */ - if (cstate->minorversion == 0 && - args->client_opcnt > resp->opcnt) { - op->status = nfserr_resource; - goto encode_op; - } - } - /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2657,13 +2390,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - fh_clear_pre_post_attrs(current_fh); + fh_clear_wcc(current_fh); /* If op is non-idempotent */ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a - * successful reply: + * succesful reply: */ u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* @@ -2702,15 +2435,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) encode_op: if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp->xdr, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); status = op->status; } - trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, - status, nfsd4_op_name(op->opnum)); + trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, + nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); nfsd4_increment_op_stats(op->opnum); @@ -2744,49 +2477,28 @@ out: #define op_encode_channel_attrs_maxsz (6 + 1 + 1) -/* - * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which - * is called before sunrpc sets rq_res.buflen. Thus we have to compute - * the maximum payload size here, based on transport limits and the size - * of the remaining space in the rq_pages array. 
- */ -static u32 nfsd4_max_payload(const struct svc_rqst *rqstp) -{ - u32 buflen; - - buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE; - buflen -= rqstp->rq_auth_slack; - buflen -= rqstp->rq_res.head[0].iov_len; - return min_t(u32, buflen, svc_max_payload(rqstp)); -} - -static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size) * sizeof(__be32); } -static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); } -static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { /* ac_supported, ac_resp_access */ return (op_encode_hdr_size + 2)* sizeof(__be32); } -static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); } -static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -2797,17 +2509,17 @@ static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp, * the op prematurely if the estimate is too large. We may turn off splice * reads unnecessarily. 
*/ -static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { - const u32 *bmap = op->u.getattr.ga_bmval; + u32 *bmap = op->u.getattr.ga_bmval; u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; u32 ret = 0; if (bmap0 & FATTR4_WORD0_ACL) - return nfsd4_max_payload(rqstp); + return svc_max_payload(rqstp); if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) - return nfsd4_max_payload(rqstp); + return svc_max_payload(rqstp); if (bmap1 & FATTR4_WORD1_OWNER) { ret += IDMAP_NAMESZ + 4; @@ -2835,28 +2547,24 @@ static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp, return ret; } -static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE; } -static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_lock_denied_maxsz) * sizeof(__be32); } -static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz + op_encode_change_info_maxsz + 1 @@ -2864,18 +2572,20 @@ static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp, + op_encode_delegation_maxsz) * sizeof(__be32); } -static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 
rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.read.rd_length, maxcount); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); + u32 maxcount = svc_max_payload(rqstp); + u32 rlen = min(op->u.read.rd_length, maxcount); /* * If we detect that the file changed during hole encoding, then we * recover by encoding the remaining reply as data. This means we need @@ -2886,77 +2596,70 @@ static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp, return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp)); + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.readdir.rd_maxcount, maxcount); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE; } -static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_rename_rsize(struct svc_rqst 
*rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + op_encode_change_info_maxsz) * sizeof(__be32); } -static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { return (op_encode_hdr_size + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } -static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids) * sizeof(__be32); } -static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } -static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR * (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32); } -static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * sizeof(__be32); } -static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } -static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ 1 + 
1 + /* eir_flags, spr_how */\ @@ -2970,16 +2673,14 @@ static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp, 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); } -static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); } -static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ @@ -2988,8 +2689,7 @@ static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp, op_encode_channel_attrs_maxsz) * sizeof(__be32); } -static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* wr_callback */ + @@ -3001,16 +2701,16 @@ static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp, 1 /* cr_synchronous */) * sizeof(__be32); } -static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { return (op_encode_hdr_size + 2 /* osr_count */ + 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } -static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { return (op_encode_hdr_size + 3 /* cnr_lease_time */ + @@ -3025,10 +2725,12 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, } #ifdef CONFIG_NFSD_PNFS -static u32 
nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp)); + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); return (op_encode_hdr_size + 1 /* gd_layout_type*/ + @@ -3041,8 +2743,7 @@ static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, * so we need to define an arbitrary upper bound here. */ #define MAX_LAYOUT_SIZE 128 -static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* logr_return_on_close */ + @@ -3051,16 +2752,14 @@ static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp, MAX_LAYOUT_SIZE) * sizeof(__be32); } -static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* locr_newsize */ + 2 /* ns_size */) * sizeof(__be32); } -static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* lrs_stateid */ + @@ -3069,36 +2768,41 @@ static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp, #endif /* CONFIG_NFSD_PNFS */ -static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 3) * sizeof(__be32); } -static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp, + struct 
nfsd4_op *op) { - u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp)); + u32 maxcount, rlen; + + maxcount = svc_max_payload(rqstp); + rlen = min_t(u32, XATTR_SIZE_MAX, maxcount); return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { - u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp)); + u32 maxcount, rlen; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount); return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp, - const struct nfsd4_op *op) +static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); @@ -3531,7 +3235,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) { struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_compoundargs *argp = rqstp->rq_argp; - struct nfsd4_op *this; + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; struct nfsd4_compound_state *cstate = &resp->cstate; struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; @@ -3568,7 +3272,7 @@ int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) void warn_on_nonidempotent_op(struct nfsd4_op *op) { if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { - pr_err("unable to encode reply to nonidempotent op %u (%s)\n", + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", op->opnum, nfsd4_op_name(op->opnum)); WARN_ON_ONCE(1); } @@ 
-3581,29 +3285,28 @@ static const char *nfsd4_op_name(unsigned opnum) return "unknown_operation"; } +#define nfsd4_voidres nfsd4_voidargs +struct nfsd4_voidargs { int dummy; }; + static const struct svc_procedure nfsd_procedures4[2] = { [NFSPROC4_NULL] = { .pc_func = nfsd4_proc_null, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfs4svc_decode_voidarg, + .pc_encode = nfs4svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd4_voidargs), + .pc_ressize = sizeof(struct nfsd4_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 1, - .pc_name = "NULL", }, [NFSPROC4_COMPOUND] = { .pc_func = nfsd4_proc_compound, .pc_decode = nfs4svc_decode_compoundargs, .pc_encode = nfs4svc_encode_compoundres, .pc_argsize = sizeof(struct nfsd4_compoundargs), - .pc_argzero = offsetof(struct nfsd4_compoundargs, iops), .pc_ressize = sizeof(struct nfsd4_compoundres), .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, .pc_xdrressize = NFSD_BUFSIZE/4, - .pc_name = "COMPOUND", }, }; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 189c622dde61..83c4e6883953 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -626,7 +626,7 @@ nfsd4_legacy_tracking_init(struct net *net) status = nfsd4_load_reboot_recovery_data(net); if (status) goto err; - pr_info("NFSD: Using legacy client tracking operations.\n"); + printk("NFSD: Using legacy client tracking operations.\n"); return 0; err: @@ -807,17 +807,17 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; name.data = memdup_user(&ci->cc_name.cn_id, namelen); - if (IS_ERR(name.data)) - return PTR_ERR(name.data); + if (IS_ERR_OR_NULL(name.data)) + return -EFAULT; name.len = namelen; get_user(princhashlen, &ci->cc_princhash.cp_len); if (princhashlen > 0) 
{ princhash.data = memdup_user( &ci->cc_princhash.cp_data, princhashlen); - if (IS_ERR(princhash.data)) { + if (IS_ERR_OR_NULL(princhash.data)) { kfree(name.data); - return PTR_ERR(princhash.data); + return -EFAULT; } princhash.len = princhashlen; } else @@ -829,8 +829,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &cnm->cn_len)) return -EFAULT; name.data = memdup_user(&cnm->cn_id, namelen); - if (IS_ERR(name.data)) - return PTR_ERR(name.data); + if (IS_ERR_OR_NULL(name.data)) + return -EFAULT; name.len = namelen; } if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { @@ -1030,7 +1030,7 @@ nfsd4_init_cld_pipe(struct net *net) status = __nfsd4_init_cld_pipe(net); if (!status) - pr_info("NFSD: Using old nfsdcld client tracking operations.\n"); + printk("NFSD: Using old nfsdcld client tracking operations.\n"); return status; } @@ -1607,7 +1607,7 @@ nfsd4_cld_tracking_init(struct net *net) nfs4_release_reclaim(nn); goto err_remove; } else - pr_info("NFSD: Using nfsdcld client tracking operations.\n"); + printk("NFSD: Using nfsdcld client tracking operations.\n"); return 0; err_remove: @@ -1866,7 +1866,7 @@ nfsd4_umh_cltrack_init(struct net *net) ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); if (!ret) - pr_info("NFSD: Using UMH upcall client tracking operations.\n"); + printk("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 228560f3fd0e..d402ca0b535f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -43,10 +43,6 @@ #include #include #include -#include -#include -#include - #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -86,7 +82,6 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); static void _free_cpntf_state_locked(struct nfsd_net *nn, 
struct nfs4_cpntf_state *cps); -static void nfsd4_file_hash_remove(struct nfs4_file *fi); /* Locking: */ @@ -128,23 +123,6 @@ static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; -static struct workqueue_struct *laundry_wq; - -int nfsd4_create_laundry_wq(void) -{ - int rc = 0; - - laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); - if (laundry_wq == NULL) - rc = -ENOMEM; - return rc; -} - -void nfsd4_destroy_laundry_wq(void) -{ - destroy_workqueue(laundry_wq); -} - static bool is_session_dead(struct nfsd4_session *ses) { return ses->se_flags & NFS4_SESSION_DEAD; @@ -163,13 +141,6 @@ static bool is_client_expired(struct nfs4_client *clp) return clp->cl_time == 0; } -static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn, - struct nfs4_client *clp) -{ - if (clp->cl_state != NFSD4_ACTIVE) - atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0); -} - static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -179,8 +150,6 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_rpc_users); - nfsd4_dec_courtesy_client_count(nn, clp); - clp->cl_state = NFSD4_ACTIVE; return nfs_ok; } @@ -201,8 +170,6 @@ renew_client_locked(struct nfs4_client *clp) list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); - nfsd4_dec_courtesy_client_count(nn, clp); - clp->cl_state = NFSD4_ACTIVE; } static void put_client_renew_locked(struct nfs4_client *clp) @@ -277,7 +244,6 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); - WARN_ON(list_empty(&cur->nbl_lru)); list_del_init(&cur->nbl_lru); found = cur; break; @@ -303,7 +269,6 @@ 
find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); - kref_init(&nbl->nbl_kref); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, &nfsd4_cb_notify_lock_ops, NFSPROC4_CLNT_CB_NOTIFY_LOCK); @@ -312,21 +277,12 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, return nbl; } -static void -free_nbl(struct kref *kref) -{ - struct nfsd4_blocked_lock *nbl; - - nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); - locks_release_private(&nbl->nbl_lock); - kfree(nbl); -} - static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { locks_delete_block(&nbl->nbl_lock); - kref_put(&nbl->nbl_kref, free_nbl); + locks_release_private(&nbl->nbl_lock); + kfree(nbl); } static void @@ -344,7 +300,6 @@ remove_blocked_locks(struct nfs4_lockowner *lo) struct nfsd4_blocked_lock, nbl_list); list_del_init(&nbl->nbl_list); - WARN_ON(list_empty(&nbl->nbl_lru)); list_move(&nbl->nbl_lru, &reaplist); } spin_unlock(&nn->blocked_locks_lock); @@ -369,8 +324,6 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { - trace_nfsd_cb_notify_lock_done(&zero_stateid, task); - /* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and @@ -400,130 +353,6 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { .release = nfsd4_cb_notify_lock_release, }; -/* - * We store the NONE, READ, WRITE, and BOTH bits separately in the - * st_{access,deny}_bmap field of the stateid, in order to track not - * only what share bits are currently in force, but also what - * combinations of share bits previous opens have used. 
This allows us - * to enforce the recommendation in - * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that - * the server return an error if the client attempt to downgrade to a - * combination of share bits not explicable by closing some of its - * previous opens. - * - * This enforcement is arguably incomplete, since we don't keep - * track of access/deny bit combinations; so, e.g., we allow: - * - * OPEN allow read, deny write - * OPEN allow both, deny none - * DOWNGRADE allow read, deny none - * - * which we should reject. - * - * But you could also argue that our current code is already overkill, - * since it only exists to return NFS4ERR_INVAL on incorrect client - * behavior. - */ -static unsigned int -bmap_to_share_mode(unsigned long bmap) -{ - int i; - unsigned int access = 0; - - for (i = 1; i < 4; i++) { - if (test_bit(i, &bmap)) - access |= i; - } - return access; -} - -/* set share access for a given stateid */ -static inline void -set_access(u32 access, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << access; - - WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); - stp->st_access_bmap |= mask; -} - -/* clear share access for a given stateid */ -static inline void -clear_access(u32 access, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << access; - - WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); - stp->st_access_bmap &= ~mask; -} - -/* test whether a given stateid has access */ -static inline bool -test_access(u32 access, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << access; - - return (bool)(stp->st_access_bmap & mask); -} - -/* set share deny for a given stateid */ -static inline void -set_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << deny; - - WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); - stp->st_deny_bmap |= mask; -} - -/* clear share deny for a given stateid */ -static inline void -clear_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << deny; - - 
WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); - stp->st_deny_bmap &= ~mask; -} - -/* test whether a given stateid is denying specific access */ -static inline bool -test_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << deny; - - return (bool)(stp->st_deny_bmap & mask); -} - -static int nfs4_access_to_omode(u32 access) -{ - switch (access & NFS4_SHARE_ACCESS_BOTH) { - case NFS4_SHARE_ACCESS_READ: - return O_RDONLY; - case NFS4_SHARE_ACCESS_WRITE: - return O_WRONLY; - case NFS4_SHARE_ACCESS_BOTH: - return O_RDWR; - } - WARN_ON_ONCE(1); - return O_RDONLY; -} - -static inline int -access_permit_read(struct nfs4_ol_stateid *stp) -{ - return test_access(NFS4_SHARE_ACCESS_READ, stp) || - test_access(NFS4_SHARE_ACCESS_BOTH, stp) || - test_access(NFS4_SHARE_ACCESS_WRITE, stp); -} - -static inline int -access_permit_write(struct nfs4_ol_stateid *stp) -{ - return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || - test_access(NFS4_SHARE_ACCESS_BOTH, stp); -} - static inline struct nfs4_stateowner * nfs4_get_stateowner(struct nfs4_stateowner *sop) { @@ -591,8 +420,11 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) void put_nfs4_file(struct nfs4_file *fi) { - if (refcount_dec_and_test(&fi->fi_ref)) { - nfsd4_file_hash_remove(fi); + might_lock(&state_lock); + + if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { + hlist_del_rcu(&fi->fi_hash); + spin_unlock(&state_lock); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); @@ -602,7 +434,9 @@ put_nfs4_file(struct nfs4_file *fi) static struct nfsd_file * __nfs4_get_fd(struct nfs4_file *f, int oflag) { - return nfsd_file_get(f->fi_fds[oflag]); + if (f->fi_fds[oflag]) + return nfsd_file_get(f->fi_fds[oflag]); + return NULL; } static struct nfsd_file * @@ -715,72 +549,22 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) return ret & OWNER_HASH_MASK; } -static struct rhltable nfs4_file_rhltable 
____cacheline_aligned_in_smp; +/* hash table for nfs4_file */ +#define FILE_HASH_BITS 8 +#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static const struct rhashtable_params nfs4_file_rhash_params = { - .key_len = sizeof_field(struct nfs4_file, fi_inode), - .key_offset = offsetof(struct nfs4_file, fi_inode), - .head_offset = offsetof(struct nfs4_file, fi_rlist), - - /* - * Start with a single page hash table to reduce resizing churn - * on light workloads. - */ - .min_size = 256, - .automatic_shrinking = true, -}; - -/* - * Check if courtesy clients have conflicting access and resolve it if possible - * - * access: is op_share_access if share_access is true. - * Check if access mode, op_share_access, would conflict with - * the current deny mode of the file 'fp'. - * access: is op_share_deny if share_access is false. - * Check if the deny mode, op_share_deny, would conflict with - * current access of the file 'fp'. - * stp: skip checking this entry. - * new_stp: normal open, not open upgrade. - * - * Function returns: - * false - access/deny mode conflict with normal client. - * true - no conflict or conflict with courtesy client(s) is resolved. - */ -static bool -nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp, - struct nfs4_ol_stateid *stp, u32 access, bool share_access) +static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh) { - struct nfs4_ol_stateid *st; - bool resolvable = true; - unsigned char bmap; - struct nfsd_net *nn; - struct nfs4_client *clp; - - lockdep_assert_held(&fp->fi_lock); - list_for_each_entry(st, &fp->fi_stateids, st_perfile) { - /* ignore lock stateid */ - if (st->st_openstp) - continue; - if (st == stp && new_stp) - continue; - /* check file access against deny mode or vice versa */ - bmap = share_access ? 
st->st_deny_bmap : st->st_access_bmap; - if (!(access & bmap_to_share_mode(bmap))) - continue; - clp = st->st_stid.sc_client; - if (try_to_expire_client(clp)) - continue; - resolvable = false; - break; - } - if (resolvable) { - clp = stp->st_stid.sc_client; - nn = net_generic(clp->net, nfsd_net_id); - mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - } - return resolvable; + return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0); } +static unsigned int file_hashval(struct knfsd_fh *fh) +{ + return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); +} + +static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; + static void __nfs4_file_get_access(struct nfs4_file *fp, u32 access) { @@ -984,23 +768,23 @@ out_free: * Create a unique stateid_t to represent each COPY. */ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, - unsigned char cs_type) + unsigned char sc_type) { int new_id; - stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; - stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; + stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->sc_type = sc_type; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); - stid->cs_stid.si_opaque.so_id = new_id; - stid->cs_stid.si_generation = 1; + stid->stid.si_opaque.so_id = new_id; + stid->stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; - stid->cs_type = cs_type; return 1; } @@ -1018,7 +802,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, if (!cps) return NULL; cps->cpntf_time = ktime_get_boottime_seconds(); - refcount_set(&cps->cp_stateid.cs_count, 1); + refcount_set(&cps->cp_stateid.sc_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; spin_lock(&nn->s2s_cp_lock); @@ -1034,12 +818,11 @@ void 
nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; - if (copy->cp_stateid.cs_type != NFS4_COPY_STID) - return; + WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, - copy->cp_stateid.cs_stid.si_opaque.so_id); + copy->cp_stateid.stid.si_opaque.so_id); spin_unlock(&nn->s2s_cp_lock); } @@ -1071,12 +854,7 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) static void nfs4_free_deleg(struct nfs4_stid *stid) { - struct nfs4_delegation *dp = delegstateid(stid); - - WARN_ON_ONCE(!list_empty(&stid->sc_cp_list)); - WARN_ON_ONCE(!list_empty(&dp->dl_perfile)); - WARN_ON_ONCE(!list_empty(&dp->dl_perclnt)); - WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru)); + WARN_ON(!list_empty(&stid->sc_cp_list)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } @@ -1126,7 +904,7 @@ static int delegation_blocked(struct knfsd_fh *fh) } spin_unlock(&blocked_delegations_lock); } - hash = jhash(&fh->fh_raw, fh->fh_size, 0); + hash = jhash(&fh->fh_base, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) @@ -1145,7 +923,7 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - hash = jhash(&fh->fh_raw, fh->fh_size, 0); + hash = jhash(&fh->fh_base, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); @@ -1159,6 +937,7 @@ static void block_delegations(struct knfsd_fh *fh) static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, + struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -1168,7 +947,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; - if 
(delegation_blocked(&fp->fi_fhandle)) + if (delegation_blocked(&current_fh->fh_handle)) goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) @@ -1187,7 +966,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, get_clnt_odstate(odstate); dp->dl_type = NFS4_OPEN_DELEGATE_READ; dp->dl_retries = 1; - dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); get_nfs4_file(fp); @@ -1366,8 +1144,6 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); - trace_nfsd_stid_revoke(&dp->dl_stid); - if (clp->cl_minorversion) { spin_lock(&clp->cl_lock); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; @@ -1392,6 +1168,108 @@ static unsigned int clientstr_hashval(struct xdr_netobj name) return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK; } +/* + * We store the NONE, READ, WRITE, and BOTH bits separately in the + * st_{access,deny}_bmap field of the stateid, in order to track not + * only what share bits are currently in force, but also what + * combinations of share bits previous opens have used. This allows us + * to enforce the recommendation of rfc 3530 14.2.19 that the server + * return an error if the client attempt to downgrade to a combination + * of share bits not explicable by closing some of its previous opens. + * + * XXX: This enforcement is actually incomplete, since we don't keep + * track of access/deny bit combinations; so, e.g., we allow: + * + * OPEN allow read, deny write + * OPEN allow both, deny none + * DOWNGRADE allow read, deny none + * + * which we should reject. 
+ */ +static unsigned int +bmap_to_share_mode(unsigned long bmap) { + int i; + unsigned int access = 0; + + for (i = 1; i < 4; i++) { + if (test_bit(i, &bmap)) + access |= i; + } + return access; +} + +/* set share access for a given stateid */ +static inline void +set_access(u32 access, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap |= mask; +} + +/* clear share access for a given stateid */ +static inline void +clear_access(u32 access, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap &= ~mask; +} + +/* test whether a given stateid has access */ +static inline bool +test_access(u32 access, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << access; + + return (bool)(stp->st_access_bmap & mask); +} + +/* set share deny for a given stateid */ +static inline void +set_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap |= mask; +} + +/* clear share deny for a given stateid */ +static inline void +clear_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap &= ~mask; +} + +/* test whether a given stateid is denying specific access */ +static inline bool +test_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << deny; + + return (bool)(stp->st_deny_bmap & mask); +} + +static int nfs4_access_to_omode(u32 access) +{ + switch (access & NFS4_SHARE_ACCESS_BOTH) { + case NFS4_SHARE_ACCESS_READ: + return O_RDONLY; + case NFS4_SHARE_ACCESS_WRITE: + return O_WRONLY; + case NFS4_SHARE_ACCESS_BOTH: + return O_RDWR; + } + WARN_ON_ONCE(1); + return O_RDONLY; +} + /* * A stateid that had a deny mode associated with it is being released * or downgraded. 
Recalculate the deny mode on the file. @@ -1832,12 +1710,13 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, int numslots = fattrs->maxreqs; int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; - int i; + int mem, i; - BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) - > PAGE_SIZE); + BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) + + sizeof(struct nfsd4_session) > PAGE_SIZE); + mem = numslots * sizeof(struct nfsd4_slot *); - new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); + new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); if (!new) return NULL; /* allocate each struct nfsd4_slot and data cache in one piece */ @@ -1869,8 +1748,6 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); struct nfs4_client *clp = c->cn_session->se_client; - trace_nfsd_cb_lost(clp); - spin_lock(&clp->cl_lock); if (!list_empty(&c->cn_persession)) { list_del(&c->cn_persession); @@ -2082,16 +1959,11 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) * This type of memory management is somewhat inefficient, but we use it * anyway since SETCLIENTID is not a common operation. 
*/ -static struct nfs4_client *alloc_client(struct xdr_netobj name, - struct nfsd_net *nn) +static struct nfs4_client *alloc_client(struct xdr_netobj name) { struct nfs4_client *clp; int i; - if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { - mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - return NULL; - } clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -2109,9 +1981,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, idr_init(&clp->cl_stateids); atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; - clp->cl_state = NFSD4_ACTIVE; - atomic_inc(&nn->nfs4_client_count); - atomic_set(&clp->cl_delegs_in_recall, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); @@ -2143,7 +2012,6 @@ static void __free_client(struct kref *k) kfree(clp->cl_nii_domain.data); kfree(clp->cl_nii_name.data); idr_destroy(&clp->cl_stateids); - kfree(clp->cl_ra); kmem_cache_free(client_slab, clp); } @@ -2219,7 +2087,6 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; @@ -2263,8 +2130,6 @@ __destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); - atomic_add_unless(&nn->nfs4_client_count, -1, 0); - nfsd4_dec_courtesy_client_count(nn, clp); free_client(clp); wake_up_all(&expiry_wq); } @@ -2493,24 +2358,9 @@ static void seq_quote_mem(struct seq_file *m, char *data, int len) seq_printf(m, "\""); } -static const char *cb_state2str(int state) -{ - switch (state) { - case NFSD4_CB_UP: - return "UP"; - case NFSD4_CB_UNKNOWN: - return "UNKNOWN"; - case NFSD4_CB_DOWN: - return "DOWN"; - case NFSD4_CB_FAULT: - return "FAULT"; - } - return "UNDEFINED"; -} - static int 
client_info_show(struct seq_file *m, void *v) { - struct inode *inode = file_inode(m->file); + struct inode *inode = m->private; struct nfs4_client *clp; u64 clid; @@ -2520,17 +2370,6 @@ static int client_info_show(struct seq_file *m, void *v) memcpy(&clid, &clp->cl_clientid, sizeof(clid)); seq_printf(m, "clientid: 0x%llx\n", clid); seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); - - if (clp->cl_state == NFSD4_COURTESY) - seq_puts(m, "status: courtesy\n"); - else if (clp->cl_state == NFSD4_EXPIRABLE) - seq_puts(m, "status: expirable\n"); - else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) - seq_puts(m, "status: confirmed\n"); - else - seq_puts(m, "status: unconfirmed\n"); - seq_printf(m, "seconds from last renew: %lld\n", - ktime_get_boottime_seconds() - clp->cl_time); seq_printf(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); @@ -2543,14 +2382,22 @@ static int client_info_show(struct seq_file *m, void *v) seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } - seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state)); - seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr); drop_client(clp); return 0; } -DEFINE_SHOW_ATTRIBUTE(client_info); +static int client_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, client_info_show, inode); +} + +static const struct file_operations client_info_fops = { + .open = client_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; static void *states_start(struct seq_file *s, loff_t *pos) __acquires(&clp->cl_lock) @@ -2593,7 +2440,7 @@ static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { - struct inode *inode = file_inode(f->nf_file); + struct inode *inode = f->nf_inode; 
seq_printf(s, "superblock: \"%02x:%02x:%ld\"", MAJOR(inode->i_sb->s_dev), @@ -2821,8 +2668,6 @@ static void force_expire_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); bool already_expired; - trace_nfsd_clid_admin_expired(&clp->cl_clientid); - spin_lock(&nn->client_lock); clp->cl_time = 0; spin_unlock(&nn->client_lock); @@ -2871,36 +2716,6 @@ static const struct tree_descr client_files[] = { [3] = {""}, }; -static int -nfsd4_cb_recall_any_done(struct nfsd4_callback *cb, - struct rpc_task *task) -{ - switch (task->tk_status) { - case -NFS4ERR_DELAY: - rpc_delay(task, 2 * HZ); - return 0; - default: - return 1; - } -} - -static void -nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) -{ - struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); -} - -static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { - .done = nfsd4_cb_recall_any_done, - .release = nfsd4_cb_recall_any_release, -}; - static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2909,9 +2724,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, int ret; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct dentry *dentries[ARRAY_SIZE(client_files)]; - clp = alloc_client(name, nn); + clp = alloc_client(name); if (clp == NULL) return NULL; @@ -2929,23 +2743,13 @@ static struct nfs4_client *create_client(struct xdr_netobj name, memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; - clp->cl_nfsd_dentry = nfsd_client_mkdir( - nn, &clp->cl_nfsdfs, - clp->cl_clientid.cl_id - nn->clientid_base, - client_files, dentries); - clp->cl_nfsd_info_dentry = dentries[0]; + clp->cl_nfsd_dentry = 
nfsd_client_mkdir(nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files); if (!clp->cl_nfsd_dentry) { free_client(clp); return NULL; } - clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL); - if (!clp->cl_ra) { - free_client(clp); - return NULL; - } - clp->cl_ra_time = 0; - nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops, - NFSPROC4_CLNT_CB_RECALL_ANY); return clp; } @@ -3012,11 +2816,11 @@ move_to_confirmed(struct nfs4_client *clp) lockdep_assert_held(&nn->client_lock); + dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); - trace_nfsd_clid_confirmed(&clp->cl_clientid); renew_client_locked(clp); } @@ -3121,7 +2925,7 @@ out_err: static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { - struct xdr_buf *buf = resp->xdr->buf; + struct xdr_buf *buf = resp->xdr.buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -3191,7 +2995,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; __be32 status; @@ -3285,7 +3089,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - "ip_addr=%s flags %x, spa_how %u\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); @@ -3332,7 +3136,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_nolock; } new->cl_mach_cred = true; - break; case SP4_NONE: break; default: /* checked by xdr code */ @@ -3369,24 +3172,20 @@ 
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } /* case 6 */ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; - trace_nfsd_clid_confirmed_r(conf); goto out_copy; } if (!creds_match) { /* case 3 */ if (client_has_state(conf)) { status = nfserr_clid_inuse; - trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; } goto out_new; } if (verfs_match) { /* case 2 */ conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; - trace_nfsd_clid_confirmed_r(conf); goto out_copy; } /* case 5, client reboot */ - trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf); conf = NULL; goto out_new; } @@ -3396,19 +3195,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - unconf = find_unconfirmed_client_by_name(&exid->clname, nn); + unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ unhash_client_locked(unconf); - /* case 1, new owner ID */ - trace_nfsd_clid_fresh(new); - + /* case 1 (normal case) */ out_new: if (conf) { status = mark_client_expired_locked(conf); if (status) goto out; - trace_nfsd_clid_replaced(&conf->cl_clientid); } new->cl_minorversion = cstate->minorversion; new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; @@ -3432,10 +3228,8 @@ out: out_nolock: if (new) expire_client(new); - if (unconf) { - trace_nfsd_clid_expire_unconf(&unconf->cl_clientid); + if (unconf) expire_client(unconf); - } return status; } @@ -3627,10 +3421,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { - status = nfserr_clid_inuse; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { - trace_nfsd_clid_cred_mismatch(unconf, rqstp); + status = nfserr_clid_inuse; goto out_free_conn; } status = nfserr_wrong_cred; @@ -3650,7 +3443,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, old = NULL; goto out_free_conn; } - trace_nfsd_clid_replaced(&old->cl_clientid); } 
move_to_confirmed(unconf); conf = unconf; @@ -3675,8 +3467,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); spin_unlock(&nn->client_lock); - if (conf == unconf) - fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); /* init connection and backchannel */ nfsd4_init_conn(rqstp, conn, new); nfsd4_put_session(new); @@ -3950,7 +3740,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_sequence *seq = &u->sequence; struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; @@ -4120,7 +3910,6 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, status = nfserr_wrong_cred; goto out; } - trace_nfsd_clid_destroyed(&clp->cl_clientid); unhash_client_locked(clp); out: spin_unlock(&nn->client_lock); @@ -4134,7 +3923,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; - struct nfs4_client *clp = cstate->clp; __be32 status = 0; if (rc->rca_one_fs) { @@ -4148,11 +3936,12 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, } status = nfserr_complete_already; - if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) + if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags)) goto out; status = nfserr_stale_clientid; - if (is_client_expired(clp)) + if (is_client_expired(cstate->session->se_client)) /* * The following error isn't really legal. 
* But we only get here if the client just explicitly @@ -4163,9 +3952,8 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, goto out; status = nfs_ok; - trace_nfsd_clid_reclaim_complete(&clp->cl_clientid); - nfsd4_client_record_create(clp); - inc_reclaim_complete(clp); + nfsd4_client_record_create(cstate->session->se_client); + inc_reclaim_complete(cstate->session->se_client); out: return status; } @@ -4185,29 +3973,27 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new = create_client(clname, rqstp, &clverifier); if (new == NULL) return nfserr_jukebox; + /* Cases below refer to rfc 3530 section 14.2.33: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf && client_has_state(conf)) { + /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) goto out; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - trace_nfsd_clid_cred_mismatch(conf, rqstp); + trace_nfsd_clid_inuse_err(conf); goto out; } } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) unhash_client_locked(unconf); - if (conf) { - if (same_verf(&conf->cl_verifier, &clverifier)) { - copy_clid(new, conf); - gen_confirm(new, nn); - } else - trace_nfsd_clid_verf_mismatch(conf, rqstp, - &clverifier); - } else - trace_nfsd_clid_fresh(new); + /* We need to handle only case 1: probable callback update */ + if (conf && same_verf(&conf->cl_verifier, &clverifier)) { + copy_clid(new, conf); + gen_confirm(new, nn); + } new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -4220,13 +4006,12 @@ out: spin_unlock(&nn->client_lock); if (new) free_client(new); - if (unconf) { - trace_nfsd_clid_expire_unconf(&unconf->cl_clientid); + if (unconf) expire_client(unconf); - } return status; } + __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -4255,27 +4040,25 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, * Nevertheless, RFC 7530 
recommends INUSE for this case: */ status = nfserr_clid_inuse; - if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { - trace_nfsd_clid_cred_mismatch(unconf, rqstp); + if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) goto out; - } - if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - trace_nfsd_clid_cred_mismatch(conf, rqstp); + if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) goto out; - } + /* cases below refer to rfc 3530 section 14.2.34: */ if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { if (conf && same_verf(&confirm, &conf->cl_confirm)) { + /* case 2: probable retransmit */ status = nfs_ok; - } else + } else /* case 4: client hasn't noticed we rebooted yet? */ status = nfserr_stale_clientid; goto out; } status = nfs_ok; - if (conf) { + if (conf) { /* case 1: callback update */ old = unconf; unhash_client_locked(old); nfsd4_change_callback(conf, &unconf->cl_cb_conn); - } else { + } else { /* case 3: normal case; new or rebooted client */ old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = nfserr_clid_inuse; @@ -4290,15 +4073,12 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, old = NULL; goto out; } - trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); conf = unconf; } get_client_locked(conf); spin_unlock(&nn->client_lock); - if (conf == unconf) - fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); nfsd4_probe_callback(conf); spin_lock(&nn->client_lock); put_client_renew_locked(conf); @@ -4315,26 +4095,27 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ - -static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp) +static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, + struct nfs4_file *fp) { + lockdep_assert_held(&state_lock); + refcount_set(&fp->fi_ref, 1); spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); 
INIT_LIST_HEAD(&fp->fi_clnt_odstate); - fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle); + fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_deleg_file = NULL; fp->fi_had_conflict = false; fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - fp->fi_aliased = false; - fp->fi_inode = d_inode(fh->fh_dentry); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); #endif + hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); } void @@ -4398,51 +4179,6 @@ out: return -ENOMEM; } -static unsigned long -nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) -{ - int count; - struct nfsd_net *nn = container_of(shrink, - struct nfsd_net, nfsd_client_shrinker); - - count = atomic_read(&nn->nfsd_courtesy_clients); - if (!count) - count = atomic_long_read(&num_delegations); - if (count) - queue_work(laundry_wq, &nn->nfsd_shrinker_work); - return (unsigned long)count; -} - -static unsigned long -nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) -{ - return SHRINK_STOP; -} - -void -nfsd4_init_leases_net(struct nfsd_net *nn) -{ - struct sysinfo si; - u64 max_clients; - - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; - nn->somebody_reclaimed = false; - nn->track_reclaim_completes = false; - nn->clverifier_counter = prandom_u32(); - nn->clientid_base = prandom_u32(); - nn->clientid_counter = nn->clientid_base + 1; - nn->s2s_cp_cl_id = nn->clientid_counter++; - - atomic_set(&nn->nfs4_client_count, 0); - si_meminfo(&si); - max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); - max_clients *= NFS4_CLIENTS_PER_GB; - nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); - - atomic_set(&nn->nfsd_courtesy_clients, 0); -} - static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; @@ -4711,80 +4447,55 @@ move_to_close_lru(struct nfs4_ol_stateid *s, 
struct net *net) nfs4_put_stid(&last->st_stid); } -static noinline_for_stack struct nfs4_file * -nfsd4_file_hash_lookup(const struct svc_fh *fhp) +/* search file_hashtbl[] for file */ +static struct nfs4_file * +find_file_locked(struct knfsd_fh *fh, unsigned int hashval) { - struct inode *inode = d_inode(fhp->fh_dentry); - struct rhlist_head *tmp, *list; - struct nfs4_file *fi; + struct nfs4_file *fp; - rcu_read_lock(); - list = rhltable_lookup(&nfs4_file_rhltable, &inode, - nfs4_file_rhash_params); - rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { - if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { - if (refcount_inc_not_zero(&fi->fi_ref)) { - rcu_read_unlock(); - return fi; - } + hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, + lockdep_is_held(&state_lock)) { + if (fh_match(&fp->fi_fhandle, fh)) { + if (refcount_inc_not_zero(&fp->fi_ref)) + return fp; } } - rcu_read_unlock(); return NULL; } -/* - * On hash insertion, identify entries with the same inode but - * distinct filehandles. They will all be on the list returned - * by rhltable_lookup(). - * - * inode->i_lock prevents racing insertions from adding an entry - * for the same inode/fhp pair twice. 
- */ -static noinline_for_stack struct nfs4_file * -nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) +struct nfs4_file * +find_file(struct knfsd_fh *fh) { - struct inode *inode = d_inode(fhp->fh_dentry); - struct rhlist_head *tmp, *list; - struct nfs4_file *ret = NULL; - bool alias_found = false; - struct nfs4_file *fi; - int err; + struct nfs4_file *fp; + unsigned int hashval = file_hashval(fh); rcu_read_lock(); - spin_lock(&inode->i_lock); - - list = rhltable_lookup(&nfs4_file_rhltable, &inode, - nfs4_file_rhash_params); - rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { - if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { - if (refcount_inc_not_zero(&fi->fi_ref)) - ret = fi; - } else - fi->fi_aliased = alias_found = true; - } - if (ret) - goto out_unlock; - - nfsd4_file_init(fhp, new); - err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, - nfs4_file_rhash_params); - if (err) - goto out_unlock; - - new->fi_aliased = alias_found; - ret = new; - -out_unlock: - spin_unlock(&inode->i_lock); + fp = find_file_locked(fh, hashval); rcu_read_unlock(); - return ret; + return fp; } -static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) +static struct nfs4_file * +find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh) { - rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, - nfs4_file_rhash_params); + struct nfs4_file *fp; + unsigned int hashval = file_hashval(fh); + + rcu_read_lock(); + fp = find_file_locked(fh, hashval); + rcu_read_unlock(); + if (fp) + return fp; + + spin_lock(&state_lock); + fp = find_file_locked(fh, hashval); + if (likely(fp == NULL)) { + nfsd4_init_file(fh, hashval, new); + fp = new; + } + spin_unlock(&state_lock); + + return fp; } /* @@ -4797,10 +4508,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = nfsd4_file_hash_lookup(current_fh); + fp = find_file(&current_fh->fh_handle); if (!fp) return ret; - /* Check for conflicting 
share reservations */ spin_lock(&fp->fi_lock); if (fp->fi_share_deny & deny_type) @@ -4810,35 +4520,6 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return ret; } -static bool nfsd4_deleg_present(const struct inode *inode) -{ - struct file_lock_context *ctx = locks_inode_context(inode); - - return ctx && !list_empty_careful(&ctx->flc_lease); -} - -/** - * nfsd_wait_for_delegreturn - wait for delegations to be returned - * @rqstp: the RPC transaction being executed - * @inode: in-core inode of the file being waited for - * - * The timeout prevents deadlock if all nfsd threads happen to be - * tied up waiting for returning delegations. - * - * Return values: - * %true: delegation was returned - * %false: timed out waiting for delegreturn - */ -bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode) -{ - long __maybe_unused timeo; - - timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode), - NFSD_DELEGRETURN_TIMEOUT); - trace_nfsd_delegret_wakeup(rqstp, inode, timeo); - return timeo > 0; -} - static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); @@ -4867,8 +4548,6 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, { struct nfs4_delegation *dp = cb_to_delegation(cb); - trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task); - if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID || dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) return 1; @@ -4914,30 +4593,22 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the - * flc_lock) we know the server hasn't removed the lease yet, and + * i_lock) we know the server hasn't removed the lease yet, and * we know it's safe to take a reference. 
*/ refcount_inc(&dp->dl_stid.sc_count); - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); + nfsd4_run_cb(&dp->dl_recall); } -/* Called from break_lease() with flc_lock held. */ +/* Called from break_lease() with i_lock held. */ static bool nfsd_break_deleg_cb(struct file_lock *fl) { + bool ret = false; struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; - struct nfs4_client *clp = dp->dl_stid.sc_client; - struct nfsd_net *nn; - trace_nfsd_cb_recall(&dp->dl_stid); - - dp->dl_recalled = true; - atomic_inc(&clp->cl_delegs_in_recall); - if (try_to_expire_client(clp)) { - nn = net_generic(clp->net, nfsd_net_id); - mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - } + trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid); /* * We don't want the locks code to timeout the lease for us; @@ -4946,9 +4617,11 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; + spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - return false; + spin_unlock(&fp->fi_lock); + return ret; } /** @@ -4979,14 +4652,9 @@ static int nfsd_change_deleg_cb(struct file_lock *onlist, int arg, struct list_head *dispose) { - struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner; - struct nfs4_client *clp = dp->dl_stid.sc_client; - - if (arg & F_UNLCK) { - if (dp->dl_recalled) - atomic_dec(&clp->cl_delegs_in_recall); + if (arg & F_UNLCK) return lease_modify(onlist, arg, dispose); - } else + else return -EAGAIN; } @@ -5007,37 +4675,40 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 return nfserr_bad_seqid; } -static struct nfs4_client *lookup_clientid(clientid_t *clid, bool sessions, - struct nfsd_net *nn) +static __be32 lookup_clientid(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn, + bool sessions) { struct nfs4_client *found; - spin_lock(&nn->client_lock); - found = find_confirmed_client(clid, sessions, nn); - if 
(found) - atomic_inc(&found->cl_rpc_users); - spin_unlock(&nn->client_lock); - return found; -} - -static __be32 set_client(clientid_t *clid, - struct nfsd4_compound_state *cstate, - struct nfsd_net *nn) -{ if (cstate->clp) { - if (!same_clid(&cstate->clp->cl_clientid, clid)) + found = cstate->clp; + if (!same_clid(&found->cl_clientid, clid)) return nfserr_stale_clientid; return nfs_ok; } + if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; + /* - * We're in the 4.0 case (otherwise the SEQUENCE op would have - * set cstate->clp), so session = false: + * For v4.1+ we get the client in the SEQUENCE op. If we don't have one + * cached already then we know this is for is for v4.0 and "sessions" + * will be false. */ - cstate->clp = lookup_clientid(clid, false, nn); - if (!cstate->clp) + WARN_ON_ONCE(cstate->session); + spin_lock(&nn->client_lock); + found = find_confirmed_client(clid, sessions, nn); + if (!found) { + spin_unlock(&nn->client_lock); return nfserr_expired; + } + atomic_inc(&found->cl_rpc_users); + spin_unlock(&nn->client_lock); + + /* Cache the nfs4_client in cstate! 
*/ + cstate->clp = found; return nfs_ok; } @@ -5051,6 +4722,8 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfs4_openowner *oo = NULL; __be32 status; + if (STALE_CLIENTID(&open->op_clientid, nn)) + return nfserr_stale_clientid; /* * In case we need it later, after we've already created the * file and don't want to risk a further failure: @@ -5059,7 +4732,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - status = set_client(clientid, cstate, nn); + status = lookup_clientid(clientid, cstate, nn, false); if (status) return status; clp = cstate->clp; @@ -5183,19 +4856,16 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, .ia_valid = ATTR_SIZE, .ia_size = 0, }; - struct nfsd_attrs attrs = { - .na_iattr = &iattr, - }; if (!open->op_truncate) return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); + return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, - struct nfsd4_open *open, bool new_stp) + struct nfsd4_open *open) { struct nfsd_file *nf = NULL; __be32 status; @@ -5211,13 +4881,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, */ status = nfs4_file_check_deny(fp, open->op_share_deny); if (status != nfs_ok) { - if (status != nfserr_share_denied) { - spin_unlock(&fp->fi_lock); - goto out; - } - if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, - stp, open->op_share_deny, false)) - status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } @@ -5225,13 +4888,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, /* set access to the file */ status = nfs4_file_get_access(fp, open->op_share_access); if (status != nfs_ok) { - if (status != nfserr_share_denied) { - spin_unlock(&fp->fi_lock); - 
goto out; - } - if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, - stp, open->op_share_access, true)) - status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } @@ -5247,12 +4903,9 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - - status = nfsd_file_acquire_opened(rqstp, cur_fh, access, - open->op_filp, &nf); - if (status != nfs_ok) + status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); + if (status) goto out_put_access; - spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { fp->fi_fds[oflag] = nf; @@ -5281,30 +4934,21 @@ out_put_access: } static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, - struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, - struct nfsd4_open *open) +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { __be32 status; unsigned char old_deny_bmap = stp->st_deny_bmap; if (!test_access(open->op_share_access, stp)) - return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false); + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); /* test and set deny mode */ spin_lock(&fp->fi_lock); status = nfs4_file_check_deny(fp, open->op_share_deny); - switch (status) { - case nfs_ok: + if (status == nfs_ok) { set_deny(open->op_share_deny, stp); fp->fi_share_deny |= - (open->op_share_deny & NFS4_SHARE_DENY_BOTH); - break; - case nfserr_share_denied: - if (nfs4_resolve_deny_conflicts_locked(fp, false, - stp, open->op_share_deny, false)) - status = nfserr_jukebox; - break; + (open->op_share_deny & NFS4_SHARE_DENY_BOTH); } spin_unlock(&fp->fi_lock); @@ -5348,118 +4992,11 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return fl; } -static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, - struct nfs4_file *fp) -{ - struct nfs4_ol_stateid *st; - struct file *f = fp->fi_deleg_file->nf_file; - struct inode *ino 
= locks_inode(f); - int writes; - - writes = atomic_read(&ino->i_writecount); - if (!writes) - return 0; - /* - * There could be multiple filehandles (hence multiple - * nfs4_files) referencing this file, but that's not too - * common; let's just give up in that case rather than - * trying to go look up all the clients using that other - * nfs4_file as well: - */ - if (fp->fi_aliased) - return -EAGAIN; - /* - * If there's a close in progress, make sure that we see it - * clear any fi_fds[] entries before we see it decrement - * i_writecount: - */ - smp_mb__after_atomic(); - - if (fp->fi_fds[O_WRONLY]) - writes--; - if (fp->fi_fds[O_RDWR]) - writes--; - if (writes > 0) - return -EAGAIN; /* There may be non-NFSv4 writers */ - /* - * It's possible there are non-NFSv4 write opens in progress, - * but if they haven't incremented i_writecount yet then they - * also haven't called break lease yet; so, they'll break this - * lease soon enough. So, all that's left to check for is NFSv4 - * opens: - */ - spin_lock(&fp->fi_lock); - list_for_each_entry(st, &fp->fi_stateids, st_perfile) { - if (st->st_openstp == NULL /* it's an open */ && - access_permit_write(st) && - st->st_stid.sc_client != clp) { - spin_unlock(&fp->fi_lock); - return -EAGAIN; - } - } - spin_unlock(&fp->fi_lock); - /* - * There's a small chance that we could be racing with another - * NFSv4 open. However, any open that hasn't added itself to - * the fi_stateids list also hasn't called break_lease yet; so, - * they'll break this lease soon enough. - */ - return 0; -} - -/* - * It's possible that between opening the dentry and setting the delegation, - * that it has been renamed or unlinked. Redo the lookup to verify that this - * hasn't happened. 
- */ -static int -nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, - struct svc_fh *parent) -{ - struct svc_export *exp; - struct dentry *child; - __be32 err; - - err = nfsd_lookup_dentry(open->op_rqstp, parent, - open->op_fname, open->op_fnamelen, - &exp, &child); - - if (err) - return -EAGAIN; - - exp_put(exp); - dput(child); - if (child != file_dentry(fp->fi_deleg_file->nf_file)) - return -EAGAIN; - - return 0; -} - -/* - * We avoid breaking delegations held by a client due to its own activity, but - * clearing setuid/setgid bits on a write is an implicit activity and the client - * may not notice and continue using the old mode. Avoid giving out a delegation - * on setuid/setgid files when the client is requesting an open for write. - */ -static int -nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) -{ - struct inode *inode = file_inode(nf->nf_file); - - if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) && - (inode->i_mode & (S_ISUID|S_ISGID))) - return -EAGAIN; - return 0; -} - static struct nfs4_delegation * -nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, - struct svc_fh *parent) +nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, + struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) { int status = 0; - struct nfs4_client *clp = stp->st_stid.sc_client; - struct nfs4_file *fp = stp->st_stid.sc_file; - struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf; struct file_lock *fl; @@ -5474,19 +5011,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, nf = find_readable_file(fp); if (!nf) { - /* - * We probably could attempt another open and get a read - * delegation, but for now, don't bother until the - * client actually sends us one. 
- */ - return ERR_PTR(-EAGAIN); + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return ERR_PTR(-EBADF); } spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) status = -EAGAIN; - else if (nfsd4_verify_setuid_write(open, nf)) - status = -EAGAIN; else if (!fp->fi_deleg_file) { fp->fi_deleg_file = nf; /* increment early to prevent fi_deleg_file from being @@ -5503,7 +5035,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, return ERR_PTR(status); status = -ENOMEM; - dp = alloc_init_deleg(clp, fp, odstate); + dp = alloc_init_deleg(clp, fp, fh, odstate); if (!dp) goto out_delegees; @@ -5517,31 +5049,12 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_clnt_odstate; - if (parent) { - status = nfsd4_verify_deleg_dentry(open, fp, parent); - if (status) - goto out_unlock; - } - - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) - goto out_unlock; - - /* - * Now that the deleg is set, check again to ensure that nothing - * raced in and changed the mode while we weren't lookng. - */ - status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); - if (status) - goto out_unlock; - - status = -EAGAIN; - if (fp->fi_had_conflict) - goto out_unlock; - spin_lock(&state_lock); spin_lock(&fp->fi_lock); - status = hash_delegation_locked(dp, fp); + if (fp->fi_had_conflict) + status = -EAGAIN; + else + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -5587,13 +5100,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. 
*/ static void -nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, - struct svc_fh *currentfh) +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, + struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); struct nfs4_client *clp = stp->st_stid.sc_client; - struct svc_fh *parent = NULL; int cb_up; int status = 0; @@ -5607,8 +5119,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - parent = currentfh; - fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's @@ -5619,11 +5129,22 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: goto out_no_deleg; } - dp = nfs4_set_delegation(open, stp, parent); + dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); if (IS_ERR(dp)) goto out_no_deleg; @@ -5665,18 +5186,6 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, */ } -/** - * nfsd4_process_open2 - finish open processing - * @rqstp: the RPC transaction being executed - * @current_fh: NFSv4 COMPOUND's current filehandle - * @open: OPEN arguments - * - * If successful, (1) truncate the file if open->op_truncate was - * set, (2) set open->op_stateid, (3) set open->op_delegation. - * - * Returns %nfs_ok on success; otherwise an nfs4stat value in - * network byte order is returned. 
- */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { @@ -5693,9 +5202,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = nfsd4_file_hash_insert(open->op_file, current_fh); - if (unlikely(!fp)) - return nfserr_jukebox; + fp = find_or_add_file(open->op_file, &current_fh->fh_handle); if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -5728,7 +5235,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); @@ -5757,7 +5264,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. 
*/ - nfs4_open_delegation(open, stp, &resp->cstate.current_fh); + nfs4_open_delegation(current_fh, open, stp); nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); @@ -5815,14 +5322,17 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); trace_nfsd_clid_renew(clid); - status = set_client(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) - return status; + goto out; clp = cstate->clp; + status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) - return nfserr_cb_path_down; - return nfs_ok; + goto out; + status = nfs_ok; +out: + return status; } void @@ -5883,245 +5393,66 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) return true; } -struct laundry_time { - time64_t cutoff; - time64_t new_timeo; -}; - -static bool state_expired(struct laundry_time *lt, time64_t last_refresh) -{ - time64_t time_remaining; - - if (last_refresh < lt->cutoff) - return true; - time_remaining = last_refresh - lt->cutoff; - lt->new_timeo = min(lt->new_timeo, time_remaining); - return false; -} - -#ifdef CONFIG_NFSD_V4_2_INTER_SSC -void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) -{ - spin_lock_init(&nn->nfsd_ssc_lock); - INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); - init_waitqueue_head(&nn->nfsd_ssc_waitq); -} -EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work); - -/* - * This is called when nfsd is being shutdown, after all inter_ssc - * cleanup were done, to destroy the ssc delayed unmount list. 
- */ -static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) -{ - struct nfsd4_ssc_umount_item *ni = NULL; - struct nfsd4_ssc_umount_item *tmp; - - spin_lock(&nn->nfsd_ssc_lock); - list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { - list_del(&ni->nsui_list); - spin_unlock(&nn->nfsd_ssc_lock); - mntput(ni->nsui_vfsmount); - kfree(ni); - spin_lock(&nn->nfsd_ssc_lock); - } - spin_unlock(&nn->nfsd_ssc_lock); -} - -static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) -{ - bool do_wakeup = false; - struct nfsd4_ssc_umount_item *ni = NULL; - struct nfsd4_ssc_umount_item *tmp; - - spin_lock(&nn->nfsd_ssc_lock); - list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { - if (time_after(jiffies, ni->nsui_expire)) { - if (refcount_read(&ni->nsui_refcnt) > 1) - continue; - - /* mark being unmount */ - ni->nsui_busy = true; - spin_unlock(&nn->nfsd_ssc_lock); - mntput(ni->nsui_vfsmount); - spin_lock(&nn->nfsd_ssc_lock); - - /* waiters need to start from begin of list */ - list_del(&ni->nsui_list); - kfree(ni); - - /* wakeup ssc_connect waiters */ - do_wakeup = true; - continue; - } - break; - } - if (do_wakeup) - wake_up_all(&nn->nfsd_ssc_waitq); - spin_unlock(&nn->nfsd_ssc_lock); -} -#endif - -/* Check if any lock belonging to this lockowner has any blockers */ -static bool -nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) -{ - struct file_lock_context *ctx; - struct nfs4_ol_stateid *stp; - struct nfs4_file *nf; - - list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { - nf = stp->st_stid.sc_file; - ctx = locks_inode_context(nf->fi_inode); - if (!ctx) - continue; - if (locks_owner_has_blockers(ctx, lo)) - return true; - } - return false; -} - -static bool -nfs4_anylock_blockers(struct nfs4_client *clp) -{ - int i; - struct nfs4_stateowner *so; - struct nfs4_lockowner *lo; - - if (atomic_read(&clp->cl_delegs_in_recall)) - return true; - spin_lock(&clp->cl_lock); - for (i = 0; i < OWNER_HASH_SIZE; i++) { - 
list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i], - so_strhash) { - if (so->so_is_open_owner) - continue; - lo = lockowner(so); - if (nfs4_lockowner_has_blockers(lo)) { - spin_unlock(&clp->cl_lock); - return true; - } - } - } - spin_unlock(&clp->cl_lock); - return false; -} - -static void -nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, - struct laundry_time *lt) -{ - unsigned int maxreap, reapcnt = 0; - struct list_head *pos, *next; - struct nfs4_client *clp; - - maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? - NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; - INIT_LIST_HEAD(reaplist); - spin_lock(&nn->client_lock); - list_for_each_safe(pos, next, &nn->client_lru) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state == NFSD4_EXPIRABLE) - goto exp_client; - if (!state_expired(lt, clp->cl_time)) - break; - if (!atomic_read(&clp->cl_rpc_users)) { - if (clp->cl_state == NFSD4_ACTIVE) - atomic_inc(&nn->nfsd_courtesy_clients); - clp->cl_state = NFSD4_COURTESY; - } - if (!client_has_state(clp)) - goto exp_client; - if (!nfs4_anylock_blockers(clp)) - if (reapcnt >= maxreap) - continue; -exp_client: - if (!mark_client_expired_locked(clp)) { - list_add(&clp->cl_lru, reaplist); - reapcnt++; - } - } - spin_unlock(&nn->client_lock); -} - -static void -nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, - struct list_head *reaplist) -{ - unsigned int maxreap = 0, reapcnt = 0; - struct list_head *pos, *next; - struct nfs4_client *clp; - - maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; - INIT_LIST_HEAD(reaplist); - - spin_lock(&nn->client_lock); - list_for_each_safe(pos, next, &nn->client_lru) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state == NFSD4_ACTIVE) - break; - if (reapcnt >= maxreap) - break; - if (!mark_client_expired_locked(clp)) { - list_add(&clp->cl_lru, reaplist); - reapcnt++; - } - } - spin_unlock(&nn->client_lock); -} - -static void -nfs4_process_client_reaplist(struct list_head 
*reaplist) -{ - struct list_head *pos, *next; - struct nfs4_client *clp; - - list_for_each_safe(pos, next, reaplist) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - trace_nfsd_clid_purged(&clp->cl_clientid); - list_del_init(&clp->cl_lru); - expire_client(clp); - } -} - static time64_t nfs4_laundromat(struct nfsd_net *nn) { + struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - struct laundry_time lt = { - .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease, - .new_timeo = nn->nfsd4_lease - }; + time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; + time64_t t, new_timeo = nn->nfsd4_lease; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; if (clients_still_reclaiming(nn)) { - lt.new_timeo = 0; + new_timeo = 0; goto out; } nfsd4_end_grace(nn); + INIT_LIST_HEAD(&reaplist); spin_lock(&nn->s2s_cp_lock); idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID && - state_expired(&lt, cps->cpntf_time)) + if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && + cps->cpntf_time < cutoff) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); - nfs4_get_client_reaplist(nn, &reaplist, &lt); - nfs4_process_client_reaplist(&reaplist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_time > cutoff) { + t = clp->cl_time - cutoff; + new_timeo = min(new_timeo, t); + break; + } + if (mark_client_expired_locked(clp)) { + trace_nfsd_clid_expired(&clp->cl_clientid); + continue; + } + list_add(&clp->cl_lru, &reaplist); + } + spin_unlock(&nn->client_lock); + list_for_each_safe(pos, next, &reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + trace_nfsd_clid_purged(&clp->cl_clientid); + 
list_del_init(&clp->cl_lru); + expire_client(clp); + } spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (!state_expired(&lt, dp->dl_time)) + if (dp->dl_time > cutoff) { + t = dp->dl_time - cutoff; + new_timeo = min(new_timeo, t); break; + } WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } @@ -6137,8 +5468,11 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (!state_expired(&lt, oo->oo_time)) + if (oo->oo_time > cutoff) { + t = oo->oo_time - cutoff; + new_timeo = min(new_timeo, t); break; + } list_del_init(&oo->oo_close_lru); stp = oo->oo_last_closed_stid; oo->oo_last_closed_stid = NULL; @@ -6164,8 +5498,11 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (!state_expired(&lt, nbl->nbl_time)) + if (nbl->nbl_time > cutoff) { + t = nbl->nbl_time - cutoff; + new_timeo = min(new_timeo, t); break; + } list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } @@ -6177,14 +5514,12 @@ nfs4_laundromat(struct nfsd_net *nn) list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - /* service the server-to-server copy delayed unmount list */ - nfsd4_ssc_expire_umount(nn); -#endif out: - return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); + new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); + return new_timeo; } +static struct workqueue_struct *laundry_wq; static void laundromat_main(struct work_struct *); static void @@ -6199,63 +5534,6 @@ laundromat_main(struct work_struct *laundry) queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static void -courtesy_client_reaper(struct nfsd_net *nn) -{ - struct list_head reaplist; - - 
nfs4_get_courtesy_client_reaplist(nn, &reaplist); - nfs4_process_client_reaplist(&reaplist); -} - -static void -deleg_reaper(struct nfsd_net *nn) -{ - struct list_head *pos, *next; - struct nfs4_client *clp; - struct list_head cblist; - - INIT_LIST_HEAD(&cblist); - spin_lock(&nn->client_lock); - list_for_each_safe(pos, next, &nn->client_lru) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state != NFSD4_ACTIVE || - list_empty(&clp->cl_delegations) || - atomic_read(&clp->cl_delegs_in_recall) || - test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || - (ktime_get_boottime_seconds() - - clp->cl_ra_time < 5)) { - continue; - } - list_add(&clp->cl_ra_cblist, &cblist); - - /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); - set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - clp->cl_ra_time = ktime_get_boottime_seconds(); - } - spin_unlock(&nn->client_lock); - - while (!list_empty(&cblist)) { - clp = list_first_entry(&cblist, struct nfs4_client, - cl_ra_cblist); - list_del_init(&clp->cl_ra_cblist); - clp->cl_ra->ra_keep = 0; - clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); - nfsd4_run_cb(&clp->cl_ra->ra_cb); - } -} - -static void -nfsd4_state_shrinker_worker(struct work_struct *work) -{ - struct nfsd_net *nn = container_of(work, struct nfsd_net, - nfsd_shrinker_work); - - courtesy_client_reaper(nn); - deleg_reaper(nn); -} - static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) @@ -6263,6 +5541,21 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) return nfs_ok; } +static inline int +access_permit_read(struct nfs4_ol_stateid *stp) +{ + return test_access(NFS4_SHARE_ACCESS_READ, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp) || + test_access(NFS4_SHARE_ACCESS_WRITE, stp); +} + +static inline int +access_permit_write(struct nfs4_ol_stateid *stp) +{ + return test_access(NFS4_SHARE_ACCESS_WRITE, 
stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp); +} + static __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) { @@ -6399,7 +5692,6 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; - struct nfs4_stid *stid; bool return_revoked = false; /* @@ -6414,7 +5706,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; - status = set_client(&stateid->si_opaque.so_clid, cstate, nn); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn, + false); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; @@ -6422,16 +5715,15 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, } if (status) return status; - stid = find_stateid_by_type(cstate->clp, stateid, typemask); - if (!stid) + *s = find_stateid_by_type(cstate->clp, stateid, typemask); + if (!*s) return nfserr_bad_stateid; - if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { - nfs4_put_stid(stid); + if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(*s); if (cstate->minorversion) return nfserr_deleg_revoked; return nfserr_bad_stateid; } - *s = stid; return nfs_ok; } @@ -6496,12 +5788,12 @@ out: static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { - WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID); - if (!refcount_dec_and_test(&cps->cp_stateid.cs_count)) + WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) return; list_del(&cps->cp_list); idr_remove(&nn->s2s_cp_stateids, - cps->cp_stateid.cs_stid.si_opaque.so_id); + cps->cp_stateid.stid.si_opaque.so_id); kfree(cps); } /* @@ -6523,12 +5815,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (cps_t) { state = container_of(cps_t, struct 
nfs4_cpntf_state, cp_stateid); - if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) { + if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { state = NULL; goto unlock; } if (!clp) - refcount_inc(&state->cp_stateid.cs_count); + refcount_inc(&state->cp_stateid.sc_count); else _free_cpntf_state_locked(nn, state); } @@ -6546,27 +5838,21 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, { __be32 status; struct nfs4_cpntf_state *cps = NULL; - struct nfs4_client *found; + struct nfsd4_compound_state cstate; status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status; cps->cpntf_time = ktime_get_boottime_seconds(); - - status = nfserr_expired; - found = lookup_clientid(&cps->cp_p_clid, true, nn); - if (!found) + memset(&cstate, 0, sizeof(cstate)); + status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); + if (status) goto out; - - *stid = find_stateid_by_type(found, &cps->cp_p_stateid, - NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID); - if (*stid) - status = nfs_ok; - else - status = nfserr_bad_stateid; - - put_client_renew(found); + status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + stid, nn); + put_client_renew(cstate.clp); out: nfs4_put_cpntf_state(nn, cps); return status; @@ -6601,11 +5887,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, return nfserr_grace; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - if (cstid) - status = nfserr_bad_stateid; - else - status = check_special_stateids(net, fhp, stateid, - flags); + status = check_special_stateids(net, fhp, stateid, flags); goto done; } @@ -6659,7 +5941,7 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; - struct nfs4_client *cl = cstate->clp; + struct nfs4_client *cl = cstate->session->se_client; list_for_each_entry(stateid, &test_stateid->ts_stateid_list, 
ts_id_list) stateid->ts_id_status = @@ -6705,7 +5987,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; - struct nfs4_client *cl = cstate->clp; + struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; spin_lock(&cl->cl_lock); @@ -7034,8 +6316,6 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; - trace_nfsd_deleg_return(stateid); - wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: nfs4_put_stid(&dp->dl_stid); @@ -7043,6 +6323,15 @@ out: return status; } +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end: NFS4_MAX_UINT64; +} + /* last octet in a range */ static inline u64 last_byte_offset(u64 start, u64 len) @@ -7072,7 +6361,7 @@ nfs4_transform_lock_offset(struct file_lock *lock) } static fl_owner_t -nfsd4_lm_get_owner(fl_owner_t owner) +nfsd4_fl_get_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; @@ -7081,7 +6370,7 @@ nfsd4_lm_get_owner(fl_owner_t owner) } static void -nfsd4_lm_put_owner(fl_owner_t owner) +nfsd4_fl_put_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; @@ -7089,29 +6378,6 @@ nfsd4_lm_put_owner(fl_owner_t owner) nfs4_put_stateowner(&lo->lo_owner); } -/* return pointer to struct nfs4_client if client is expirable */ -static bool -nfsd4_lm_lock_expirable(struct file_lock *cfl) -{ - struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner; - struct nfs4_client *clp = lo->lo_owner.so_client; - struct nfsd_net *nn; - - if (try_to_expire_client(clp)) { - nn = net_generic(clp->net, nfsd_net_id); - mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - return true; - } - return false; -} - -/* schedule laundromat to run immediately and wait for it to 
complete */ -static void -nfsd4_lm_expire_lock(void) -{ - flush_workqueue(laundry_wq); -} - static void nfsd4_lm_notify(struct file_lock *fl) { @@ -7131,19 +6397,14 @@ nfsd4_lm_notify(struct file_lock *fl) } spin_unlock(&nn->blocked_locks_lock); - if (queue) { - trace_nfsd_cb_notify_lock(lo, nbl); + if (queue) nfsd4_run_cb(&nbl->nbl_cb); - } } static const struct lock_manager_operations nfsd_posix_mng_ops = { - .lm_mod_owner = THIS_MODULE, .lm_notify = nfsd4_lm_notify, - .lm_get_owner = nfsd4_lm_get_owner, - .lm_put_owner = nfsd4_lm_put_owner, - .lm_lock_expirable = nfsd4_lm_lock_expirable, - .lm_expire_lock = nfsd4_lm_expire_lock, + .lm_get_owner = nfsd4_fl_get_owner, + .lm_put_owner = nfsd4_fl_put_owner, }; static inline void @@ -7458,9 +6719,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->lk_new_clientid, - &cstate->clp->cl_clientid, + &cstate->session->se_client->cl_clientid, sizeof(clientid_t)); + status = nfserr_stale_clientid; + if (STALE_CLIENTID(&lock->lk_new_clientid, nn)) + goto out; + /* validate and update open stateid and open seqid */ status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, @@ -7498,9 +6763,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!locks_in_grace(net) && lock->lk_reclaim) goto out; - if (lock->lk_reclaim) - fl_flags |= FL_RECLAIM; - fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: @@ -7537,16 +6799,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - /* - * Most filesystems with their own ->lock operations will block - * the nfsd thread waiting to acquire the lock. 
That leads to - * deadlocks (we don't want every nfsd thread tied up waiting - * for file locks), so don't attempt blocking lock notifications - * on those filesystems: - */ - if (nf->nf_file->f_op->lock) - fl_flags &= ~FL_SLEEP; - nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { dprintk("NFSD: %s: unable to allocate block!\n", __func__); @@ -7577,7 +6829,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); - kref_get(&nbl->nbl_kref); spin_unlock(&nn->blocked_locks_lock); } @@ -7590,7 +6841,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: - kref_put(&nbl->nbl_kref, free_nbl); nbl = NULL; fallthrough; case -EAGAIN: /* conflock holds conflicting lock */ @@ -7611,13 +6861,8 @@ out: /* dequeue it if we queued it before */ if (fl_flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); - if (!list_empty(&nbl->nbl_list) && - !list_empty(&nbl->nbl_lru)) { - list_del_init(&nbl->nbl_list); - list_del_init(&nbl->nbl_lru); - kref_put(&nbl->nbl_kref, free_nbl); - } - /* nbl can use one of lists to be linked to reaplist */ + list_del_init(&nbl->nbl_list); + list_del_init(&nbl->nbl_lru); spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); @@ -7658,22 +6903,21 @@ out: static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; - struct inode *inode; __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; - inode = fhp->fh_dentry->d_inode; - inode_lock(inode); /* to block new leases till after test_lock: */ - err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + fh_lock(fhp); /* to block new leases till after test_lock: */ + err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode, + 
NFSD_MAY_READ)); if (err) goto out; lock->fl_file = nf->nf_file; err = nfserrno(vfs_test_lock(nf->nf_file, lock)); lock->fl_file = NULL; out: - inode_unlock(inode); + fh_unlock(fhp); nfsd_file_put(nf); return err; } @@ -7698,7 +6942,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if (!nfsd4_has_session(cstate)) { - status = set_client(&lockt->lt_clientid, cstate, nn); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn, + false); if (status) goto out; } @@ -7835,20 +7080,18 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock *fl; int status = false; - struct nfsd_file *nf; + struct nfsd_file *nf = find_any_file(fp); struct inode *inode; struct file_lock_context *flctx; - spin_lock(&fp->fi_lock); - nf = find_any_file_locked(fp); if (!nf) { /* Any valid lock stateid should have some sort of access */ WARN_ON_ONCE(1); - goto out; + return status; } inode = locks_inode(nf->nf_file); - flctx = locks_inode_context(inode); + flctx = inode->i_flctx; if (flctx && !list_empty_careful(&flctx->flc_posix)) { spin_lock(&flctx->flc_lock); @@ -7860,62 +7103,57 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) } spin_unlock(&flctx->flc_lock); } -out: - spin_unlock(&fp->fi_lock); + nfsd_file_put(nf); return status; } -/** - * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations - * @rqstp: RPC transaction - * @cstate: NFSv4 COMPOUND state - * @u: RELEASE_LOCKOWNER arguments - * - * Check if theree are any locks still held and if not - free the lockowner - * and any lock state that is owned. 
- * - * Return values: - * %nfs_ok: lockowner released or not found - * %nfserr_locks_held: lockowner still in use - * %nfserr_stale_clientid: clientid no longer active - * %nfserr_expired: clientid not recognized - */ __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); clientid_t *clid = &rlockowner->rl_clientid; + struct nfs4_stateowner *sop; + struct nfs4_lockowner *lo = NULL; struct nfs4_ol_stateid *stp; - struct nfs4_lockowner *lo; - struct nfs4_client *clp; - LIST_HEAD(reaplist); + struct xdr_netobj *owner = &rlockowner->rl_owner; + unsigned int hashval = ownerstr_hashval(owner); __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp; + LIST_HEAD (reaplist); dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - status = set_client(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return status; + clp = cstate->clp; - + /* Find the matching lock stateowner */ spin_lock(&clp->cl_lock); - lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); - if (!lo) { - spin_unlock(&clp->cl_lock); - return nfs_ok; - } + list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { - list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) { + if (sop->so_is_open_owner || !same_owner_str(sop, owner)) + continue; + + if (atomic_read(&sop->so_count) != 1) { spin_unlock(&clp->cl_lock); - nfs4_put_stateowner(&lo->lo_owner); return nfserr_locks_held; } + + lo = lockowner(sop); + nfs4_get_stateowner(sop); + break; } + if (!lo) { + spin_unlock(&clp->cl_lock); + return status; + } + unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = 
list_first_entry(&lo->lo_owner.so_stateids, @@ -7925,11 +7163,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); - free_ol_stateid_reaplist(&reaplist); remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); - return nfs_ok; + + return status; } static inline struct nfs4_client_reclaim * @@ -8018,13 +7256,25 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) return NULL; } +/* +* Called from OPEN. Look for clientid in reclaim list. +*/ __be32 -nfs4_check_open_reclaim(struct nfs4_client *clp) +nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) { - if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) + __be32 status; + + /* find clientid in conf_id_hashtbl */ + status = lookup_clientid(clid, cstate, nn, false); + if (status) + return nfserr_reclaim_bad; + + if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags)) return nfserr_no_grace; - if (nfsd4_client_record_check(clp)) + if (nfsd4_client_record_check(cstate->clp)) return nfserr_reclaim_bad; return nfs_ok; @@ -8095,20 +7345,10 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - - if (register_shrinker(&nn->nfsd_client_shrinker)) - goto err_shrinker; return 0; -err_shrinker: - put_net(net); - kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8180,18 +7420,22 @@ nfs4_state_start(void) { int ret; - ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params); - if (ret) - return ret; - - ret = nfsd4_create_callback_queue(); - if 
(ret) { - rhltable_destroy(&nfs4_file_rhltable); - return ret; + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); + if (laundry_wq == NULL) { + ret = -ENOMEM; + goto out; } + ret = nfsd4_create_callback_queue(); + if (ret) + goto out_free_laundry; set_max_delegations(); return 0; + +out_free_laundry: + destroy_workqueue(laundry_wq); +out: + return ret; } void @@ -8201,8 +7445,6 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - unregister_shrinker(&nn->nfsd_client_shrinker); - cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8222,16 +7464,13 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_shutdown_umount(nn); -#endif } void nfs4_state_shutdown(void) { + destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); - rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5a68c6286492..dbfa24cf3390 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -42,8 +42,6 @@ #include #include #include -#include - #include #include "idmap.h" @@ -56,8 +54,6 @@ #include "pnfs.h" #include "filecache.h" -#include "trace.h" - #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include #endif @@ -94,8 +90,6 @@ check_filename(char *str, int len) if (len == 0) return nfserr_inval; - if (len > NFS4_MAXNAMLEN) - return nfserr_nametoolong; if (isdotent(str, len)) return nfserr_badname; for (i = 0; i < len; i++) @@ -104,6 +98,122 @@ check_filename(char *str, int len) return 0; } +#define DECODE_HEAD \ + __be32 *p; \ + __be32 status +#define DECODE_TAIL \ + status = 0; \ +out: \ + return status; \ +xdr_error: \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ + status = nfserr_bad_xdr; \ + goto out + +#define READMEM(x,nbytes) do { \ + x = (char *)p; 
\ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define SAVEMEM(x,nbytes) do { \ + if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ + savemem(argp, p, nbytes) : \ + (char *)p)) { \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ + goto xdr_error; \ + } \ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define COPYMEM(x,nbytes) do { \ + memcpy((x), p, nbytes); \ + p += XDR_QUADLEN(nbytes); \ +} while (0) + +/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */ +#define READ_BUF(nbytes) do { \ + if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \ + p = argp->p; \ + argp->p += XDR_QUADLEN(nbytes); \ + } else if (!(p = read_buf(argp, nbytes))) { \ + dprintk("NFSD: xdr error (%s:%d)\n", \ + __FILE__, __LINE__); \ + goto xdr_error; \ + } \ +} while (0) + +static void next_decode_page(struct nfsd4_compoundargs *argp) +{ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } +} + +static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) +{ + /* We want more bytes than seem to be available. + * Maybe we need a new page, maybe we have just run out + */ + unsigned int avail = (char *)argp->end - (char *)argp->p; + __be32 *p; + + if (argp->pagelen == 0) { + struct kvec *vec = &argp->rqstp->rq_arg.tail[0]; + + if (!argp->tail) { + argp->tail = true; + avail = vec->iov_len; + argp->p = vec->iov_base; + argp->end = vec->iov_base + avail; + } + + if (avail < nbytes) + return NULL; + + p = argp->p; + argp->p += XDR_QUADLEN(nbytes); + return p; + } + + if (avail + argp->pagelen < nbytes) + return NULL; + if (avail + PAGE_SIZE < nbytes) /* need more than a page !! 
*/ + return NULL; + /* ok, we can do it with the current plus the next page */ + if (nbytes <= sizeof(argp->tmp)) + p = argp->tmp; + else { + kfree(argp->tmpp); + p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL); + if (!p) + return NULL; + + } + /* + * The following memcpy is safe because read_buf is always + * called with nbytes > avail, and the two cases above both + * guarantee p points to at least nbytes bytes. + */ + memcpy(p, argp->p, avail); + next_decode_page(argp); + memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); + argp->p += XDR_QUADLEN(nbytes - avail); + return p; +} + +static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp) +{ + unsigned int this = (char *)argp->end - (char *)argp->p; + + return this + argp->pagelen; +} + static int zero_clientid(clientid_t *clid) { return (clid->cl_boot == 0) && (clid->cl_id == 0); @@ -149,246 +259,118 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } -static void * -svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) +static __be32 +svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head, + struct page ***pagelist, u32 buflen) { - __be32 *tmp; + int avail; + int len; + int pages; - /* - * The location of the decoded data item is stable, - * so @p is OK to use. This is the common case. + /* Sorry .. no magic macros for this.. 
* + * READ_BUF(write->wr_buflen); + * SAVEMEM(write->wr_buf, write->wr_buflen); */ - if (p != argp->xdr->scratch.iov_base) - return p; + avail = (char *)argp->end - (char *)argp->p; + if (avail + argp->pagelen < buflen) { + dprintk("NFSD: xdr error (%s:%d)\n", + __FILE__, __LINE__); + return nfserr_bad_xdr; + } + head->iov_base = argp->p; + head->iov_len = avail; + *pagelist = argp->pagelist; - tmp = svcxdr_tmpalloc(argp, len); - if (!tmp) + len = XDR_QUADLEN(buflen) << 2; + if (len >= avail) { + len -= avail; + + pages = len >> PAGE_SHIFT; + argp->pagelist += pages; + argp->pagelen -= pages * PAGE_SIZE; + len -= pages * PAGE_SIZE; + + next_decode_page(argp); + } + argp->p += XDR_QUADLEN(len); + + return 0; +} + +/** + * savemem - duplicate a chunk of memory for later processing + * @argp: NFSv4 compound argument structure to be freed with + * @p: pointer to be duplicated + * @nbytes: length to be duplicated + * + * Returns a pointer to a copy of @nbytes bytes of memory at @p + * that are preserved until processing of the NFSv4 compound + * operation described by @argp finishes. + */ +static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) +{ + void *ret; + + ret = svcxdr_tmpalloc(argp, nbytes); + if (!ret) return NULL; - memcpy(tmp, p, len); - return tmp; -} - -/* - * NFSv4 basic data type decoders - */ - -/* - * This helper handles variable-length opaques which belong to protocol - * elements that this implementation does not support. 
- */ -static __be32 -nfsd4_decode_ignored_string(struct nfsd4_compoundargs *argp, u32 maxlen) -{ - u32 len; - - if (xdr_stream_decode_u32(argp->xdr, &len) < 0) - return nfserr_bad_xdr; - if (maxlen && len > maxlen) - return nfserr_bad_xdr; - if (!xdr_inline_decode(argp->xdr, len)) - return nfserr_bad_xdr; - - return nfs_ok; + memcpy(ret, p, nbytes); + return ret; } static __be32 -nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { - __be32 *p; - u32 len; + DECODE_HEAD; - if (xdr_stream_decode_u32(argp->xdr, &len) < 0) - return nfserr_bad_xdr; - if (len == 0 || len > NFS4_OPAQUE_LIMIT) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, len); - if (!p) - return nfserr_bad_xdr; - o->data = svcxdr_savemem(argp, p, len); - if (!o->data) - return nfserr_jukebox; - o->len = len; - - return nfs_ok; -} - -static __be32 -nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) -{ - __be32 *p, status; - - if (xdr_stream_decode_u32(argp->xdr, lenp) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, *lenp); - if (!p) - return nfserr_bad_xdr; - status = check_filename((char *)p, *lenp); - if (status) - return status; - *namp = svcxdr_savemem(argp, p, *lenp); - if (!*namp) - return nfserr_jukebox; - - return nfs_ok; -} - -static __be32 -nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv) -{ - __be32 *p; - - p = xdr_inline_decode(argp->xdr, XDR_UNIT * 3); - if (!p) - return nfserr_bad_xdr; + READ_BUF(12); p = xdr_decode_hyper(p, &tv->tv_sec); tv->tv_nsec = be32_to_cpup(p++); if (tv->tv_nsec >= (u32)1000000000) return nfserr_inval; - return nfs_ok; + + DECODE_TAIL; } static __be32 -nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf) +nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) { - __be32 *p; + u32 bmlen; + DECODE_HEAD; - p = xdr_inline_decode(argp->xdr, 
NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_bad_xdr; - memcpy(verf->data, p, sizeof(verf->data)); - return nfs_ok; -} + bmval[0] = 0; + bmval[1] = 0; + bmval[2] = 0; -/** - * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4 - * @argp: NFSv4 compound argument structure - * @bmval: pointer to an array of u32's to decode into - * @bmlen: size of the @bmval array - * - * The server needs to return nfs_ok rather than nfserr_bad_xdr when - * encountering bitmaps containing bits it does not recognize. This - * includes bits in bitmap words past WORDn, where WORDn is the last - * bitmap WORD the implementation currently supports. Thus we are - * careful here to simply ignore bits in bitmap words that this - * implementation has yet to support explicitly. - * - * Return values: - * %nfs_ok: @bmval populated successfully - * %nfserr_bad_xdr: the encoded bitmap was invalid - */ -static __be32 -nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) -{ - ssize_t status; + READ_BUF(4); + bmlen = be32_to_cpup(p++); + if (bmlen > 1000) + goto xdr_error; - status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen); - return status == -EBADMSG ? 
nfserr_bad_xdr : nfs_ok; + READ_BUF(bmlen << 2); + if (bmlen > 0) + bmval[0] = be32_to_cpup(p++); + if (bmlen > 1) + bmval[1] = be32_to_cpup(p++); + if (bmlen > 2) + bmval[2] = be32_to_cpup(p++); + + DECODE_TAIL; } static __be32 -nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace) +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label, int *umask) { - __be32 *p, status; - u32 length; - - if (xdr_stream_decode_u32(argp->xdr, &ace->type) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &ace->flag) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &ace->access_mask) < 0) - return nfserr_bad_xdr; - - if (xdr_stream_decode_u32(argp->xdr, &length) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, length); - if (!p) - return nfserr_bad_xdr; - ace->whotype = nfs4_acl_get_whotype((char *)p, length); - if (ace->whotype != NFS4_ACL_WHO_NAMED) - status = nfs_ok; - else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - status = nfsd_map_name_to_gid(argp->rqstp, - (char *)p, length, &ace->who_gid); - else - status = nfsd_map_name_to_uid(argp->rqstp, - (char *)p, length, &ace->who_uid); - - return status; -} - -/* A counted array of nfsace4's */ -static noinline __be32 -nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl) -{ - struct nfs4_ace *ace; - __be32 status; - u32 count; - - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - - if (count > xdr_stream_remaining(argp->xdr) / 20) - /* - * Even with 4-byte names there wouldn't be - * space for that many aces; something fishy is - * going on: - */ - return nfserr_fbig; - - *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(count)); - if (*acl == NULL) - return nfserr_jukebox; - - (*acl)->naces = count; - for (ace = (*acl)->aces; ace < (*acl)->aces + count; ace++) { - status = nfsd4_decode_nfsace4(argp, ace); - if (status) - return 
status; - } - - return nfs_ok; -} - -static noinline __be32 -nfsd4_decode_security_label(struct nfsd4_compoundargs *argp, - struct xdr_netobj *label) -{ - u32 lfs, pi, length; - __be32 *p; - - if (xdr_stream_decode_u32(argp->xdr, &lfs) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &pi) < 0) - return nfserr_bad_xdr; - - if (xdr_stream_decode_u32(argp->xdr, &length) < 0) - return nfserr_bad_xdr; - if (length > NFS4_MAXLABELLEN) - return nfserr_badlabel; - p = xdr_inline_decode(argp->xdr, length); - if (!p) - return nfserr_bad_xdr; - label->len = length; - label->data = svcxdr_dupstr(argp, p, length); - if (!label->data) - return nfserr_jukebox; - - return nfs_ok; -} - -static __be32 -nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, - struct iattr *iattr, struct nfs4_acl **acl, - struct xdr_netobj *label, int *umask) -{ - unsigned int starting_pos; - u32 attrlist4_count; - __be32 *p, status; + int expected_len, len = 0; + u32 dummy32; + char *buf; + DECODE_HEAD; iattr->ia_valid = 0; - status = nfsd4_decode_bitmap4(argp, bmval, bmlen); - if (status) - return nfserr_bad_xdr; + if ((status = nfsd4_decode_bitmap(argp, bmval))) + return status; if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 @@ -398,69 +380,96 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, return nfserr_attrnotsupp; } - if (xdr_stream_decode_u32(argp->xdr, &attrlist4_count) < 0) - return nfserr_bad_xdr; - starting_pos = xdr_stream_pos(argp->xdr); + READ_BUF(4); + expected_len = be32_to_cpup(p++); if (bmval[0] & FATTR4_WORD0_SIZE) { - u64 size; - - if (xdr_stream_decode_u64(argp->xdr, &size) < 0) - return nfserr_bad_xdr; - iattr->ia_size = size; + READ_BUF(8); + len += 8; + p = xdr_decode_hyper(p, &iattr->ia_size); iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - status = nfsd4_decode_acl(argp, acl); - if (status) - return status; + u32 nace; + struct nfs4_ace *ace; + + 
READ_BUF(4); len += 4; + nace = be32_to_cpup(p++); + + if (nace > compoundargs_bytes_left(argp)/20) + /* + * Even with 4-byte names there wouldn't be + * space for that many aces; something fishy is + * going on: + */ + return nfserr_fbig; + + *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); + if (*acl == NULL) + return nfserr_jukebox; + + (*acl)->naces = nace; + for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { + READ_BUF(16); len += 16; + ace->type = be32_to_cpup(p++); + ace->flag = be32_to_cpup(p++); + ace->access_mask = be32_to_cpup(p++); + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + len += XDR_QUADLEN(dummy32) << 2; + READMEM(buf, dummy32); + ace->whotype = nfs4_acl_get_whotype(buf, dummy32); + status = nfs_ok; + if (ace->whotype != NFS4_ACL_WHO_NAMED) + ; + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + status = nfsd_map_name_to_gid(argp->rqstp, + buf, dummy32, &ace->who_gid); + else + status = nfsd_map_name_to_uid(argp->rqstp, + buf, dummy32, &ace->who_uid); + if (status) + return status; + } } else *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { - u32 mode; - - if (xdr_stream_decode_u32(argp->xdr, &mode) < 0) - return nfserr_bad_xdr; - iattr->ia_mode = mode; + READ_BUF(4); + len += 4; + iattr->ia_mode = be32_to_cpup(p++); iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { - u32 length; - - if (xdr_stream_decode_u32(argp->xdr, &length) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, length); - if (!p) - return nfserr_bad_xdr; - status = nfsd_map_name_to_uid(argp->rqstp, (char *)p, length, - &iattr->ia_uid); - if (status) + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) return status; iattr->ia_valid |= ATTR_UID; } if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { - u32 length; - - if 
(xdr_stream_decode_u32(argp->xdr, &length) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, length); - if (!p) - return nfserr_bad_xdr; - status = nfsd_map_name_to_gid(argp->rqstp, (char *)p, length, - &iattr->ia_gid); - if (status) + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) return status; iattr->ia_valid |= ATTR_GID; } if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { - u32 set_it; - - if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0) - return nfserr_bad_xdr; - switch (set_it) { + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: - status = nfsd4_decode_nfstime4(argp, &iattr->ia_atime); + len += 12; + status = nfsd4_decode_time(argp, &iattr->ia_atime); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -469,26 +478,17 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, iattr->ia_valid |= ATTR_ATIME; break; default: - return nfserr_bad_xdr; + goto xdr_error; } } - if (bmval[1] & FATTR4_WORD1_TIME_CREATE) { - struct timespec64 ts; - - /* No Linux filesystem supports setting this attribute. 
*/ - bmval[1] &= ~FATTR4_WORD1_TIME_CREATE; - status = nfsd4_decode_nfstime4(argp, &ts); - if (status) - return status; - } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { - u32 set_it; - - if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0) - return nfserr_bad_xdr; - switch (set_it) { + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: - status = nfsd4_decode_nfstime4(argp, &iattr->ia_mtime); + len += 12; + status = nfsd4_decode_time(argp, &iattr->ia_mtime); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -497,335 +497,222 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, iattr->ia_valid |= ATTR_MTIME; break; default: - return nfserr_bad_xdr; + goto xdr_error; } } + label->len = 0; if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) && bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { - status = nfsd4_decode_security_label(argp, label); - if (status) - return status; + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + dummy32 = be32_to_cpup(p++); + READ_BUF(dummy32); + if (dummy32 > NFS4_MAXLABELLEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->len = dummy32; + label->data = svcxdr_dupstr(argp, buf, dummy32); + if (!label->data) + return nfserr_jukebox; } if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { - u32 mode, mask; - if (!umask) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &mode) < 0) - return nfserr_bad_xdr; - iattr->ia_mode = mode & (S_IFMT | S_IALLUGO); - if (xdr_stream_decode_u32(argp->xdr, &mask) < 0) - return nfserr_bad_xdr; - *umask = mask & S_IRWXUGO; + goto xdr_error; + READ_BUF(8); + len += 8; + dummy32 = be32_to_cpup(p++); + iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO); + dummy32 = be32_to_cpup(p++); + *umask = dummy32 & 
S_IRWXUGO; iattr->ia_valid |= ATTR_MODE; } + if (len != expected_len) + goto xdr_error; - /* request sanity: did attrlist4 contain the expected number of words? */ - if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos) - return nfserr_bad_xdr; - - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid) +nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) { - __be32 *p; + DECODE_HEAD; - p = xdr_inline_decode(argp->xdr, NFS4_STATEID_SIZE); - if (!p) - return nfserr_bad_xdr; + READ_BUF(sizeof(stateid_t)); sid->si_generation = be32_to_cpup(p++); - memcpy(&sid->si_opaque, p, sizeof(sid->si_opaque)); - return nfs_ok; + COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; } static __be32 -nfsd4_decode_clientid4(struct nfsd4_compoundargs *argp, clientid_t *clientid) +nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) { - __be32 *p; + DECODE_HEAD; - p = xdr_inline_decode(argp->xdr, sizeof(__be64)); - if (!p) - return nfserr_bad_xdr; - memcpy(clientid, p, sizeof(*clientid)); - return nfs_ok; + READ_BUF(4); + access->ac_req_access = be32_to_cpup(p++); + + DECODE_TAIL; } -static __be32 -nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp, - clientid_t *clientid, struct xdr_netobj *owner) +static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) { - __be32 status; - - status = nfsd4_decode_clientid4(argp, clientid); - if (status) - return status; - return nfsd4_decode_opaque(argp, owner); -} - -#ifdef CONFIG_NFSD_PNFS -static __be32 -nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, - struct nfsd4_deviceid *devid) -{ - __be32 *p; - - p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE); - if (!p) - return nfserr_bad_xdr; - memcpy(devid, p, sizeof(*devid)); - return nfs_ok; -} - -static __be32 -nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) -{ - if 
(xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0) - return nfserr_bad_xdr; - if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES) - return nfserr_bad_xdr; - if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX) - return nfserr_bad_xdr; - - if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0) - return nfserr_bad_xdr; - if (lcp->lc_up_len > 0) { - lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len); - if (!lcp->lc_up_layout) - return nfserr_bad_xdr; - } - - return nfs_ok; -} - -static __be32 -nfsd4_decode_layoutreturn4(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutreturn *lrp) -{ - __be32 status; - - if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_return_type) < 0) - return nfserr_bad_xdr; - switch (lrp->lr_return_type) { - case RETURN_FILE: - if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.length) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_stateid4(argp, &lrp->lr_sid); - if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &lrp->lrf_body_len) < 0) - return nfserr_bad_xdr; - if (lrp->lrf_body_len > 0) { - lrp->lrf_body = xdr_inline_decode(argp->xdr, lrp->lrf_body_len); - if (!lrp->lrf_body) - return nfserr_bad_xdr; - } - break; - case RETURN_FSID: - case RETURN_ALL: - lrp->lr_seg.offset = 0; - lrp->lr_seg.length = NFS4_MAX_UINT64; - break; - default: - return nfserr_bad_xdr; - } - - return nfs_ok; -} - -#endif /* CONFIG_NFSD_PNFS */ - -static __be32 -nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp, - struct nfs4_sessionid *sessionid) -{ - __be32 *p; - - p = xdr_inline_decode(argp->xdr, NFS4_MAX_SESSIONID_LEN); - if (!p) - return nfserr_bad_xdr; - memcpy(sessionid->data, p, sizeof(sessionid->data)); - return nfs_ok; -} - -/* Defined in Appendix A of RFC 5531 */ -static __be32 -nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp, - struct nfsd4_cb_sec *cbs) -{ - u32 stamp, gidcount, uid, gid; - __be32 *p, status; - - 
if (xdr_stream_decode_u32(argp->xdr, &stamp) < 0) - return nfserr_bad_xdr; - /* machine name */ - status = nfsd4_decode_ignored_string(argp, 255); - if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &uid) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &gid) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &gidcount) < 0) - return nfserr_bad_xdr; - if (gidcount > 16) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, gidcount << 2); - if (!p) - return nfserr_bad_xdr; - if (cbs->flavor == (u32)(-1)) { - struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); - - kuid_t kuid = make_kuid(userns, uid); - kgid_t kgid = make_kgid(userns, gid); - if (uid_valid(kuid) && gid_valid(kgid)) { - cbs->uid = kuid; - cbs->gid = kgid; - cbs->flavor = RPC_AUTH_UNIX; - } else { - dprintk("RPC_AUTH_UNIX with invalid uid or gid, ignoring!\n"); - } - } - - return nfs_ok; -} - -static __be32 -nfsd4_decode_gss_cb_handles4(struct nfsd4_compoundargs *argp, - struct nfsd4_cb_sec *cbs) -{ - __be32 status; - u32 service; - - dprintk("RPC_AUTH_GSS callback secflavor not supported!\n"); - - if (xdr_stream_decode_u32(argp->xdr, &service) < 0) - return nfserr_bad_xdr; - if (service < RPC_GSS_SVC_NONE || service > RPC_GSS_SVC_PRIVACY) - return nfserr_bad_xdr; - /* gcbp_handle_from_server */ - status = nfsd4_decode_ignored_string(argp, 0); - if (status) - return status; - /* gcbp_handle_from_client */ - status = nfsd4_decode_ignored_string(argp, 0); - if (status) - return status; - - return nfs_ok; -} - -/* a counted array of callback_sec_parms4 items */ -static __be32 -nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) -{ - u32 i, secflavor, nr_secflavs; - __be32 status; + DECODE_HEAD; + struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); + u32 dummy, uid, gid; + char *machine_name; + int i; + int nr_secflavs; /* callback_sec_params4 */ - if (xdr_stream_decode_u32(argp->xdr, 
&nr_secflavs) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + nr_secflavs = be32_to_cpup(p++); if (nr_secflavs) cbs->flavor = (u32)(-1); else /* Is this legal? Be generous, take it to mean AUTH_NONE: */ cbs->flavor = 0; - for (i = 0; i < nr_secflavs; ++i) { - if (xdr_stream_decode_u32(argp->xdr, &secflavor) < 0) - return nfserr_bad_xdr; - switch (secflavor) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + switch (dummy) { case RPC_AUTH_NULL: - /* void */ + /* Nothing to read */ if (cbs->flavor == (u32)(-1)) cbs->flavor = RPC_AUTH_NULL; break; case RPC_AUTH_UNIX: - status = nfsd4_decode_authsys_parms(argp, cbs); - if (status) - return status; + READ_BUF(8); + /* stamp */ + dummy = be32_to_cpup(p++); + + /* machine name */ + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + SAVEMEM(machine_name, dummy); + + /* uid, gid */ + READ_BUF(8); + uid = be32_to_cpup(p++); + gid = be32_to_cpup(p++); + + /* more gids */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + if (cbs->flavor == (u32)(-1)) { + kuid_t kuid = make_kuid(userns, uid); + kgid_t kgid = make_kgid(userns, gid); + if (uid_valid(kuid) && gid_valid(kgid)) { + cbs->uid = kuid; + cbs->gid = kgid; + cbs->flavor = RPC_AUTH_UNIX; + } else { + dprintk("RPC_AUTH_UNIX with invalid" + "uid or gid ignoring!\n"); + } + } break; case RPC_AUTH_GSS: - status = nfsd4_decode_gss_cb_handles4(argp, cbs); - if (status) - return status; + dprintk("RPC_AUTH_GSS callback secflavor " + "not supported!\n"); + READ_BUF(8); + /* gcbp_service */ + dummy = be32_to_cpup(p++); + /* gcbp_handle_from_server */ + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + /* gcbp_handle_from_client */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); break; default: + dprintk("Illegal callback secflavor\n"); return nfserr_inval; } } - - return nfs_ok; + DECODE_TAIL; } - -/* - * NFSv4 operation argument decoders - */ - -static __be32 -nfsd4_decode_access(struct nfsd4_compoundargs *argp, - union 
nfsd4_op_u *u) +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) { - struct nfsd4_access *access = &u->access; - if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) - return nfserr_bad_xdr; - return nfs_ok; + DECODE_HEAD; + + READ_BUF(4); + bc->bc_cb_program = be32_to_cpup(p++); + nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +{ + DECODE_HEAD; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); + COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); + bcts->dir = be32_to_cpup(p++); + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker + * could help us figure out we should be using it. */ + DECODE_TAIL; } static __be32 -nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { - struct nfsd4_close *close = &u->close; - if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) - return nfserr_bad_xdr; - return nfsd4_decode_stateid4(argp, &close->cl_stateid); + DECODE_HEAD; + + READ_BUF(4); + close->cl_seqid = be32_to_cpup(p++); + return nfsd4_decode_stateid(argp, &close->cl_stateid); + + DECODE_TAIL; } static __be32 -nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) { - struct nfsd4_commit *commit = &u->commit; - if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) - return nfserr_bad_xdr; - memset(&commit->co_verf, 0, sizeof(commit->co_verf)); - return nfs_ok; + DECODE_HEAD; + + READ_BUF(12); + p = xdr_decode_hyper(p, &commit->co_offset); + commit->co_count = be32_to_cpup(p++); + + DECODE_TAIL; } static __be32 -nfsd4_decode_create(struct 
nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) { - struct nfsd4_create *create = &u->create; - __be32 *p, status; + DECODE_HEAD; - memset(create, 0, sizeof(*create)); - if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + create->cr_type = be32_to_cpup(p++); switch (create->cr_type) { case NF4LNK: - if (xdr_stream_decode_u32(argp->xdr, &create->cr_datalen) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, create->cr_datalen); - if (!p) - return nfserr_bad_xdr; + READ_BUF(4); + create->cr_datalen = be32_to_cpup(p++); + READ_BUF(create->cr_datalen); create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen); if (!create->cr_data) return nfserr_jukebox; break; case NF4BLK: case NF4CHR: - if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata1) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata2) < 0) - return nfserr_bad_xdr; + READ_BUF(8); + create->cr_specdata1 = be32_to_cpup(p++); + create->cr_specdata2 = be32_to_cpup(p++); break; case NF4SOCK: case NF4FIFO: @@ -833,221 +720,151 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) default: break; } - status = nfsd4_decode_component4(argp, &create->cr_name, - &create->cr_namelen); - if (status) - return status; - status = nfsd4_decode_fattr4(argp, create->cr_bmval, - ARRAY_SIZE(create->cr_bmval), - &create->cr_iattr, &create->cr_acl, - &create->cr_label, &create->cr_umask); - if (status) + + READ_BUF(4); + create->cr_namelen = be32_to_cpup(p++); + READ_BUF(create->cr_namelen); + SAVEMEM(create->cr_name, create->cr_namelen); + if ((status = check_filename(create->cr_name, create->cr_namelen))) return status; - return nfs_ok; + status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, + &create->cr_acl, &create->cr_label, + &create->cr_umask); + if (status) + goto out; + + DECODE_TAIL; } static 
inline __be32 -nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) { - struct nfsd4_delegreturn *dr = &u->delegreturn; - return nfsd4_decode_stateid4(argp, &dr->dr_stateid); + return nfsd4_decode_stateid(argp, &dr->dr_stateid); } static inline __be32 -nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) { - struct nfsd4_getattr *getattr = &u->getattr; - memset(getattr, 0, sizeof(*getattr)); - return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, - ARRAY_SIZE(getattr->ga_bmval)); + return nfsd4_decode_bitmap(argp, getattr->ga_bmval); } static __be32 -nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) { - struct nfsd4_link *link = &u->link; - memset(link, 0, sizeof(*link)); - return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); -} + DECODE_HEAD; -static __be32 -nfsd4_decode_open_to_lock_owner4(struct nfsd4_compoundargs *argp, - struct nfsd4_lock *lock) -{ - __be32 status; - - if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_open_seqid) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_stateid4(argp, &lock->lk_new_open_stateid); - if (status) + READ_BUF(4); + link->li_namelen = be32_to_cpup(p++); + READ_BUF(link->li_namelen); + SAVEMEM(link->li_name, link->li_namelen); + if ((status = check_filename(link->li_name, link->li_namelen))) return status; - if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_lock_seqid) < 0) - return nfserr_bad_xdr; - return nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid, - &lock->lk_new_owner); + + DECODE_TAIL; } static __be32 -nfsd4_decode_exist_lock_owner4(struct nfsd4_compoundargs *argp, - struct nfsd4_lock *lock) +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) 
{ - __be32 status; + DECODE_HEAD; - status = nfsd4_decode_stateid4(argp, &lock->lk_old_lock_stateid); - if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &lock->lk_old_lock_seqid) < 0) - return nfserr_bad_xdr; - - return nfs_ok; -} - -static __be32 -nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) -{ - if (xdr_stream_decode_bool(argp->xdr, &lock->lk_is_new) < 0) - return nfserr_bad_xdr; - if (lock->lk_is_new) - return nfsd4_decode_open_to_lock_owner4(argp, lock); - return nfsd4_decode_exist_lock_owner4(argp, lock); -} - -static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) -{ - struct nfsd4_lock *lock = &u->lock; - memset(lock, 0, sizeof(*lock)); - if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) - return nfserr_bad_xdr; + /* + * type, reclaim(boolean), offset, length, new_lock_owner(boolean) + */ + READ_BUF(28); + lock->lk_type = be32_to_cpup(p++); if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) - return nfserr_bad_xdr; - if (xdr_stream_decode_bool(argp->xdr, &lock->lk_reclaim) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lock->lk_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lock->lk_length) < 0) - return nfserr_bad_xdr; - return nfsd4_decode_locker4(argp, lock); + goto xdr_error; + lock->lk_reclaim = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &lock->lk_offset); + p = xdr_decode_hyper(p, &lock->lk_length); + lock->lk_is_new = be32_to_cpup(p++); + + if (lock->lk_is_new) { + READ_BUF(4); + lock->lk_new_open_seqid = be32_to_cpup(p++); + status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); + if (status) + return status; + READ_BUF(8 + sizeof(clientid_t)); + lock->lk_new_lock_seqid = be32_to_cpup(p++); + COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); + lock->lk_new_owner.len = be32_to_cpup(p++); + READ_BUF(lock->lk_new_owner.len); + READMEM(lock->lk_new_owner.data, 
lock->lk_new_owner.len); + } else { + status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); + if (status) + return status; + READ_BUF(4); + lock->lk_old_lock_seqid = be32_to_cpup(p++); + } + + DECODE_TAIL; } static __be32 -nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) { - struct nfsd4_lockt *lockt = &u->lockt; - memset(lockt, 0, sizeof(*lockt)); - if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) - return nfserr_bad_xdr; - if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_length) < 0) - return nfserr_bad_xdr; - return nfsd4_decode_state_owner4(argp, &lockt->lt_clientid, - &lockt->lt_owner); + DECODE_HEAD; + + READ_BUF(32); + lockt->lt_type = be32_to_cpup(p++); + if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) + goto xdr_error; + p = xdr_decode_hyper(p, &lockt->lt_offset); + p = xdr_decode_hyper(p, &lockt->lt_length); + COPYMEM(&lockt->lt_clientid, 8); + lockt->lt_owner.len = be32_to_cpup(p++); + READ_BUF(lockt->lt_owner.len); + READMEM(lockt->lt_owner.data, lockt->lt_owner.len); + + DECODE_TAIL; } static __be32 -nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) { - struct nfsd4_locku *locku = &u->locku; - __be32 status; + DECODE_HEAD; - if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) - return nfserr_bad_xdr; + READ_BUF(8); + locku->lu_type = be32_to_cpup(p++); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &locku->lu_seqid) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_stateid4(argp, &locku->lu_stateid); + goto xdr_error; + 
locku->lu_seqid = be32_to_cpup(p++); + status = nfsd4_decode_stateid(argp, &locku->lu_stateid); if (status) return status; - if (xdr_stream_decode_u64(argp->xdr, &locku->lu_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &locku->lu_length) < 0) - return nfserr_bad_xdr; + READ_BUF(16); + p = xdr_decode_hyper(p, &locku->lu_offset); + p = xdr_decode_hyper(p, &locku->lu_length); - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) { - struct nfsd4_lookup *lookup = &u->lookup; - return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); -} + DECODE_HEAD; -static __be32 -nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) -{ - __be32 status; + READ_BUF(4); + lookup->lo_len = be32_to_cpup(p++); + READ_BUF(lookup->lo_len); + SAVEMEM(lookup->lo_name, lookup->lo_len); + if ((status = check_filename(lookup->lo_name, lookup->lo_len))) + return status; - if (xdr_stream_decode_u32(argp->xdr, &open->op_createmode) < 0) - return nfserr_bad_xdr; - switch (open->op_createmode) { - case NFS4_CREATE_UNCHECKED: - case NFS4_CREATE_GUARDED: - status = nfsd4_decode_fattr4(argp, open->op_bmval, - ARRAY_SIZE(open->op_bmval), - &open->op_iattr, &open->op_acl, - &open->op_label, &open->op_umask); - if (status) - return status; - break; - case NFS4_CREATE_EXCLUSIVE: - status = nfsd4_decode_verifier4(argp, &open->op_verf); - if (status) - return status; - break; - case NFS4_CREATE_EXCLUSIVE4_1: - if (argp->minorversion < 1) - return nfserr_bad_xdr; - status = nfsd4_decode_verifier4(argp, &open->op_verf); - if (status) - return status; - status = nfsd4_decode_fattr4(argp, open->op_bmval, - ARRAY_SIZE(open->op_bmval), - &open->op_iattr, &open->op_acl, - &open->op_label, &open->op_umask); - if (status) - return status; - break; - default: - return nfserr_bad_xdr; - } - - 
return nfs_ok; -} - -static __be32 -nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) -{ - __be32 status; - - if (xdr_stream_decode_u32(argp->xdr, &open->op_create) < 0) - return nfserr_bad_xdr; - switch (open->op_create) { - case NFS4_OPEN_NOCREATE: - break; - case NFS4_OPEN_CREATE: - status = nfsd4_decode_createhow4(argp, open); - if (status) - return status; - break; - default: - return nfserr_bad_xdr; - } - - return nfs_ok; + DECODE_TAIL; } static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { + __be32 *p; u32 w; - if (xdr_stream_decode_u32(argp->xdr, &w) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + w = be32_to_cpup(p++); *share_access = w & NFS4_SHARE_ACCESS_MASK; *deleg_want = w & NFS4_SHARE_WANT_MASK; if (deleg_when) @@ -1090,163 +907,210 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): return nfs_ok; } +xdr_error: return nfserr_bad_xdr; } static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) { - if (xdr_stream_decode_u32(argp->xdr, x) < 0) - return nfserr_bad_xdr; - /* Note: unlike access bits, deny bits may be zero. */ + __be32 *p; + + READ_BUF(4); + *x = be32_to_cpup(p++); + /* Note: unlinke access bits, deny bits may be zero. 
*/ if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; - return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +{ + __be32 *p; + + READ_BUF(4); + o->len = be32_to_cpup(p++); + + if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) + return nfserr_bad_xdr; + + READ_BUF(o->len); + SAVEMEM(o->data, o->len); + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; } static __be32 -nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, - struct nfsd4_open *open) +nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { - __be32 status; + DECODE_HEAD; + u32 dummy; - if (xdr_stream_decode_u32(argp->xdr, &open->op_claim_type) < 0) - return nfserr_bad_xdr; + memset(open->op_bmval, 0, sizeof(open->op_bmval)); + open->op_iattr.ia_valid = 0; + open->op_openowner = NULL; + + open->op_xdr_error = 0; + /* seqid, share_access, share_deny, clientid, ownerlen */ + READ_BUF(4); + open->op_seqid = be32_to_cpup(p++); + /* decode, yet ignore deleg_when until supported */ + status = nfsd4_decode_share_access(argp, &open->op_share_access, + &open->op_deleg_want, &dummy); + if (status) + goto xdr_error; + status = nfsd4_decode_share_deny(argp, &open->op_share_deny); + if (status) + goto xdr_error; + READ_BUF(sizeof(clientid_t)); + COPYMEM(&open->op_clientid, sizeof(clientid_t)); + status = nfsd4_decode_opaque(argp, &open->op_owner); + if (status) + goto xdr_error; + READ_BUF(4); + open->op_create = be32_to_cpup(p++); + switch (open->op_create) { + case NFS4_OPEN_NOCREATE: + break; + case NFS4_OPEN_CREATE: + READ_BUF(4); + open->op_createmode = be32_to_cpup(p++); + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + case NFS4_CREATE_GUARDED: + status = nfsd4_decode_fattr(argp, open->op_bmval, + &open->op_iattr, &open->op_acl, &open->op_label, + &open->op_umask); + if (status) + goto out; + break; + case NFS4_CREATE_EXCLUSIVE: + READ_BUF(NFS4_VERIFIER_SIZE); + 
COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (argp->minorversion < 1) + goto xdr_error; + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); + status = nfsd4_decode_fattr(argp, open->op_bmval, + &open->op_iattr, &open->op_acl, &open->op_label, + &open->op_umask); + if (status) + goto out; + break; + default: + goto xdr_error; + } + break; + default: + goto xdr_error; + } + + /* open_claim */ + READ_BUF(4); + open->op_claim_type = be32_to_cpup(p++); switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - status = nfsd4_decode_component4(argp, &open->op_fname, - &open->op_fnamelen); - if (status) + READ_BUF(4); + open->op_fname.len = be32_to_cpup(p++); + READ_BUF(open->op_fname.len); + SAVEMEM(open->op_fname.data, open->op_fname.len); + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_PREVIOUS: - if (xdr_stream_decode_u32(argp->xdr, &open->op_delegate_type) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + open->op_delegate_type = be32_to_cpup(p++); break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: - status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid); + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); if (status) return status; - status = nfsd4_decode_component4(argp, &open->op_fname, - &open->op_fnamelen); - if (status) + READ_BUF(4); + open->op_fname.len = be32_to_cpup(p++); + READ_BUF(open->op_fname.len); + SAVEMEM(open->op_fname.data, open->op_fname.len); + if ((status = check_filename(open->op_fname.data, open->op_fname.len))) return status; break; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_PREV_FH: if (argp->minorversion < 1) - return nfserr_bad_xdr; + goto xdr_error; /* void */ break; case NFS4_OPEN_CLAIM_DELEG_CUR_FH: if (argp->minorversion < 1) - return nfserr_bad_xdr; - status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid); + goto 
xdr_error; + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); if (status) return status; break; default: - return nfserr_bad_xdr; + goto xdr_error; } - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { - struct nfsd4_open *open = &u->open; - __be32 status; - u32 dummy; - - memset(open, 0, sizeof(*open)); - - if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0) - return nfserr_bad_xdr; - /* deleg_want is ignored */ - status = nfsd4_decode_share_access(argp, &open->op_share_access, - &open->op_deleg_want, &dummy); - if (status) - return status; - status = nfsd4_decode_share_deny(argp, &open->op_share_deny); - if (status) - return status; - status = nfsd4_decode_state_owner4(argp, &open->op_clientid, - &open->op_owner); - if (status) - return status; - status = nfsd4_decode_openflag4(argp, open); - if (status) - return status; - return nfsd4_decode_open_claim4(argp, open); -} - -static __be32 -nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_open_confirm *open_conf = &u->open_confirm; - __be32 status; + DECODE_HEAD; if (argp->minorversion >= 1) return nfserr_notsupp; - status = nfsd4_decode_stateid4(argp, &open_conf->oc_req_stateid); + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); if (status) return status; - if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + open_conf->oc_seqid = be32_to_cpup(p++); - memset(&open_conf->oc_resp_stateid, 0, - sizeof(open_conf->oc_resp_stateid)); - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) { - struct nfsd4_open_downgrade *open_down = 
&u->open_downgrade; - __be32 status; - - memset(open_down, 0, sizeof(*open_down)); - status = nfsd4_decode_stateid4(argp, &open_down->od_stateid); + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &open_down->od_stateid); if (status) return status; - if (xdr_stream_decode_u32(argp->xdr, &open_down->od_seqid) < 0) - return nfserr_bad_xdr; - /* deleg_want is ignored */ + READ_BUF(4); + open_down->od_seqid = be32_to_cpup(p++); status = nfsd4_decode_share_access(argp, &open_down->od_share_access, &open_down->od_deleg_want, NULL); if (status) return status; - return nfsd4_decode_share_deny(argp, &open_down->od_share_deny); + status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); + if (status) + return status; + DECODE_TAIL; } static __be32 -nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) { - struct nfsd4_putfh *putfh = &u->putfh; - __be32 *p; + DECODE_HEAD; - if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + putfh->pf_fhlen = be32_to_cpup(p++); if (putfh->pf_fhlen > NFS4_FHSIZE) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen); - if (!p) - return nfserr_bad_xdr; - putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen); - if (!putfh->pf_fhval) - return nfserr_jukebox; + goto xdr_error; + READ_BUF(putfh->pf_fhlen); + SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen); - putfh->no_verify = false; - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) +nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) { if (argp->minorversion == 0) return nfs_ok; @@ -1254,771 +1118,719 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) } static __be32 -nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct 
nfsd4_read *read) { - struct nfsd4_read *read = &u->read; - __be32 status; + DECODE_HEAD; - memset(read, 0, sizeof(*read)); - status = nfsd4_decode_stateid4(argp, &read->rd_stateid); + status = nfsd4_decode_stateid(argp, &read->rd_stateid); if (status) return status; - if (xdr_stream_decode_u64(argp->xdr, &read->rd_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &read->rd_length) < 0) - return nfserr_bad_xdr; + READ_BUF(12); + p = xdr_decode_hyper(p, &read->rd_offset); + read->rd_length = be32_to_cpup(p++); - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) { - struct nfsd4_readdir *readdir = &u->readdir; - __be32 status; + DECODE_HEAD; - memset(readdir, 0, sizeof(*readdir)); - if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_verifier4(argp, &readdir->rd_verf); - if (status) + READ_BUF(24); + p = xdr_decode_hyper(p, &readdir->rd_cookie); + COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); + readdir->rd_dircount = be32_to_cpup(p++); + readdir->rd_maxcount = be32_to_cpup(p++); + if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) + goto out; + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +{ + DECODE_HEAD; + + READ_BUF(4); + remove->rm_namelen = be32_to_cpup(p++); + READ_BUF(remove->rm_namelen); + SAVEMEM(remove->rm_name, remove->rm_namelen); + if ((status = check_filename(remove->rm_name, remove->rm_namelen))) return status; - if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_dircount) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_maxcount) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_uint32_array(argp->xdr, readdir->rd_bmval, - ARRAY_SIZE(readdir->rd_bmval)) < 0) - return 
nfserr_bad_xdr; - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) { - struct nfsd4_remove *remove = &u->remove; - memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); - return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); -} + DECODE_HEAD; -static __be32 -nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) -{ - struct nfsd4_rename *rename = &u->rename; - __be32 status; - - memset(rename, 0, sizeof(*rename)); - status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen); - if (status) + READ_BUF(4); + rename->rn_snamelen = be32_to_cpup(p++); + READ_BUF(rename->rn_snamelen); + SAVEMEM(rename->rn_sname, rename->rn_snamelen); + READ_BUF(4); + rename->rn_tnamelen = be32_to_cpup(p++); + READ_BUF(rename->rn_tnamelen); + SAVEMEM(rename->rn_tname, rename->rn_tnamelen); + if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) return status; - return nfsd4_decode_component4(argp, &rename->rn_tname, &rename->rn_tnamelen); + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) + return status; + + DECODE_TAIL; } static __be32 -nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { - clientid_t *clientid = &u->renew; - return nfsd4_decode_clientid4(argp, clientid); + DECODE_HEAD; + + if (argp->minorversion >= 1) + return nfserr_notsupp; + + READ_BUF(sizeof(clientid_t)); + COPYMEM(clientid, sizeof(clientid_t)); + + DECODE_TAIL; } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_secinfo *secinfo) { - struct nfsd4_secinfo *secinfo = &u->secinfo; - secinfo->si_exp = NULL; - return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); -} + DECODE_HEAD; -static 
__be32 -nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) -{ - struct nfsd4_setattr *setattr = &u->setattr; - __be32 status; - - memset(setattr, 0, sizeof(*setattr)); - status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid); + READ_BUF(4); + secinfo->si_namelen = be32_to_cpup(p++); + READ_BUF(secinfo->si_namelen); + SAVEMEM(secinfo->si_name, secinfo->si_namelen); + status = check_filename(secinfo->si_name, secinfo->si_namelen); if (status) return status; - return nfsd4_decode_fattr4(argp, setattr->sa_bmval, - ARRAY_SIZE(setattr->sa_bmval), - &setattr->sa_iattr, &setattr->sa_acl, - &setattr->sa_label, NULL); + DECODE_TAIL; } static __be32 -nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo_no_name *sin) { - struct nfsd4_setclientid *setclientid = &u->setclientid; - __be32 *p, status; + DECODE_HEAD; - memset(setclientid, 0, sizeof(*setclientid)); + READ_BUF(4); + sin->sin_style = be32_to_cpup(p++); + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +{ + __be32 status; + + status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); + if (status) + return status; + return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, + &setattr->sa_acl, &setattr->sa_label, NULL); +} + +static __be32 +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +{ + DECODE_HEAD; if (argp->minorversion >= 1) return nfserr_notsupp; - status = nfsd4_decode_verifier4(argp, &setclientid->se_verf); - if (status) - return status; + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); + status = nfsd4_decode_opaque(argp, &setclientid->se_name); if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_prog) < 0) return nfserr_bad_xdr; - if 
(xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_netid_len) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len); - if (!p) - return nfserr_bad_xdr; - setclientid->se_callback_netid_val = svcxdr_savemem(argp, p, - setclientid->se_callback_netid_len); - if (!setclientid->se_callback_netid_val) - return nfserr_jukebox; + READ_BUF(8); + setclientid->se_callback_prog = be32_to_cpup(p++); + setclientid->se_callback_netid_len = be32_to_cpup(p++); + READ_BUF(setclientid->se_callback_netid_len); + SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); + READ_BUF(4); + setclientid->se_callback_addr_len = be32_to_cpup(p++); - if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len); - if (!p) - return nfserr_bad_xdr; - setclientid->se_callback_addr_val = svcxdr_savemem(argp, p, - setclientid->se_callback_addr_len); - if (!setclientid->se_callback_addr_val) - return nfserr_jukebox; - if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0) - return nfserr_bad_xdr; + READ_BUF(setclientid->se_callback_addr_len); + SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); + READ_BUF(4); + setclientid->se_callback_ident = be32_to_cpup(p++); - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) { - struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm; - __be32 status; + DECODE_HEAD; if (argp->minorversion >= 1) return nfserr_notsupp; - status = nfsd4_decode_clientid4(argp, &scd_c->sc_clientid); - if (status) - return status; - return nfsd4_decode_verifier4(argp, &scd_c->sc_confirm); + READ_BUF(8 + NFS4_VERIFIER_SIZE); + 
COPYMEM(&scd_c->sc_clientid, 8); + COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); + + DECODE_TAIL; } /* Also used for NVERIFY */ static __be32 -nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { - struct nfsd4_verify *verify = &u->verify; - __be32 *p, status; + DECODE_HEAD; - memset(verify, 0, sizeof(*verify)); - - status = nfsd4_decode_bitmap4(argp, verify->ve_bmval, - ARRAY_SIZE(verify->ve_bmval)); - if (status) - return status; + if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) + goto out; /* For convenience's sake, we compare raw xdr'd attributes in * nfsd4_proc_verify */ - if (xdr_stream_decode_u32(argp->xdr, &verify->ve_attrlen) < 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, verify->ve_attrlen); - if (!p) - return nfserr_bad_xdr; - verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen); - if (!verify->ve_attrval) - return nfserr_jukebox; + READ_BUF(4); + verify->ve_attrlen = be32_to_cpup(p++); + READ_BUF(verify->ve_attrlen); + SAVEMEM(verify->ve_attrval, verify->ve_attrlen); - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { - struct nfsd4_write *write = &u->write; - __be32 status; + DECODE_HEAD; - status = nfsd4_decode_stateid4(argp, &write->wr_stateid); + status = nfsd4_decode_stateid(argp, &write->wr_stateid); if (status) return status; - if (xdr_stream_decode_u64(argp->xdr, &write->wr_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &write->wr_stable_how) < 0) - return nfserr_bad_xdr; + READ_BUF(16); + p = xdr_decode_hyper(p, &write->wr_offset); + write->wr_stable_how = be32_to_cpup(p++); if (write->wr_stable_how > NFS_FILE_SYNC) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &write->wr_buflen) < 0) - return 
nfserr_bad_xdr; - if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) - return nfserr_bad_xdr; + goto xdr_error; + write->wr_buflen = be32_to_cpup(p++); - write->wr_bytes_written = 0; - write->wr_how_written = 0; - memset(&write->wr_verifier, 0, sizeof(write->wr_verifier)); - return nfs_ok; + status = svcxdr_construct_vector(argp, &write->wr_head, + &write->wr_pagelist, write->wr_buflen); + if (status) + return status; + + DECODE_TAIL; } static __be32 -nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) { - struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; - __be32 status; + DECODE_HEAD; if (argp->minorversion >= 1) return nfserr_notsupp; - status = nfsd4_decode_state_owner4(argp, &rlockowner->rl_clientid, - &rlockowner->rl_owner); - if (status) - return status; + READ_BUF(12); + COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); + rlockowner->rl_owner.len = be32_to_cpup(p++); + READ_BUF(rlockowner->rl_owner.len); + READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) return nfserr_inval; - - return nfs_ok; -} - -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl; - memset(bc, 0, sizeof(*bc)); - if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) - return nfserr_bad_xdr; - return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); -} - -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; - u32 use_conn_in_rdma_mode; - __be32 status; - - memset(bcts, 0, sizeof(*bcts)); - status = nfsd4_decode_sessionid4(argp, &bcts->sessionid); - if (status) - return status; - if 
(xdr_stream_decode_u32(argp->xdr, &bcts->dir) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &use_conn_in_rdma_mode) < 0) - return nfserr_bad_xdr; - - return nfs_ok; -} - -static __be32 -nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) -{ - __be32 status; - - status = nfsd4_decode_bitmap4(argp, exid->spo_must_enforce, - ARRAY_SIZE(exid->spo_must_enforce)); - if (status) - return nfserr_bad_xdr; - status = nfsd4_decode_bitmap4(argp, exid->spo_must_allow, - ARRAY_SIZE(exid->spo_must_allow)); - if (status) - return nfserr_bad_xdr; - - return nfs_ok; -} - -/* - * This implementation currently does not support SP4_SSV. - * This decoder simply skips over these arguments. - */ -static noinline __be32 -nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) -{ - u32 count, window, num_gss_handles; - __be32 status; - - /* ssp_ops */ - status = nfsd4_decode_state_protect_ops(argp, exid); - if (status) - return status; - - /* ssp_hash_algs<> */ - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - while (count--) { - status = nfsd4_decode_ignored_string(argp, 0); - if (status) - return status; - } - - /* ssp_encr_algs<> */ - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - while (count--) { - status = nfsd4_decode_ignored_string(argp, 0); - if (status) - return status; - } - - if (xdr_stream_decode_u32(argp->xdr, &window) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &num_gss_handles) < 0) - return nfserr_bad_xdr; - - return nfs_ok; -} - -static __be32 -nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) -{ - __be32 status; - - if (xdr_stream_decode_u32(argp->xdr, &exid->spa_how) < 0) - return nfserr_bad_xdr; - switch (exid->spa_how) { - case SP4_NONE: - break; - case SP4_MACH_CRED: - status = nfsd4_decode_state_protect_ops(argp, exid); - if (status) 
- return status; - break; - case SP4_SSV: - status = nfsd4_decode_ssv_sp_parms(argp, exid); - if (status) - return status; - break; - default: - return nfserr_bad_xdr; - } - - return nfs_ok; -} - -static __be32 -nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) -{ - __be32 status; - u32 count; - - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - switch (count) { - case 0: - break; - case 1: - /* Note that RFC 8881 places no length limit on - * nii_domain, but this implementation permits no - * more than NFS4_OPAQUE_LIMIT bytes */ - status = nfsd4_decode_opaque(argp, &exid->nii_domain); - if (status) - return status; - /* Note that RFC 8881 places no length limit on - * nii_name, but this implementation permits no - * more than NFS4_OPAQUE_LIMIT bytes */ - status = nfsd4_decode_opaque(argp, &exid->nii_name); - if (status) - return status; - status = nfsd4_decode_nfstime4(argp, &exid->nii_time); - if (status) - return status; - break; - default: - return nfserr_bad_xdr; - } - - return nfs_ok; + DECODE_TAIL; } static __be32 nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_exchange_id *exid) { - struct nfsd4_exchange_id *exid = &u->exchange_id; - __be32 status; + int dummy, tmp; + DECODE_HEAD; + + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); - memset(exid, 0, sizeof(*exid)); - status = nfsd4_decode_verifier4(argp, &exid->verifier); - if (status) - return status; status = nfsd4_decode_opaque(argp, &exid->clname); if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &exid->flags) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_state_protect4_a(argp, exid); - if (status) - return status; - return nfsd4_decode_nfs_impl_id4(argp, exid); -} - -static __be32 -nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, - struct nfsd4_channel_attrs *ca) -{ - __be32 *p; - - p = 
xdr_inline_decode(argp->xdr, XDR_UNIT * 7); - if (!p) return nfserr_bad_xdr; - /* headerpadsz is ignored */ - p++; - ca->maxreq_sz = be32_to_cpup(p++); - ca->maxresp_sz = be32_to_cpup(p++); - ca->maxresp_cached = be32_to_cpup(p++); - ca->maxops = be32_to_cpup(p++); - ca->maxreqs = be32_to_cpup(p++); - ca->nr_rdma_attrs = be32_to_cpup(p); - switch (ca->nr_rdma_attrs) { - case 0: + READ_BUF(4); + exid->flags = be32_to_cpup(p++); + + /* Ignore state_protect4_a */ + READ_BUF(4); + exid->spa_how = be32_to_cpup(p++); + switch (exid->spa_how) { + case SP4_NONE: break; - case 1: - if (xdr_stream_decode_u32(argp->xdr, &ca->rdma_attrs) < 0) - return nfserr_bad_xdr; + case SP4_MACH_CRED: + /* spo_must_enforce */ + status = nfsd4_decode_bitmap(argp, + exid->spo_must_enforce); + if (status) + goto out; + /* spo_must_allow */ + status = nfsd4_decode_bitmap(argp, exid->spo_must_allow); + if (status) + goto out; + break; + case SP4_SSV: + /* ssp_ops */ + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + p += dummy; + + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy * 4); + p += dummy; + + /* ssp_hash_algs<> */ + READ_BUF(4); + tmp = be32_to_cpup(p++); + while (tmp--) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } + + /* ssp_encr_algs<> */ + READ_BUF(4); + tmp = be32_to_cpup(p++); + while (tmp--) { + READ_BUF(4); + dummy = be32_to_cpup(p++); + READ_BUF(dummy); + p += XDR_QUADLEN(dummy); + } + + /* ignore ssp_window and ssp_num_gss_handles: */ + READ_BUF(8); break; default: - return nfserr_bad_xdr; + goto xdr_error; } - return nfs_ok; + READ_BUF(4); /* nfs_impl_id4 array length */ + dummy = be32_to_cpup(p++); + + if (dummy > 1) + goto xdr_error; + + if (dummy == 1) { + status = nfsd4_decode_opaque(argp, &exid->nii_domain); + if (status) + goto xdr_error; + + /* nii_name */ + status = nfsd4_decode_opaque(argp, &exid->nii_name); + if (status) + goto xdr_error; + + /* nii_date */ + status = 
nfsd4_decode_time(argp, &exid->nii_time); + if (status) + goto xdr_error; + } + DECODE_TAIL; } static __be32 nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_create_session *sess) { - struct nfsd4_create_session *sess = &u->create_session; - __be32 status; + DECODE_HEAD; - memset(sess, 0, sizeof(*sess)); - status = nfsd4_decode_clientid4(argp, &sess->clientid); - if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &sess->seqid) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &sess->flags) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel); - if (status) - return status; - status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel); - if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0) - return nfserr_bad_xdr; - return nfsd4_decode_cb_sec(argp, &sess->cb_sec); + READ_BUF(16); + COPYMEM(&sess->clientid, 8); + sess->seqid = be32_to_cpup(p++); + sess->flags = be32_to_cpup(p++); + + /* Fore channel attrs */ + READ_BUF(28); + p++; /* headerpadsz is always 0 */ + sess->fore_channel.maxreq_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_sz = be32_to_cpup(p++); + sess->fore_channel.maxresp_cached = be32_to_cpup(p++); + sess->fore_channel.maxops = be32_to_cpup(p++); + sess->fore_channel.maxreqs = be32_to_cpup(p++); + sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); + if (sess->fore_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + sess->fore_channel.rdma_attrs = be32_to_cpup(p++); + } else if (sess->fore_channel.nr_rdma_attrs > 1) { + dprintk("Too many fore channel attr bitmaps!\n"); + goto xdr_error; + } + + /* Back channel attrs */ + READ_BUF(28); + p++; /* headerpadsz is always 0 */ + sess->back_channel.maxreq_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_sz = be32_to_cpup(p++); + sess->back_channel.maxresp_cached = be32_to_cpup(p++); + sess->back_channel.maxops = be32_to_cpup(p++); 
+ sess->back_channel.maxreqs = be32_to_cpup(p++); + sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); + if (sess->back_channel.nr_rdma_attrs == 1) { + READ_BUF(4); + sess->back_channel.rdma_attrs = be32_to_cpup(p++); + } else if (sess->back_channel.nr_rdma_attrs > 1) { + dprintk("Too many back channel attr bitmaps!\n"); + goto xdr_error; + } + + READ_BUF(4); + sess->callback_prog = be32_to_cpup(p++); + nfsd4_decode_cb_sec(argp, &sess->cb_sec); + DECODE_TAIL; } static __be32 nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_destroy_session *destroy_session) { - struct nfsd4_destroy_session *destroy_session = &u->destroy_session; - return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); + DECODE_HEAD; + READ_BUF(NFS4_MAX_SESSIONID_LEN); + COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); + + DECODE_TAIL; } static __be32 nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_free_stateid *free_stateid) { - struct nfsd4_free_stateid *free_stateid = &u->free_stateid; - return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); + COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + struct nfsd4_sequence *seq) +{ + DECODE_HEAD; + + READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); + COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p++); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + int i; + __be32 *p, status; + struct nfsd4_test_stateid_id *stateid; + + READ_BUF(4); + test_stateid->ts_num_ids = 
ntohl(*p++); + + INIT_LIST_HEAD(&test_stateid->ts_stateid_list); + + for (i = 0; i < test_stateid->ts_num_ids; i++) { + stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); + if (!stateid) { + status = nfserrno(-ENOMEM); + goto out; + } + + INIT_LIST_HEAD(&stateid->ts_id_list); + list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); + + status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid); + if (status) + goto out; + } + + status = 0; +out: + return status; +xdr_error: + dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); + status = nfserr_bad_xdr; + goto out; +} + +static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) +{ + DECODE_HEAD; + + READ_BUF(8); + COPYMEM(&dc->clientid, 8); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) +{ + DECODE_HEAD; + + READ_BUF(4); + rc->rca_one_fs = be32_to_cpup(p++); + + DECODE_TAIL; } #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_getdeviceinfo *gdev) { - struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; - __be32 status; + DECODE_HEAD; + u32 num, i; - memset(gdev, 0, sizeof(*gdev)); - status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); + READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4); + COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid)); + gdev->gd_layout_type = be32_to_cpup(p++); + gdev->gd_maxcount = be32_to_cpup(p++); + num = be32_to_cpup(p++); + if (num) { + if (num > 1000) + goto xdr_error; + READ_BUF(4 * num); + gdev->gd_notify_types = be32_to_cpup(p++); + for (i = 1; i < num; i++) { + if (be32_to_cpup(p++)) { + status = nfserr_inval; + goto out; + } + } + } + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutget *lgp) +{ + DECODE_HEAD; + + READ_BUF(36); + lgp->lg_signal = be32_to_cpup(p++); + 
lgp->lg_layout_type = be32_to_cpup(p++); + lgp->lg_seg.iomode = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &lgp->lg_seg.offset); + p = xdr_decode_hyper(p, &lgp->lg_seg.length); + p = xdr_decode_hyper(p, &lgp->lg_minlength); + + status = nfsd4_decode_stateid(argp, &lgp->lg_sid); if (status) return status; - if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_maxcount) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_uint32_array(argp->xdr, - &gdev->gd_notify_types, 1) < 0) - return nfserr_bad_xdr; - return nfs_ok; + READ_BUF(4); + lgp->lg_maxcount = be32_to_cpup(p++); + + DECODE_TAIL; } static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_layoutcommit *lcp) { - struct nfsd4_layoutcommit *lcp = &u->layoutcommit; - __be32 *p, status; + DECODE_HEAD; + u32 timechange; - memset(lcp, 0, sizeof(*lcp)); - if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_reclaim) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_stateid4(argp, &lcp->lc_sid); + READ_BUF(20); + p = xdr_decode_hyper(p, &lcp->lc_seg.offset); + p = xdr_decode_hyper(p, &lcp->lc_seg.length); + lcp->lc_reclaim = be32_to_cpup(p++); + + status = nfsd4_decode_stateid(argp, &lcp->lc_sid); if (status) return status; - if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_newoffset) < 0) - return nfserr_bad_xdr; + + READ_BUF(4); + lcp->lc_newoffset = be32_to_cpup(p++); if (lcp->lc_newoffset) { - if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0) - return nfserr_bad_xdr; + READ_BUF(8); + p = xdr_decode_hyper(p, &lcp->lc_last_wr); } else lcp->lc_last_wr = 0; - p = xdr_inline_decode(argp->xdr, XDR_UNIT); - if (!p) - return nfserr_bad_xdr; - if (xdr_item_is_present(p)) { - status = 
nfsd4_decode_nfstime4(argp, &lcp->lc_mtime); + READ_BUF(4); + timechange = be32_to_cpup(p++); + if (timechange) { + status = nfsd4_decode_time(argp, &lcp->lc_mtime); if (status) return status; } else { lcp->lc_mtime.tv_nsec = UTIME_NOW; } - return nfsd4_decode_layoutupdate4(argp, lcp); -} + READ_BUF(8); + lcp->lc_layout_type = be32_to_cpup(p++); -static __be32 -nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_layoutget *lgp = &u->layoutget; - __be32 status; + /* + * Save the layout update in XDR format and let the layout driver deal + * with it later. + */ + lcp->lc_up_len = be32_to_cpup(p++); + if (lcp->lc_up_len > 0) { + READ_BUF(lcp->lc_up_len); + READMEM(lcp->lc_up_layout, lcp->lc_up_len); + } - memset(lgp, 0, sizeof(*lgp)); - if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_seg.iomode) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.length) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_minlength) < 0) - return nfserr_bad_xdr; - status = nfsd4_decode_stateid4(argp, &lgp->lg_sid); - if (status) - return status; - if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_maxcount) < 0) - return nfserr_bad_xdr; - - return nfs_ok; + DECODE_TAIL; } static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_layoutreturn *lrp) { - struct nfsd4_layoutreturn *lrp = &u->layoutreturn; - memset(lrp, 0, sizeof(*lrp)); - if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_seg.iomode) < 0) - 
return nfserr_bad_xdr; - return nfsd4_decode_layoutreturn4(argp, lrp); + DECODE_HEAD; + + READ_BUF(16); + lrp->lr_reclaim = be32_to_cpup(p++); + lrp->lr_layout_type = be32_to_cpup(p++); + lrp->lr_seg.iomode = be32_to_cpup(p++); + lrp->lr_return_type = be32_to_cpup(p++); + if (lrp->lr_return_type == RETURN_FILE) { + READ_BUF(16); + p = xdr_decode_hyper(p, &lrp->lr_seg.offset); + p = xdr_decode_hyper(p, &lrp->lr_seg.length); + + status = nfsd4_decode_stateid(argp, &lrp->lr_sid); + if (status) + return status; + + READ_BUF(4); + lrp->lrf_body_len = be32_to_cpup(p++); + if (lrp->lrf_body_len > 0) { + READ_BUF(lrp->lrf_body_len); + READMEM(lrp->lrf_body, lrp->lrf_body_len); + } + } else { + lrp->lr_seg.offset = 0; + lrp->lr_seg.length = NFS4_MAX_UINT64; + } + + DECODE_TAIL; } #endif /* CONFIG_NFSD_PNFS */ -static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name; - if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) - return nfserr_bad_xdr; - - sin->sin_exp = NULL; - return nfs_ok; -} - -static __be32 -nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_sequence *seq = &u->sequence; - __be32 *p, status; - - status = nfsd4_decode_sessionid4(argp, &seq->sessionid); - if (status) - return status; - p = xdr_inline_decode(argp->xdr, XDR_UNIT * 4); - if (!p) - return nfserr_bad_xdr; - seq->seqid = be32_to_cpup(p++); - seq->slotid = be32_to_cpup(p++); - seq->maxslots = be32_to_cpup(p++); - seq->cachethis = be32_to_cpup(p); - - seq->status_flags = 0; - return nfs_ok; -} - -static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_test_stateid *test_stateid = &u->test_stateid; - struct nfsd4_test_stateid_id *stateid; - __be32 status; - u32 i; - - memset(test_stateid, 0, sizeof(*test_stateid)); - if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0) - return 
nfserr_bad_xdr; - - INIT_LIST_HEAD(&test_stateid->ts_stateid_list); - for (i = 0; i < test_stateid->ts_num_ids; i++) { - stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); - if (!stateid) - return nfserr_jukebox; - INIT_LIST_HEAD(&stateid->ts_id_list); - list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); - status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid); - if (status) - return status; - } - - return nfs_ok; -} - -static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_destroy_clientid *dc = &u->destroy_clientid; - return nfsd4_decode_clientid4(argp, &dc->clientid); -} - -static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; - if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) - return nfserr_bad_xdr; - return nfs_ok; -} - static __be32 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_fallocate *fallocate) { - struct nfsd4_fallocate *fallocate = &u->allocate; - __be32 status; + DECODE_HEAD; - status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); + status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid); if (status) return status; - if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_length) < 0) - return nfserr_bad_xdr; - return nfs_ok; + READ_BUF(16); + p = xdr_decode_hyper(p, &fallocate->falloc_offset); + xdr_decode_hyper(p, &fallocate->falloc_length); + + DECODE_TAIL; +} + +static __be32 +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +{ + DECODE_HEAD; + + status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid); + if (status) + return status; + status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid); + if (status) + return status; + + READ_BUF(8 + 8 + 
8); + p = xdr_decode_hyper(p, &clone->cl_src_pos); + p = xdr_decode_hyper(p, &clone->cl_dst_pos); + p = xdr_decode_hyper(p, &clone->cl_count); + DECODE_TAIL; } static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, struct nl4_server *ns) { + DECODE_HEAD; struct nfs42_netaddr *naddr; - __be32 *p; - if (xdr_stream_decode_u32(argp->xdr, &ns->nl4_type) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + ns->nl4_type = be32_to_cpup(p++); /* currently support for 1 inter-server source server */ switch (ns->nl4_type) { case NL4_NETADDR: naddr = &ns->u.nl4_addr; - if (xdr_stream_decode_u32(argp->xdr, &naddr->netid_len) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + naddr->netid_len = be32_to_cpup(p++); if (naddr->netid_len > RPCBIND_MAXNETIDLEN) - return nfserr_bad_xdr; + goto xdr_error; - p = xdr_inline_decode(argp->xdr, naddr->netid_len); - if (!p) - return nfserr_bad_xdr; - memcpy(naddr->netid, p, naddr->netid_len); + READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */ + COPYMEM(naddr->netid, naddr->netid_len); - if (xdr_stream_decode_u32(argp->xdr, &naddr->addr_len) < 0) - return nfserr_bad_xdr; + naddr->addr_len = be32_to_cpup(p++); if (naddr->addr_len > RPCBIND_MAXUADDRLEN) - return nfserr_bad_xdr; + goto xdr_error; - p = xdr_inline_decode(argp->xdr, naddr->addr_len); - if (!p) - return nfserr_bad_xdr; - memcpy(naddr->addr, p, naddr->addr_len); + READ_BUF(naddr->addr_len); + COPYMEM(naddr->addr, naddr->addr_len); break; default: - return nfserr_bad_xdr; + goto xdr_error; } - - return nfs_ok; + DECODE_TAIL; } static __be32 -nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { - struct nfsd4_copy *copy = &u->copy; - u32 consecutive, i, count, sync; + DECODE_HEAD; struct nl4_server *ns_dummy; - __be32 status; + int i, count; - memset(copy, 0, sizeof(*copy)); - status = nfsd4_decode_stateid4(argp, &copy->cp_src_stateid); + status = nfsd4_decode_stateid(argp, 
&copy->cp_src_stateid); if (status) return status; - status = nfsd4_decode_stateid4(argp, &copy->cp_dst_stateid); + status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid); if (status) return status; - if (xdr_stream_decode_u64(argp->xdr, &copy->cp_src_pos) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &copy->cp_dst_pos) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &copy->cp_count) < 0) - return nfserr_bad_xdr; - /* ca_consecutive: we always do consecutive copies */ - if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_bool(argp->xdr, &sync) < 0) - return nfserr_bad_xdr; - nfsd4_copy_set_sync(copy, sync); - if (xdr_stream_decode_u32(argp->xdr, &count) < 0) - return nfserr_bad_xdr; - copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src)); - if (copy->cp_src == NULL) - return nfserr_jukebox; + READ_BUF(8 + 8 + 8 + 4 + 4 + 4); + p = xdr_decode_hyper(p, &copy->cp_src_pos); + p = xdr_decode_hyper(p, &copy->cp_dst_pos); + p = xdr_decode_hyper(p, &copy->cp_count); + p++; /* ca_consecutive: we always do consecutive copies */ + copy->cp_synchronous = be32_to_cpup(p++); + + count = be32_to_cpup(p++); + + copy->cp_intra = false; if (count == 0) { /* intra-server copy */ - __set_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags); - return nfs_ok; + copy->cp_intra = true; + goto intra; } - /* decode all the supplied server addresses but use only the first */ - status = nfsd4_decode_nl4_server(argp, copy->cp_src); + /* decode all the supplied server addresses but use first */ + status = nfsd4_decode_nl4_server(argp, &copy->cp_src); if (status) return status; ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); if (ns_dummy == NULL) - return nfserr_jukebox; + return nfserrno(-ENOMEM); for (i = 0; i < count - 1; i++) { status = nfsd4_decode_nl4_server(argp, ns_dummy); if (status) { @@ -2027,80 +1839,44 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) } } kfree(ns_dummy); +intra: - return nfs_ok; 
-} - -static __be32 -nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) -{ - struct nfsd4_copy_notify *cn = &u->copy_notify; - __be32 status; - - memset(cn, 0, sizeof(*cn)); - cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src)); - if (cn->cpn_src == NULL) - return nfserr_jukebox; - cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst)); - if (cn->cpn_dst == NULL) - return nfserr_jukebox; - - status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid); - if (status) - return status; - return nfsd4_decode_nl4_server(argp, cn->cpn_dst); + DECODE_TAIL; } static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_offload_status *os) { - struct nfsd4_offload_status *os = &u->offload_status; - os->count = 0; - os->status = 0; - return nfsd4_decode_stateid4(argp, &os->stateid); + return nfsd4_decode_stateid(argp, &os->stateid); } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, + struct nfsd4_copy_notify *cn) { - struct nfsd4_seek *seek = &u->seek; __be32 status; - status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); + status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid); if (status) return status; - if (xdr_stream_decode_u64(argp->xdr, &seek->seek_offset) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0) - return nfserr_bad_xdr; - - seek->seek_eof = 0; - seek->seek_pos = 0; - return nfs_ok; + return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { - struct nfsd4_clone *clone = &u->clone; - __be32 status; + DECODE_HEAD; - status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); + status = nfsd4_decode_stateid(argp, &seek->seek_stateid); if (status) return status; - 
status = nfsd4_decode_stateid4(argp, &clone->cl_dst_stateid); - if (status) - return status; - if (xdr_stream_decode_u64(argp->xdr, &clone->cl_src_pos) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &clone->cl_dst_pos) < 0) - return nfserr_bad_xdr; - if (xdr_stream_decode_u64(argp->xdr, &clone->cl_count) < 0) - return nfserr_bad_xdr; - return nfs_ok; + READ_BUF(8 + 4); + p = xdr_decode_hyper(p, &seek->seek_offset); + seek->seek_whence = be32_to_cpup(p); + + DECODE_TAIL; } /* @@ -2113,14 +1889,13 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) */ /* - * Decode data into buffer. + * Decode data into buffer. Uses head and pages constructed by + * svcxdr_construct_vector. */ static __be32 -nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, - char **bufp, u32 buflen) +nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head, + struct page **pages, char **bufp, u32 buflen) { - struct page **pages = xdr->pages; - struct kvec *head = xdr->head; char *tmp, *dp; u32 len; @@ -2163,22 +1938,25 @@ nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, static __be32 nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) { + DECODE_HEAD; char *name, *sp, *dp; u32 namelen, cnt; - __be32 *p; - if (xdr_stream_decode_u32(argp->xdr, &namelen) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + namelen = be32_to_cpup(p++); + if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) return nfserr_nametoolong; + if (namelen == 0) - return nfserr_bad_xdr; - p = xdr_inline_decode(argp->xdr, namelen); - if (!p) - return nfserr_bad_xdr; + goto xdr_error; + + READ_BUF(namelen); + name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1); if (!name) return nfserr_jukebox; + memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); /* @@ -2191,14 +1969,14 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) while (cnt-- > 0) { if (*sp == '\0') - 
return nfserr_bad_xdr; + goto xdr_error; *dp++ = *sp++; } *dp = '\0'; *namep = name; - return nfs_ok; + DECODE_TAIL; } /* @@ -2209,13 +1987,11 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) */ static __be32 nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_getxattr *getxattr) { - struct nfsd4_getxattr *getxattr = &u->getxattr; __be32 status; u32 maxcount; - memset(getxattr, 0, sizeof(*getxattr)); status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name); if (status) return status; @@ -2224,21 +2000,21 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); getxattr->getxa_len = maxcount; - return nfs_ok; + + return status; } static __be32 nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_setxattr *setxattr) { - struct nfsd4_setxattr *setxattr = &u->setxattr; + DECODE_HEAD; u32 flags, maxcount, size; - __be32 status; + struct kvec head; + struct page **pagelist; - memset(setxattr, 0, sizeof(*setxattr)); - - if (xdr_stream_decode_u32(argp->xdr, &flags) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + flags = be32_to_cpup(p++); if (flags > SETXATTR4_REPLACE) return nfserr_inval; @@ -2251,35 +2027,33 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, maxcount = svc_max_payload(argp->rqstp); maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); - if (xdr_stream_decode_u32(argp->xdr, &size) < 0) - return nfserr_bad_xdr; + READ_BUF(4); + size = be32_to_cpup(p++); if (size > maxcount) return nfserr_xattr2big; setxattr->setxa_len = size; if (size > 0) { - struct xdr_buf payload; + status = svcxdr_construct_vector(argp, &head, &pagelist, size); + if (status) + return status; - if (!xdr_stream_subsegment(argp->xdr, &payload, size)) - return nfserr_bad_xdr; - status = nfsd4_vbuf_from_vector(argp, &payload, - &setxattr->setxa_buf, size); + status = nfsd4_vbuf_from_vector(argp, &head, pagelist, + 
&setxattr->setxa_buf, size); } - return nfs_ok; + DECODE_TAIL; } static __be32 nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_listxattrs *listxattrs) { - struct nfsd4_listxattrs *listxattrs = &u->listxattrs; + DECODE_HEAD; u32 maxcount; - memset(listxattrs, 0, sizeof(*listxattrs)); - - if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0) - return nfserr_bad_xdr; + READ_BUF(12); + p = xdr_decode_hyper(p, &listxattrs->lsxa_cookie); /* * If the cookie is too large to have even one user.x attribute @@ -2289,8 +2063,7 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2))) return nfserr_badcookie; - if (xdr_stream_decode_u32(argp->xdr, &maxcount) < 0) - return nfserr_bad_xdr; + maxcount = be32_to_cpup(p++); if (maxcount < 8) /* Always need at least 2 words (length and one character) */ return nfserr_inval; @@ -2298,119 +2071,117 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, maxcount = min(maxcount, svc_max_payload(argp->rqstp)); listxattrs->lsxa_maxcount = maxcount; - return nfs_ok; + DECODE_TAIL; } static __be32 nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, - union nfsd4_op_u *u) + struct nfsd4_removexattr *removexattr) { - struct nfsd4_removexattr *removexattr = &u->removexattr; - memset(removexattr, 0, sizeof(*removexattr)); return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 -nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) +nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) { return nfs_ok; } static __be32 -nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) +nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) { return nfserr_notsupp; } -typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u); +typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); static const nfsd4_dec nfsd4_dec_ops[] = { - 
[OP_ACCESS] = nfsd4_decode_access, - [OP_CLOSE] = nfsd4_decode_close, - [OP_COMMIT] = nfsd4_decode_commit, - [OP_CREATE] = nfsd4_decode_create, - [OP_DELEGPURGE] = nfsd4_decode_notsupp, - [OP_DELEGRETURN] = nfsd4_decode_delegreturn, - [OP_GETATTR] = nfsd4_decode_getattr, - [OP_GETFH] = nfsd4_decode_noop, - [OP_LINK] = nfsd4_decode_link, - [OP_LOCK] = nfsd4_decode_lock, - [OP_LOCKT] = nfsd4_decode_lockt, - [OP_LOCKU] = nfsd4_decode_locku, - [OP_LOOKUP] = nfsd4_decode_lookup, - [OP_LOOKUPP] = nfsd4_decode_noop, - [OP_NVERIFY] = nfsd4_decode_verify, - [OP_OPEN] = nfsd4_decode_open, - [OP_OPENATTR] = nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, - [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, - [OP_PUTFH] = nfsd4_decode_putfh, - [OP_PUTPUBFH] = nfsd4_decode_putpubfh, - [OP_PUTROOTFH] = nfsd4_decode_noop, - [OP_READ] = nfsd4_decode_read, - [OP_READDIR] = nfsd4_decode_readdir, - [OP_READLINK] = nfsd4_decode_noop, - [OP_REMOVE] = nfsd4_decode_remove, - [OP_RENAME] = nfsd4_decode_rename, - [OP_RENEW] = nfsd4_decode_renew, - [OP_RESTOREFH] = nfsd4_decode_noop, - [OP_SAVEFH] = nfsd4_decode_noop, - [OP_SECINFO] = nfsd4_decode_secinfo, - [OP_SETATTR] = nfsd4_decode_setattr, - [OP_SETCLIENTID] = nfsd4_decode_setclientid, - [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm, - [OP_VERIFY] = nfsd4_decode_verify, - [OP_WRITE] = nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner, + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = 
(nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, /* new operations for NFSv4.1 */ - [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl, - [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session, - [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] = nfsd4_decode_create_session, - [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, + [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, + [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, + 
[OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, + [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, - [OP_GETDEVICELIST] = nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit, - [OP_LAYOUTGET] = nfsd4_decode_layoutget, - [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn, + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo, + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit, + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget, + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn, #else - [OP_GETDEVICEINFO] = nfsd4_decode_notsupp, - [OP_GETDEVICELIST] = nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp, - [OP_LAYOUTGET] = nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] = nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, #endif - [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name, - [OP_SEQUENCE] = nfsd4_decode_sequence, - [OP_SET_SSV] = nfsd4_decode_notsupp, - [OP_TEST_STATEID] = nfsd4_decode_test_stateid, - [OP_WANT_DELEGATION] = nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid, - [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete, + [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, + [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, + [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, + [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, + [OP_RECLAIM_COMPLETE] = 
(nfsd4_dec)nfsd4_decode_reclaim_complete, /* new operations for NFSv4.2 */ - [OP_ALLOCATE] = nfsd4_decode_fallocate, - [OP_COPY] = nfsd4_decode_copy, - [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify, - [OP_DEALLOCATE] = nfsd4_decode_fallocate, - [OP_IO_ADVISE] = nfsd4_decode_notsupp, - [OP_LAYOUTERROR] = nfsd4_decode_notsupp, - [OP_LAYOUTSTATS] = nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status, - [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status, - [OP_READ_PLUS] = nfsd4_decode_read, - [OP_SEEK] = nfsd4_decode_seek, - [OP_WRITE_SAME] = nfsd4_decode_notsupp, - [OP_CLONE] = nfsd4_decode_clone, + [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, + [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, + [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, + [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, + [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, + [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, + [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, + [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = nfsd4_decode_getxattr, - [OP_SETXATTR] = nfsd4_decode_setxattr, - [OP_LISTXATTRS] = nfsd4_decode_listxattrs, - [OP_REMOVEXATTR] = nfsd4_decode_removexattr, + [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, + [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, + [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, }; static inline bool @@ -2427,46 +2198,43 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -static bool +static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { + DECODE_HEAD; struct 
nfsd4_op *op; bool cachethis = false; int auth_slack= argp->rqstp->rq_auth_slack; int max_reply = auth_slack + 8; /* opcnt, status */ int readcount = 0; int readbytes = 0; - __be32 *p; int i; - if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0) - return false; - max_reply += XDR_UNIT; - argp->tag = NULL; - if (unlikely(argp->taglen)) { - if (argp->taglen > NFSD4_MAX_TAGLEN) - return false; - p = xdr_inline_decode(argp->xdr, argp->taglen); - if (!p) - return false; - argp->tag = svcxdr_savemem(argp, p, argp->taglen); - if (!argp->tag) - return false; - max_reply += xdr_align_size(argp->taglen); - } + READ_BUF(4); + argp->taglen = be32_to_cpup(p++); + READ_BUF(argp->taglen); + SAVEMEM(argp->tag, argp->taglen); + READ_BUF(8); + argp->minorversion = be32_to_cpup(p++); + argp->opcnt = be32_to_cpup(p++); + max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); - if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) - return false; - if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) - return false; - argp->opcnt = min_t(u32, argp->client_opcnt, - NFSD_MAX_OPS_PER_COMPOUND); + if (argp->taglen > NFSD4_MAX_TAGLEN) + goto xdr_error; + /* + * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS + * here, so we return success at the xdr level so that + * nfsd4_proc can handle this is an NFS-level error. 
+ */ + if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) + return 0; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); + argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; - return false; + dprintk("nfsd: couldn't allocate room for COMPOUND\n"); + goto xdr_error; } } @@ -2476,23 +2244,17 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; - op->opdesc = NULL; - if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) - return false; - if (nfsd4_opnum_in_range(argp, op)) { - op->opdesc = OPDESC(op); + READ_BUF(4); + op->opnum = be32_to_cpup(p++); + + if (nfsd4_opnum_in_range(argp, op)) op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); - if (op->status != nfs_ok) - trace_nfsd_compound_decode_err(argp->rqstp, - argp->opcnt, i, - op->opnum, - op->status); - } else { + else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - + op->opdesc = OPDESC(op); /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: @@ -2527,7 +2289,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); - return true; + DECODE_TAIL; } static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, @@ -2536,25 +2298,15 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, if (exp->ex_flags & NFSEXP_V4ROOT) { *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); *p++ = 0; - } else + } else if (IS_I_VERSION(inode)) { p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); + } else { + *p++ = cpu_to_be32(stat->ctime.tv_sec); + *p++ = cpu_to_be32(stat->ctime.tv_nsec); + } return p; } -static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, - struct timespec64 *tv) -{ - __be32 *p; - - p = 
xdr_reserve_space(xdr, XDR_UNIT * 3); - if (!p) - return nfserr_resource; - - p = xdr_encode_hyper(p, (s64)tv->tv_sec); - *p = cpu_to_be32(tv->tv_nsec); - return nfs_ok; -} - /* * ctime (in NFSv4, time_metadata) is not writeable, and the client * doesn't really care what resolution could theoretically be stored by @@ -2583,8 +2335,15 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { *p++ = cpu_to_be32(c->atomic); - p = xdr_encode_hyper(p, c->before_change); - p = xdr_encode_hyper(p, c->after_change); + if (c->change_supported) { + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); + } else { + *p++ = cpu_to_be32(c->before_ctime_sec); + *p++ = cpu_to_be32(c->before_ctime_nsec); + *p++ = cpu_to_be32(c->after_ctime_sec); + *p++ = cpu_to_be32(c->after_ctime_nsec); + } return p; } @@ -2799,7 +2558,7 @@ static u32 nfs4_file_type(umode_t mode) case S_IFREG: return NF4REG; case S_IFSOCK: return NF4SOCK; default: return NF4BAD; - } + }; } static inline __be32 @@ -2883,10 +2642,9 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 } -static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino) +static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) { struct path path = exp->ex_path; - struct kstat stat; int err; path_get(&path); @@ -2894,10 +2652,8 @@ static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); + err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); path_put(&path); - if (!err) - *pino = stat.ino; return err; } @@ -2950,9 +2706,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p, *attrlen_p; + __be32 *p; int starting_len = 
xdr->buf->len; int attrlen_offset; + __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2984,9 +2741,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; - if (!(stat.result_mask & STATX_BTIME)) - /* underlying FS does not offer btime so we can't share it */ - bmval1 &= ~FATTR4_WORD1_TIME_CREATE; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | @@ -3040,9 +2794,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; attrlen_offset = xdr->buf->len; - attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); - if (!attrlen_p) + p = xdr_reserve_space(xdr, 4); + if (!p) goto out_resource; + p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 supp[3]; @@ -3228,7 +2983,7 @@ out_acl: p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -3360,14 +3115,11 @@ out_acl: p = xdr_encode_hyper(p, dummy64); } if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { - status = nfsd4_encode_nfstime4(xdr, &stat.atime); - if (status) - goto out; - } - if (bmval1 & FATTR4_WORD1_TIME_CREATE) { - status = nfsd4_encode_nfstime4(xdr, &stat.btime); - if (status) - goto out; + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); + *p++ = cpu_to_be32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { p = xdr_reserve_space(xdr, 12); @@ -3376,31 +3128,36 @@ out_acl: p = encode_time_delta(p, d_inode(dentry)); } if (bmval1 & FATTR4_WORD1_TIME_METADATA) { - status = nfsd4_encode_nfstime4(xdr, &stat.ctime); - if (status) - goto out; + p = 
xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); + *p++ = cpu_to_be32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { - status = nfsd4_encode_nfstime4(xdr, &stat.mtime); - if (status) - goto out; + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); + *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { + struct kstat parent_stat; u64 ino = stat.ino; p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; /* - * Get ino of mountpoint in parent filesystem, if not ignoring - * crossmount and this is the root of a cross-mounted - * filesystem. + * Get parent's attributes if not ignoring crossmount + * and this is the root of a cross-mounted filesystem. */ if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) { - err = nfsd4_get_mounted_on_ino(exp, &ino); + err = get_parent_attributes(exp, &parent_stat); if (err) goto out_nfserr; + ino = parent_stat.ino; } p = xdr_encode_hyper(p, ino); } @@ -3437,6 +3194,16 @@ out_acl: goto out; } + if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) { + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + if (IS_I_VERSION(d_inode(dentry))) + *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR); + else + *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); + } + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, @@ -3455,7 +3222,8 @@ out_acl: *p++ = cpu_to_be32(err == 0); } - *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); + attrlen = htonl(xdr->buf->len - attrlen_offset - 4); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); status = nfs_ok; out: @@ -3624,7 +3392,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_reserve_space(xdr, 3*4 + namlen); if (!p) goto fail; - p = xdr_encode_hyper(p, 
OFFSET_MAX); /* offset of next entry */ + p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); @@ -3708,11 +3476,9 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) } static __be32 -nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - struct nfsd4_access *access = &u->access; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8); @@ -3723,11 +3489,9 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { - struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); @@ -3742,22 +3506,18 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, } static __be32 -nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - struct nfsd4_close *close = &u->close; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); } static __be32 -nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - struct nfsd4_commit *commit = &u->commit; - struct 
xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); @@ -3769,11 +3529,9 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - struct nfsd4_create *create = &u->create; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3785,23 +3543,19 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { - struct nfsd4_getattr *getattr = &u->getattr; struct svc_fh *fhp = getattr->ga_fhp; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, getattr->ga_bmval, resp->rqstp, 0); } static __be32 -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { - struct svc_fh **fhpp = &u->getfh; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; @@ -3810,7 +3564,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); return 0; } @@ -3854,11 +3608,9 @@ again: } static __be32 -nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 
nfserr, struct nfsd4_lock *lock) { - struct nfsd4_lock *lock = &u->lock; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; if (!nfserr) nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); @@ -3869,11 +3621,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { - struct nfsd4_lockt *lockt = &u->lockt; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); @@ -3881,22 +3631,18 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - struct nfsd4_locku *locku = &u->locku; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } static __be32 -nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - struct nfsd4_link *link = &u->link; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3908,11 +3654,9 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 -nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - struct nfsd4_open *open = &u->open; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); @@ 
-4004,21 +3748,17 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - struct nfsd4_open_confirm *oc = &u->open_confirm; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - struct nfsd4_open_downgrade *od = &u->open_downgrade; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); } @@ -4028,28 +3768,33 @@ static __be32 nfsd4_encode_splice_read( struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct xdr_buf *buf = xdr->buf; - int status, space_left; + u32 eof; + int space_left; __be32 nfserr; + __be32 *p = xdr->p - 2; /* Make sure there will be room for padding if needed */ if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, - file, read->rd_offset, &maxcount, - &read->rd_eof); + file, read->rd_offset, &maxcount, &eof); read->rd_length = maxcount; - if (nfserr) - goto out_err; - status = svc_encode_result_payload(read->rd_rqstp, - buf->head[0].iov_len, maxcount); - if (status) { - nfserr = nfserrno(status); - goto out_err; + if (nfserr) { + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode: + */ + buf->page_len = 0; + return nfserr; } + *(p++) = htonl(eof); + *(p++) = htonl(maxcount); + buf->page_len = maxcount; buf->len += 
maxcount; xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) @@ -4075,25 +3820,18 @@ static __be32 nfsd4_encode_splice_read( xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; - -out_err: - /* - * nfsd_splice_actor may have already messed with the - * page length; reset it so as not to confuse - * xdr_truncate_encode in our caller. - */ - buf->page_len = 0; - return nfserr; } static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = resp->xdr; - unsigned int starting_len = xdr->buf->len; - __be32 zero = xdr_zero; + struct xdr_stream *xdr = &resp->xdr; + u32 eof; + int starting_len = xdr->buf->len - 8; __be32 nfserr; + __be32 tmp; + int pad; read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); if (read->rd_vlen < 0) @@ -4101,27 +3839,33 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &read->rd_eof); + &eof); read->rd_length = maxcount; if (nfserr) return nfserr; - if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount)) + if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount)) return nfserr_io; - xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); + xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount)); + + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); + tmp = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + + tmp = xdr_zero; + pad = (maxcount&3) ? 
4 - (maxcount&3) : 0; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, + &tmp, pad); + return 0; - write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero, - xdr_pad_size(maxcount)); - return nfs_ok; } static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_read *read) { - struct nfsd4_read *read = &u->read; - bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct file *file; int starting_len = xdr->buf->len; __be32 *p; @@ -4132,44 +3876,45 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { - WARN_ON_ONCE(splice_ok); + WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); return nfserr_resource; } - if (resp->xdr->buf->page_len && splice_ok) { + if (resp->xdr.buf->page_len && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); return nfserr_serverfault; } xdr_commit_encode(xdr); - maxcount = min_t(unsigned long, read->rd_length, + maxcount = svc_max_payload(resp->rqstp); + maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, read->rd_length); - if (file->f_op->splice_read && splice_ok) + if (file->f_op->splice_read && + test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); - if (nfserr) { - xdr_truncate_encode(xdr, starting_len); - return nfserr; - } - p = xdr_encode_bool(p, read->rd_eof); - *p = cpu_to_be32(read->rd_length); - return nfs_ok; + if (nfserr) + xdr_truncate_encode(xdr, starting_len); + + return nfserr; } static __be32 -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_readlink(struct nfsd4_compoundres 
*resp, __be32 nfserr, struct nfsd4_readlink *readlink) { - struct nfsd4_readlink *readlink = &u->readlink; - __be32 *p, *maxcount_p, zero = xdr_zero; - struct xdr_stream *xdr = resp->xdr; + int maxcount; + __be32 wire_count; + int zero = 0; + struct xdr_stream *xdr = &resp->xdr; int length_offset = xdr->buf->len; - int maxcount, status; + __be32 *p; - maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); - if (!maxcount_p) + p = xdr_reserve_space(xdr, 4); + if (!p) return nfserr_resource; maxcount = PAGE_SIZE; @@ -4186,35 +3931,28 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, (char *)p, &maxcount); if (nfserr == nfserr_isdir) nfserr = nfserr_inval; - if (nfserr) - goto out_err; - status = svc_encode_result_payload(readlink->rl_rqstp, length_offset, - maxcount); - if (status) { - nfserr = nfserrno(status); - goto out_err; + if (nfserr) { + xdr_truncate_encode(xdr, length_offset); + return nfserr; } - *maxcount_p = cpu_to_be32(maxcount); - xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); - write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, - xdr_pad_size(maxcount)); - return nfs_ok; -out_err: - xdr_truncate_encode(xdr, length_offset); - return nfserr; + wire_count = htonl(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); + xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); + if (maxcount & 3) + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, + &zero, 4 - (maxcount&3)); + return 0; } static __be32 -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { - struct nfsd4_readdir *readdir = &u->readdir; int maxcount; int bytes_left; loff_t offset; __be64 wire_offset; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; int starting_len = xdr->buf->len; __be32 *p; @@ -4225,8 +3963,8 @@ 
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ *p++ = cpu_to_be32(0); *p++ = cpu_to_be32(0); - xdr->buf->head[0].iov_len = (char *)xdr->p - - (char *)xdr->buf->head[0].iov_base; + resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) + - (char *)resp->xdr.buf->head[0].iov_base; /* * Number of bytes left for directory entries allowing for the @@ -4299,11 +4037,9 @@ err_no_verf: } static __be32 -nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { - struct nfsd4_remove *remove = &u->remove; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -4314,11 +4050,9 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - struct nfsd4_rename *rename = &u->rename; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 40); @@ -4399,20 +4133,18 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_secinfo *secinfo) { - struct nfsd4_secinfo *secinfo = &u->secinfo; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_secinfo_no_name *secinfo) { - struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = 
&resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } @@ -4422,11 +4154,9 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, * regardless of the error status. */ static __be32 -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - struct nfsd4_setattr *setattr = &u->setattr; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -4448,11 +4178,9 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - struct nfsd4_setclientid *scd = &u->setclientid; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (!nfserr) { @@ -4474,11 +4202,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - struct nfsd4_write *write = &u->write; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -4493,10 +4219,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_exchange_id *exid) { - struct nfsd4_exchange_id *exid = &u->exchange_id; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; char *major_id; char *server_scope; @@ -4572,10 +4297,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 
nfserr, static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_create_session *sess) { - struct nfsd4_create_session *sess = &u->create_session; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 24); @@ -4626,10 +4350,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_sequence *seq) { - struct nfsd4_sequence *seq = &u->sequence; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); @@ -4650,10 +4373,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_test_stateid *test_stateid) { - struct nfsd4_test_stateid *test_stateid = &u->test_stateid; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4672,10 +4394,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_getdeviceinfo *gdev) { - struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4726,10 +4447,9 @@ toosmall: static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_layoutget *lgp) { - struct nfsd4_layoutget *lgp = &u->layoutget; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; 
const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4754,10 +4474,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_layoutcommit *lcp) { - struct nfsd4_layoutcommit *lcp = &u->layoutcommit; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4776,10 +4495,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_layoutreturn *lrp) { - struct nfsd4_layoutreturn *lrp = &u->layoutreturn; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4797,7 +4515,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write, bool sync) { __be32 *p; - p = xdr_reserve_space(resp->xdr, 4); + p = xdr_reserve_space(&resp->xdr, 4); if (!p) return nfserr_resource; @@ -4806,11 +4524,11 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, else { __be32 nfserr; *p++ = cpu_to_be32(1); - nfserr = nfsd4_encode_stateid(resp->xdr, &write->cb_stateid); + nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid); if (nfserr) return nfserr; } - p = xdr_reserve_space(resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -4824,7 +4542,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, static __be32 nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) { - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct nfs42_netaddr *addr; __be32 *p; @@ -4863,28 +4581,26 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) static __be32 nfsd4_encode_copy(struct 
nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_copy *copy) { - struct nfsd4_copy *copy = &u->copy; __be32 *p; nfserr = nfsd42_encode_write_res(resp, &copy->cp_res, - nfsd4_copy_is_sync(copy)); + copy->cp_synchronous); if (nfserr) return nfserr; - p = xdr_reserve_space(resp->xdr, 4 + 4); + p = xdr_reserve_space(&resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ - *p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero; + *p++ = cpu_to_be32(copy->cp_synchronous); return 0; } static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_offload_status *os) { - struct nfsd4_offload_status *os = &u->offload_status; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8 + 4); @@ -4897,84 +4613,159 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, - struct nfsd4_read *read) + struct nfsd4_read *read, + unsigned long *maxcount, u32 *eof, + loff_t *pos) { - bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); + struct xdr_stream *xdr = &resp->xdr; struct file *file = read->rd_nf->nf_file; - struct xdr_stream *xdr = resp->xdr; - unsigned long maxcount; - __be32 nfserr, *p; + int starting_len = xdr->buf->len; + loff_t hole_pos; + __be32 nfserr; + __be32 *p, tmp; + __be64 tmp64; + + hole_pos = pos ?
*pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); + if (hole_pos > read->rd_offset) + *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); + *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); /* Content type, offset, byte count */ p = xdr_reserve_space(xdr, 4 + 8 + 4); if (!p) - return nfserr_io; - if (resp->xdr->buf->page_len && splice_ok) { - WARN_ON_ONCE(splice_ok); - return nfserr_serverfault; - } + return nfserr_resource; - maxcount = min_t(unsigned long, read->rd_length, - (xdr->buf->buflen - xdr->buf->len)); + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); + if (read->rd_vlen < 0) + return nfserr_resource; - if (file->f_op->splice_read && splice_ok) - nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); - else - nfserr = nfsd4_encode_readv(resp, read, file, maxcount); + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, + resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); - *p++ = cpu_to_be32(NFS4_CONTENT_DATA); - p = xdr_encode_hyper(p, read->rd_offset); - *p = cpu_to_be32(read->rd_length); + tmp = htonl(NFS4_CONTENT_DATA); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp64 = cpu_to_be64(read->rd_offset); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); + tmp = htonl(*maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + tmp = xdr_zero; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, + xdr_pad_size(*maxcount)); + return nfs_ok; +} + +static __be32 +nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + unsigned long *maxcount, u32 *eof) +{ + struct file *file = read->rd_nf->nf_file; + loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); + loff_t f_size = i_size_read(file_inode(file)); + unsigned long count; + __be32 *p; + + 
if (data_pos == -ENXIO) + data_pos = f_size; + else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) + return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); + count = data_pos - read->rd_offset; + + /* Content type, offset, byte count */ + p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8); + if (!p) + return nfserr_resource; + + *p++ = htonl(NFS4_CONTENT_HOLE); + p = xdr_encode_hyper(p, read->rd_offset); + p = xdr_encode_hyper(p, count); + + *eof = (read->rd_offset + count) >= f_size; + *maxcount = min_t(unsigned long, count, *maxcount); return nfs_ok; } static __be32 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_read *read) { - struct nfsd4_read *read = &u->read; - struct file *file = read->rd_nf->nf_file; - struct xdr_stream *xdr = resp->xdr; + unsigned long maxcount, count; + struct xdr_stream *xdr = &resp->xdr; + struct file *file; int starting_len = xdr->buf->len; - u32 segments = 0; - __be32 *p; + int last_segment = xdr->buf->len; + int segments = 0; + __be32 *p, tmp; + bool is_data; + loff_t pos; + u32 eof; if (nfserr) return nfserr; + file = read->rd_nf->nf_file; /* eof flag, segment count */ p = xdr_reserve_space(xdr, 4 + 4); if (!p) - return nfserr_io; + return nfserr_resource; xdr_commit_encode(xdr); - read->rd_eof = read->rd_offset >= i_size_read(file_inode(file)); - if (read->rd_eof) + maxcount = svc_max_payload(resp->rqstp); + maxcount = min_t(unsigned long, maxcount, + (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, read->rd_length); + count = maxcount; + + eof = read->rd_offset >= i_size_read(file_inode(file)); + if (eof) goto out; - nfserr = nfsd4_encode_read_plus_data(resp, read); - if (nfserr) { - xdr_truncate_encode(xdr, starting_len); - return nfserr; + pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); + is_data = pos > read->rd_offset; + + while (count > 0 && !eof) { + maxcount = count; + if (is_data) + 
nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, + segments == 0 ? &pos : NULL); + else + nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); + if (nfserr) + goto out; + count -= maxcount; + read->rd_offset += maxcount; + is_data = !is_data; + last_segment = xdr->buf->len; + segments++; } - segments++; - out: - p = xdr_encode_bool(p, read->rd_eof); - *p = cpu_to_be32(segments); + if (nfserr && segments == 0) + xdr_truncate_encode(xdr, starting_len); + else { + if (nfserr) { + xdr_truncate_encode(xdr, last_segment); + nfserr = nfs_ok; + eof = 0; + } + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + } + return nfserr; } static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_copy_notify *cn) { - struct nfsd4_copy_notify *cn = &u->copy_notify; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; if (nfserr) @@ -5001,18 +4792,16 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(1); - nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src); - return nfserr; + return nfsd42_encode_nl4_server(resp, &cn->cpn_src); } static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_seek *seek) { - struct nfsd4_seek *seek = &u->seek; __be32 *p; - p = xdr_reserve_space(resp->xdr, 4 + 8); + p = xdr_reserve_space(&resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); @@ -5020,8 +4809,7 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 -nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *p) +nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) { return nfserr; } @@ -5072,10 +4860,9 @@ nfsd4_vbuf_to_stream(struct 
xdr_stream *xdr, char *buf, u32 buflen) static __be32 nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_getxattr *getxattr) { - struct nfsd4_getxattr *getxattr = &u->getxattr; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p, err; p = xdr_reserve_space(xdr, 4); @@ -5097,10 +4884,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_setxattr *setxattr) { - struct nfsd4_setxattr *setxattr = &u->setxattr; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -5139,10 +4925,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, static __be32 nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_listxattrs *listxattrs) { - struct nfsd4_listxattrs *listxattrs = &u->listxattrs; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; u32 cookie_offset, count_offset, eof; u32 left, xdrleft, slen, count; u32 xdrlen, offset; @@ -5251,10 +5036,9 @@ out: static __be32 nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, - union nfsd4_op_u *u) + struct nfsd4_removexattr *removexattr) { - struct nfsd4_removexattr *removexattr = &u->removexattr; - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -5265,7 +5049,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } -typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u); +typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); /* * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 @@ -5273,93 +5057,93 @@ typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, 
union nfsd4_op_u * done in the decoding phase. */ static const nfsd4_enc nfsd4_enc_ops[] = { - [OP_ACCESS] = nfsd4_encode_access, - [OP_CLOSE] = nfsd4_encode_close, - [OP_COMMIT] = nfsd4_encode_commit, - [OP_CREATE] = nfsd4_encode_create, - [OP_DELEGPURGE] = nfsd4_encode_noop, - [OP_DELEGRETURN] = nfsd4_encode_noop, - [OP_GETATTR] = nfsd4_encode_getattr, - [OP_GETFH] = nfsd4_encode_getfh, - [OP_LINK] = nfsd4_encode_link, - [OP_LOCK] = nfsd4_encode_lock, - [OP_LOCKT] = nfsd4_encode_lockt, - [OP_LOCKU] = nfsd4_encode_locku, - [OP_LOOKUP] = nfsd4_encode_noop, - [OP_LOOKUPP] = nfsd4_encode_noop, - [OP_NVERIFY] = nfsd4_encode_noop, - [OP_OPEN] = nfsd4_encode_open, - [OP_OPENATTR] = nfsd4_encode_noop, - [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm, - [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade, - [OP_PUTFH] = nfsd4_encode_noop, - [OP_PUTPUBFH] = nfsd4_encode_noop, - [OP_PUTROOTFH] = nfsd4_encode_noop, - [OP_READ] = nfsd4_encode_read, - [OP_READDIR] = nfsd4_encode_readdir, - [OP_READLINK] = nfsd4_encode_readlink, - [OP_REMOVE] = nfsd4_encode_remove, - [OP_RENAME] = nfsd4_encode_rename, - [OP_RENEW] = nfsd4_encode_noop, - [OP_RESTOREFH] = nfsd4_encode_noop, - [OP_SAVEFH] = nfsd4_encode_noop, - [OP_SECINFO] = nfsd4_encode_secinfo, - [OP_SETATTR] = nfsd4_encode_setattr, - [OP_SETCLIENTID] = nfsd4_encode_setclientid, - [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop, - [OP_VERIFY] = nfsd4_encode_noop, - [OP_WRITE] = nfsd4_encode_write, - [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop, + [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, + [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, + [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, + [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create, + [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr, + [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh, + [OP_LINK] = (nfsd4_enc)nfsd4_encode_link, + [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock, + [OP_LOCKT] = 
(nfsd4_enc)nfsd4_encode_lockt, + [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku, + [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, + [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, + [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, + [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, + [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ] = (nfsd4_enc)nfsd4_encode_read, + [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir, + [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink, + [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove, + [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename, + [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop, + [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo, + [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr, + [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid, + [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop, + [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, /* NFSv4.1 operations */ - [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop, - [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session, - [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id, - [OP_CREATE_SESSION] = nfsd4_encode_create_session, - [OP_DESTROY_SESSION] = nfsd4_encode_noop, - [OP_FREE_STATEID] = nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, + [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, + [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, + [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, + 
[OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, #ifdef CONFIG_NFSD_PNFS - [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, - [OP_GETDEVICELIST] = nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit, - [OP_LAYOUTGET] = nfsd4_encode_layoutget, - [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn, + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo, + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit, + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget, + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn, #else - [OP_GETDEVICEINFO] = nfsd4_encode_noop, - [OP_GETDEVICELIST] = nfsd4_encode_noop, - [OP_LAYOUTCOMMIT] = nfsd4_encode_noop, - [OP_LAYOUTGET] = nfsd4_encode_noop, - [OP_LAYOUTRETURN] = nfsd4_encode_noop, + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, #endif - [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name, - [OP_SEQUENCE] = nfsd4_encode_sequence, - [OP_SET_SSV] = nfsd4_encode_noop, - [OP_TEST_STATEID] = nfsd4_encode_test_stateid, - [OP_WANT_DELEGATION] = nfsd4_encode_noop, - [OP_DESTROY_CLIENTID] = nfsd4_encode_noop, - [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop, + [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, + [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, + [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, + [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, + [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, + [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, /* NFSv4.2 operations */ - [OP_ALLOCATE] = nfsd4_encode_noop, - [OP_COPY] = nfsd4_encode_copy, - [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify, - [OP_DEALLOCATE] = 
nfsd4_encode_noop, - [OP_IO_ADVISE] = nfsd4_encode_noop, - [OP_LAYOUTERROR] = nfsd4_encode_noop, - [OP_LAYOUTSTATS] = nfsd4_encode_noop, - [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status, - [OP_READ_PLUS] = nfsd4_encode_read_plus, - [OP_SEEK] = nfsd4_encode_seek, - [OP_WRITE_SAME] = nfsd4_encode_noop, - [OP_CLONE] = nfsd4_encode_noop, + [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, + [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, + [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, + [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, + [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, + [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, + [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, /* RFC 8276 extended atributes operations */ - [OP_GETXATTR] = nfsd4_encode_getxattr, - [OP_SETXATTR] = nfsd4_encode_setxattr, - [OP_LISTXATTRS] = nfsd4_encode_listxattrs, - [OP_REMOVEXATTR] = nfsd4_encode_removexattr, + [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, + [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, + [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, }; /* @@ -5394,7 +5178,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - struct xdr_stream *xdr = resp->xdr; + struct xdr_stream *xdr = &resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; const struct nfsd4_operation *opdesc = op->opdesc; @@ -5403,8 +5187,10 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) __be32 *p; 
p = xdr_reserve_space(xdr, 8); - if (!p) - goto release; + if (!p) { + WARN_ON_ONCE(1); + return; + } *p++ = cpu_to_be32(op->opnum); post_err_offset = xdr->buf->len; @@ -5413,12 +5199,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) if (op->status && opdesc && !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE)) goto status; - BUG_ON(op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || + BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); - if (op->status) - trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status); + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ @@ -5458,10 +5244,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) so->so_replay.rp_buf, len); } status: - *p = op->status; -release: - if (opdesc && opdesc->op_release) - opdesc->op_release(&op->u); + /* Note that op->status is already in network byte order: */ + write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); } /* @@ -5487,14 +5271,22 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } +int +nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_ressize_check(rqstp, p); +} + void nfsd4_release_compoundargs(struct svc_rqst *rqstp) { struct nfsd4_compoundargs *args = rqstp->rq_argp; if (args->ops != args->iops) { - vfree(args->ops); + kfree(args->ops); args->ops = args->iops; } + kfree(args->tmpp); + args->tmpp = NULL; while (args->to_free) { struct svcxdr_tmpbuf *tb = args->to_free; args->to_free = tb->next; @@ -5502,44 +5294,57 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } } -bool -nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int 
+nfs4svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p) +{ + return 1; +} + +int +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd4_compoundargs *args = rqstp->rq_argp; - /* svcxdr_tmp_alloc */ + if (rqstp->rq_arg.head[0].iov_len % 4) { + /* client is nuts */ + dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)", + __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); + return 0; + } + args->p = p; + args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; + args->pagelist = rqstp->rq_arg.pages; + args->pagelen = rqstp->rq_arg.page_len; + args->tail = false; + args->tmpp = NULL; args->to_free = NULL; - - args->xdr = xdr; args->ops = args->iops; args->rqstp = rqstp; - return nfsd4_decode_compound(args); + return !nfsd4_decode_compound(args); } -bool -nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd4_compoundres *resp = rqstp->rq_resp; - __be32 *p; + struct xdr_buf *buf = resp->xdr.buf; - /* - * Send buffer space for the following items is reserved - * at the top of nfsd4_proc_compound(). 
- */ - p = resp->statusp; + WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + + buf->tail[0].iov_len); - *p++ = resp->cstate.status; + *p = resp->cstate.status; - rqstp->rq_next_page = xdr->page_ptr + 1; + rqstp->rq_next_page = resp->xdr.page_ptr + 1; + p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); nfsd4_sequence_done(resp); - return true; + return 1; } /* diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 2b5417e06d80..80c90fc231a5 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -84,6 +84,12 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } +static u32 +nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) +{ + return hash_32(be32_to_cpu(xid), nn->maskbits); +} + static struct svc_cacherep * nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, struct nfsd_net *nn) @@ -115,14 +121,14 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, struct nfsd_net *nn) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { - nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len); + nn->drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } if (rp->c_state != RC_UNUSED) { rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); atomic_dec(&nn->num_drc_entries); - nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp)); + nn->drc_mem_usage -= sizeof(*rp); } kmem_cache_free(drc_slab, rp); } @@ -148,16 +154,6 @@ void nfsd_drc_slab_free(void) kmem_cache_destroy(drc_slab); } -static int nfsd_reply_cache_stats_init(struct nfsd_net *nn) -{ - return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM); -} - -static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn) -{ - nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM); -} - int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; @@ -169,16 +165,12 @@ int 
nfsd_reply_cache_init(struct nfsd_net *nn) hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); - status = nfsd_reply_cache_stats_init(nn); - if (status) - goto out_nomem; - nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; nn->nfsd_reply_cache_shrinker.seeks = 1; status = register_shrinker(&nn->nfsd_reply_cache_shrinker); if (status) - goto out_stats_destroy; + goto out_nomem; nn->drc_hashtbl = kvzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl)), GFP_KERNEL); @@ -194,8 +186,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) return 0; out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); -out_stats_destroy: - nfsd_reply_cache_stats_destroy(nn); out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; @@ -216,7 +206,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) rp, nn); } } - nfsd_reply_cache_stats_destroy(nn); kvfree(nn->drc_hashtbl); nn->drc_hashtbl = NULL; @@ -235,16 +224,8 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) list_move_tail(&rp->c_lru, &b->lru_head); } -static noinline struct nfsd_drc_bucket * -nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn) -{ - unsigned int hash = hash_32((__force u32)xid, nn->maskbits); - - return &nn->drc_hashtbl[hash]; -} - -static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, - unsigned int max) +static long +prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -260,17 +241,11 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(b, rp, nn); - if (max && freed++ > max) - break; + freed++; } return freed; } -static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) -{ - return prune_bucket(b, nn, 3); -} - /* * Walk the LRU list 
and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -287,7 +262,7 @@ prune_cache_entries(struct nfsd_net *nn) if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b, nn, 0); + freed += prune_bucket(b, nn); spin_unlock(&b->cache_lock); } return freed; @@ -349,7 +324,7 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, { if (key->c_key.k_xid == rp->c_key.k_xid && key->c_key.k_csum != rp->c_key.k_csum) { - nfsd_stats_payload_misses_inc(nn); + ++nn->payload_misses; trace_nfsd_drc_mismatch(nn, key, rp); } @@ -421,16 +396,18 @@ out: */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct nfsd_net *nn; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; + __be32 xid = rqstp->rq_xid; __wsum csum; - struct nfsd_drc_bucket *b; + u32 hash = nfsd_cache_hash(xid, nn); + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; if (type == RC_NOCACHE) { - nfsd_stats_rc_nocache_inc(); + nfsdstats.rcnocache++; goto out; } @@ -440,25 +417,27 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. 
*/ - nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rp = nfsd_reply_cache_alloc(rqstp, csum, nn); if (!rp) goto out; - b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); - if (found != rp) + if (found != rp) { + nfsd_reply_cache_free_locked(NULL, rp, nn); + rp = found; goto found_entry; + } - nfsd_stats_rc_misses_inc(); + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; atomic_inc(&nn->num_drc_entries); - nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); + nn->drc_mem_usage += sizeof(*rp); - nfsd_prune_bucket(b, nn); + /* go ahead and prune the cache */ + prune_bucket(b, nn); out_unlock: spin_unlock(&b->cache_lock); @@ -467,10 +446,8 @@ out: found_entry: /* We found a matching entry which is either in progress or done. */ - nfsd_reply_cache_free_locked(NULL, rp, nn); - nfsd_stats_rc_hits_inc(); + nfsdstats.rchits++; rtn = RC_DROPIT; - rp = found; /* Request being processed */ if (rp->c_state == RC_INPROG) @@ -529,6 +506,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; + u32 hash; struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; @@ -536,7 +514,8 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn); + hash = nfsd_cache_hash(rp->c_key.k_xid, nn); + b = &nn->drc_hashtbl[hash]; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; @@ -569,7 +548,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) return; } spin_lock(&b->cache_lock); - nfsd_stats_drc_mem_usage_add(nn, bufsize); + nn->drc_mem_usage += bufsize; lru_put_end(b, rp); rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_type = cachetype; @@ -603,26 +582,28 @@ nfsd_cache_append(struct 
svc_rqst *rqstp, struct kvec *data) * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info, - nfsd_net_id); + struct nfsd_net *nn = m->private; seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", - atomic_read(&nn->num_drc_entries)); + atomic_read(&nn->num_drc_entries)); seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); - seq_printf(m, "mem usage: %lld\n", - percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE])); - seq_printf(m, "cache hits: %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS])); - seq_printf(m, "cache misses: %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES])); - seq_printf(m, "not cached: %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE])); - seq_printf(m, "payload misses: %lld\n", - percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES])); + seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); + seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); + seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); + seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); + seq_printf(m, "payload misses: %u\n", nn->payload_misses); seq_printf(m, "longest chain len: %u\n", nn->longest_chain); seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } + +int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) +{ + struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, + nfsd_net_id); + + return single_open(file, nfsd_reply_cache_stats_show, nn); +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 682f5226e79a..7c36634598d3 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,7 +25,6 @@ 
#include "state.h" #include "netns.h" #include "pnfs.h" -#include "filecache.h" /* * We have a single directory with several nodes in it. @@ -33,7 +32,6 @@ enum { NFSD_Root = 1, NFSD_List, - NFSD_Export_Stats, NFSD_Export_features, NFSD_Fh, NFSD_FO_UnlockIP, @@ -46,7 +44,6 @@ enum { NFSD_Ports, NFSD_MaxBlkSize, NFSD_MaxConnections, - NFSD_Filecache, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -185,7 +182,17 @@ static int export_features_show(struct seq_file *m, void *v) return 0; } -DEFINE_SHOW_ATTRIBUTE(export_features); +static int export_features_open(struct inode *inode, struct file *file) +{ + return single_open(file, export_features_show, NULL); +} + +static const struct file_operations export_features_operations = { + .open = export_features_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) @@ -194,7 +201,17 @@ static int supported_enctypes_show(struct seq_file *m, void *v) return 0; } -DEFINE_SHOW_ATTRIBUTE(supported_enctypes); +static int supported_enctypes_open(struct inode *inode, struct file *file) +{ + return single_open(file, supported_enctypes_show, NULL); +} + +static const struct file_operations supported_enctypes_ops = { + .open = supported_enctypes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ static const struct file_operations pool_stats_operations = { @@ -204,9 +221,12 @@ static const struct file_operations pool_stats_operations = { .release = nfsd_pool_stats_release, }; -DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); - -DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats); +static const struct file_operations reply_cache_stats_operations = { + .open = nfsd_reply_cache_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = 
single_release, +}; /*----------------------------------------------------------------------------*/ /* @@ -374,12 +394,12 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) auth_domain_put(dom); if (len) return len; - + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; - qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); + qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); mesg[-1] = '\n'; - return mesg - buf; + return mesg - buf; } /* @@ -581,9 +601,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { -#ifdef CONFIG_NFSD_V2 case 2: -#endif case 3: nfsd_vers(nn, num, cmd); break; @@ -603,9 +621,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } break; default: - /* Ignore requests to disable non-existent versions */ - if (cmd == NFSD_SET) - return -EINVAL; + return -EINVAL; } vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); @@ -616,6 +632,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } /* Now write current state into reply buffer */ + len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { @@ -709,25 +726,28 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred char *mesg = buf; int fd, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct svc_serv *serv; err = get_int(&mesg, &fd); if (err != 0 || fd < 0) return -EINVAL; + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; + } + err = nfsd_create_serv(net); if (err != 0) return err; - serv = nn->nfsd_serv; - err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); + err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); + if (err < 0) { + nfsd_destroy(net); + return err; + } - if (err < 0 && !serv->sv_nrthreads && !nn->keep_active) - 
nfsd_last_thread(net); - else if (err >= 0 && !serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) - svc_get(serv); - - svc_put(serv); + /* Decrease the count, but don't shut down the service */ + nn->nfsd_serv->sv_nrthreads--; return err; } @@ -741,7 +761,6 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr struct svc_xprt *xprt; int port, err; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct svc_serv *serv; if (sscanf(buf, "%15s %5u", transport, &port) != 2) return -EINVAL; @@ -753,33 +772,30 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (err != 0) return err; - serv = nn->nfsd_serv; - err = svc_xprt_create(serv, transport, net, - PF_INET, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_create_xprt(nn->nfsd_serv, transport, net, + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; - err = svc_xprt_create(serv, transport, net, - PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_create_xprt(nn->nfsd_serv, transport, net, + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; - if (!serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) - svc_get(serv); - - svc_put(serv); + /* Decrease the count, but don't shut down the service */ + nn->nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(serv, transport, net, PF_INET, port); + xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); if (xprt != NULL) { - svc_xprt_close(xprt); + svc_close_xprt(xprt); svc_xprt_put(xprt); } out_err: - if (!serv->sv_nrthreads && !nn->keep_active) - nfsd_last_thread(net); - - svc_put(serv); + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) + nn->nfsd_serv->sv_nrthreads--; + else + nfsd_destroy(net); return err; } @@ -1152,7 +1168,6 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); - break; 
default: break; } @@ -1254,8 +1269,7 @@ static void nfsdfs_remove_files(struct dentry *root) /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. */ static int nfsdfs_create_files(struct dentry *root, - const struct tree_descr *files, - struct dentry **fdentries) + const struct tree_descr *files) { struct inode *dir = d_inode(root); struct inode *inode; @@ -1264,6 +1278,8 @@ static int nfsdfs_create_files(struct dentry *root, inode_lock(dir); for (i = 0; files->name && files->name[0]; i++, files++) { + if (!files->name) + continue; dentry = d_alloc_name(root, files->name); if (!dentry) goto out; @@ -1277,8 +1293,6 @@ static int nfsdfs_create_files(struct dentry *root, inode->i_private = __get_nfsdfs_client(dir); d_add(dentry, inode); fsnotify_create(dir, dentry); - if (fdentries) - fdentries[i] = dentry; } inode_unlock(dir); return 0; @@ -1290,9 +1304,8 @@ out: /* on success, returns positive number unique to that client. */ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, - const struct tree_descr *files, - struct dentry **fdentries) + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files) { struct dentry *dentry; char name[11]; @@ -1303,7 +1316,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? 
*/ return NULL; - ret = nfsdfs_create_files(dentry, files, fdentries); + ret = nfsdfs_create_files(dentry, files); if (ret) { nfsd_client_rmdir(dentry); return NULL; @@ -1339,10 +1352,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, - /* Per-export io stats use same ops as exports file */ - [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", - &export_features_fops, S_IRUGO}, + &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_FO_UnlockFS] = {"unlock_filesystem", @@ -1351,16 +1362,13 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, - [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", - &nfsd_reply_cache_stats_fops, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, - [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) - [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", - &supported_enctypes_fops, S_IRUGO}, + [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, 
S_IWUSR|S_IRUSR}, @@ -1460,16 +1468,25 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - nfsd4_init_leases_net(nn); retval = nfsd_reply_cache_init(nn); if (retval) - goto out_cache_error; - get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); - seqlock_init(&nn->writeverf_lock); + goto out_drc_error; + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; + nn->clverifier_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; + nn->s2s_cp_cl_id = nn->clientid_counter++; + + atomic_set(&nn->ntf_refcnt, 0); + init_waitqueue_head(&nn->ntf_wq); + seqlock_init(&nn->boot_lock); return 0; -out_cache_error: +out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); @@ -1497,6 +1514,7 @@ static struct pernet_operations nfsd_net_ops = { static int __init init_nfsd(void) { int retval; + printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); retval = nfsd4_init_slabs(); if (retval) @@ -1504,9 +1522,7 @@ static int __init init_nfsd(void) retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; - retval = nfsd_stat_init(); /* Statistics */ - if (retval) - goto out_free_pnfs; + nfsd_stat_init(); /* Statistics */ retval = nfsd_drc_slab_create(); if (retval) goto out_free_stat; @@ -1514,25 +1530,20 @@ static int __init init_nfsd(void) retval = create_proc_exports_entry(); if (retval) goto out_free_lockd; + retval = register_filesystem(&nfsd_fs_type); + if (retval) + goto out_free_exports; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_exports; + goto out_free_filesystem; retval = register_cld_notifier(); - if (retval) - goto out_free_subsys; - retval = nfsd4_create_laundry_wq(); - if (retval) - goto out_free_cld; - retval = 
register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; return 0; out_free_all: - nfsd4_destroy_laundry_wq(); -out_free_cld: - unregister_cld_notifier(); -out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); @@ -1541,7 +1552,6 @@ out_free_lockd: nfsd_drc_slab_free(); out_free_stat: nfsd_stat_shutdown(); -out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); @@ -1550,8 +1560,6 @@ out_free_slabs: static void __exit exit_nfsd(void) { - unregister_filesystem(&nfsd_fs_type); - nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); @@ -1561,6 +1569,7 @@ static void __exit exit_nfsd(void) nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); + unregister_filesystem(&nfsd_fs_type); } MODULE_AUTHOR("Olaf Kirch "); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 013bfa24ced2..4362d295ed34 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -24,8 +24,8 @@ #include #include "netns.h" -#include "export.h" #include "stats.h" +#include "export.h" #undef ifdebug #ifdef CONFIG_SUNRPC_DEBUG @@ -64,7 +64,8 @@ struct readdir_cd { extern struct svc_program nfsd_program; -extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; +extern const struct svc_version nfsd_version2, nfsd_version3, + nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; @@ -72,16 +73,6 @@ extern unsigned long nfsd_drc_mem_used; extern const struct seq_operations nfs_exports_op; -/* - * Common void argument and result helpers - */ -struct nfsd_voidargs { }; -struct nfsd_voidres { }; -bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, - struct xdr_stream *xdr); -bool nfssvc_encode_voidres(struct svc_rqst *rqstp, - struct xdr_stream *xdr); - /* * Function prototypes. 
*/ @@ -96,6 +87,8 @@ int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_shutdown_threads(struct net *net); +void nfsd_destroy(struct net *net); + bool i_am_nfsd(void); struct nfsdfs_client { @@ -105,9 +98,7 @@ struct nfsdfs_client { struct nfsdfs_client *get_nfsdfs_client(struct inode *); struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, - const struct tree_descr *, - struct dentry **fdentries); + struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); void nfsd_client_rmdir(struct dentry *dentry); @@ -131,7 +122,6 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); -void nfsd_last_thread(struct net *net); extern int nfsd_max_blksize; @@ -160,9 +150,6 @@ void nfs4_state_shutdown_net(struct net *net); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); -int nfsd4_create_laundry_wq(void); -void nfsd4_destroy_laundry_wq(void); -bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode); #else static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } @@ -176,13 +163,6 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) { return false; } -static inline int nfsd4_create_laundry_wq(void) { return 0; }; -static inline void nfsd4_destroy_laundry_wq(void) {}; -static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, - struct inode *inode) -{ - return false; -} #endif /* @@ -344,10 +324,6 @@ void nfsd_lockd_shutdown(void); #define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ -#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ -#define 
NFSD_CLIENT_MAX_TRIM_PER_RUN 128 -#define NFS4_CLIENTS_PER_GB 1024 -#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ /* * The following attributes are currently not supported by the NFSv4 server: @@ -376,7 +352,7 @@ void nfsd_lockd_shutdown(void); | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ - | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) #define NFSD4_SUPPORTED_ATTRS_WORD2 0 @@ -410,6 +386,7 @@ void nfsd_lockd_shutdown(void); #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + FATTR4_WORD2_CHANGE_ATTR_TYPE | \ FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS | \ FATTR4_WORD2_XATTR_SUPPORT) @@ -472,8 +449,7 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \ - | FATTR4_WORD1_TIME_MODIFY_SET) + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \ FATTR4_WORD2_SECURITY_LABEL @@ -499,20 +475,12 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) extern int nfsd4_is_junction(struct dentry *dentry); extern int register_cld_notifier(void); extern void unregister_cld_notifier(void); -#ifdef CONFIG_NFSD_V4_2_INTER_SSC -extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); -#endif - -extern void nfsd4_init_leases_net(struct nfsd_net *nn); - #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct 
dentry *dentry) { return 0; } -static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; - #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index db8d62632a5b..c81dbbad8792 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -153,12 +153,11 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - struct fid *fid = NULL; + struct fid *fid = NULL, sfid; struct svc_export *exp; struct dentry *dentry; int fileid_type; int data_left = fh->fh_size/4; - int len; __be32 error; error = nfserr_stale; @@ -167,35 +166,48 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers == 4 && fh->fh_size == 0) return nfserr_nofilehandle; - if (fh->fh_version != 1) - return error; + if (fh->fh_version == 1) { + int len; - if (--data_left < 0) - return error; - if (fh->fh_auth_type != 0) - return error; - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) - return error; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - /* - * struct knfsd_fh uses host-endian fields, which are - * sometimes used to hold net-endian values. This - * confuses sparse, so we must use __force here to - * keep it from complaining. 
- */ - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), - ntohl((__force __be32)fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. + */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; + } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); + } else { + __u32 tfh[2]; + dev_t xdev; + ino_t xino; + + if (fh->fh_size != NFS_FHSIZE) + return error; + /* assume old filehandle format */ + xdev = old_decode_dev(fh->ofh_xdev); + xino = u32_to_ino_t(fh->ofh_xino); + mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); + exp = rqst_exp_find(rqstp, FSID_DEV, tfh); } - data_left -= len; - if (data_left < 0) - return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); - fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; if (IS_ERR(exp)) { @@ -240,25 +252,28 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers > 2) error = nfserr_badhandle; - fileid_type = fh->fh_fileid_type; + if (fh->fh_version != 1) { + sfid.i32.ino = fh->ofh_ino; + sfid.i32.gen = fh->ofh_generation; + sfid.i32.parent_ino = fh->ofh_dirino; + fid = &sfid; + data_left = 3; + if (fh->ofh_dirino == 0) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + } else + fileid_type = 
fh->fh_fileid_type; if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_path.dentry); else { - dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, - data_left, fileid_type, - nfsd_acceptable, exp); - if (IS_ERR_OR_NULL(dentry)) { + dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + if (IS_ERR_OR_NULL(dentry)) trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, dentry ? PTR_ERR(dentry) : -ESTALE); - switch (PTR_ERR(dentry)) { - case -ENOMEM: - case -ETIMEDOUT: - break; - default: - dentry = ERR_PTR(-ESTALE); - } - } } if (dentry == NULL) goto out; @@ -276,20 +291,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; - - switch (rqstp->rq_vers) { - case 4: - if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) - fhp->fh_no_atomic_attr = true; - break; - case 3: - if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) - fhp->fh_no_wcc = true; - break; - case 2: - fhp->fh_no_wcc = true; - } - return 0; out: exp_put(exp); @@ -326,7 +327,7 @@ out: __be32 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) { - struct svc_export *exp = NULL; + struct svc_export *exp; struct dentry *dentry; __be32 error; @@ -399,7 +400,7 @@ skip_pseudoflavor_check: } out: if (error == nfserr_stale) - nfsd_stats_fh_stale_inc(exp); + nfsdstats.fh_stale++; return error; } @@ -428,6 +429,20 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, } } +/* + * for composing old style file handles + */ +static inline void _fh_update_old(struct dentry *dentry, + struct svc_export *exp, + struct knfsd_fh *fh) +{ + fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino); + fh->ofh_generation = d_inode(dentry)->i_generation; + if (d_is_dir(dentry) || + (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) + fh->ofh_dirino = 0; +} + static bool is_root_export(struct svc_export *exp) { return exp->ex_path.dentry == 
exp->ex_path.dentry->d_sb->s_root; @@ -524,6 +539,9 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* ref_fh is a reference file handle. * if it is non-null and for the same filesystem, then we should compose * a filehandle which is of the same version, where possible. + * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca + * Then create a 32byte filehandle using nfs_fhbase_old + * */ struct inode * inode = d_inode(dentry); @@ -541,13 +559,10 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ set_version_and_fsid_type(fhp, exp, ref_fh); - /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */ - fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false; - if (ref_fh == fhp) fh_put(ref_fh); - if (fhp->fh_dentry) { + if (fhp->fh_locked || fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", dentry); } @@ -559,21 +574,35 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp_get(exp); - fhp->fh_handle.fh_size = - key_len(fhp->fh_handle.fh_fsid_type) + 4; - fhp->fh_handle.fh_auth_type = 0; + if (fhp->fh_handle.fh_version == 0xca) { + /* old style filehandle please */ + memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); + fhp->fh_handle.fh_size = NFS_FHSIZE; + fhp->fh_handle.ofh_dcookie = 0xfeebbaca; + fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); + fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; + fhp->fh_handle.ofh_xino = + ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino); + fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); + if (inode) + _fh_update_old(dentry, exp, &fhp->fh_handle); + } else { + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; + fhp->fh_handle.fh_auth_type = 0; - mk_fsid(fhp->fh_handle.fh_fsid_type, - fhp->fh_handle.fh_fsid, - ex_dev, - d_inode(exp->ex_path.dentry)->i_ino, - exp->ex_fsid, exp->ex_uuid); + 
mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, + d_inode(exp->ex_path.dentry)->i_ino, + exp->ex_fsid, exp->ex_uuid); - if (inode) - _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { - fh_put(fhp); - return nfserr_opnotsupp; + if (inode) + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); + return nfserr_opnotsupp; + } } return 0; @@ -594,12 +623,16 @@ fh_update(struct svc_fh *fhp) dentry = fhp->fh_dentry; if (d_really_is_negative(dentry)) goto out_negative; - if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - return 0; + if (fhp->fh_handle.fh_version != 1) { + _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); + } else { + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) + return 0; - _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; + _fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) + return nfserr_opnotsupp; + } return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); @@ -610,85 +643,6 @@ out_negative: return nfserr_serverfault; } -/** - * fh_fill_pre_attrs - Fill in pre-op attributes - * @fhp: file handle to be updated - * - */ -void fh_fill_pre_attrs(struct svc_fh *fhp) -{ - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode; - struct kstat stat; - __be32 err; - - if (fhp->fh_no_wcc || fhp->fh_pre_saved) - return; - - inode = d_inode(fhp->fh_dentry); - err = fh_getattr(fhp, &stat); - if (err) { - /* Grab the times from inode anyway */ - stat.mtime = inode->i_mtime; - stat.ctime = inode->i_ctime; - stat.size = inode->i_size; - } - if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); - - fhp->fh_pre_mtime = stat.mtime; - fhp->fh_pre_ctime = stat.ctime; - fhp->fh_pre_size = stat.size; - fhp->fh_pre_saved = true; -} - -/** - * fh_fill_post_attrs - Fill in post-op attributes - * @fhp: 
file handle to be updated - * - */ -void fh_fill_post_attrs(struct svc_fh *fhp) -{ - bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); - __be32 err; - - if (fhp->fh_no_wcc) - return; - - if (fhp->fh_post_saved) - printk("nfsd: inode locked twice during operation.\n"); - - err = fh_getattr(fhp, &fhp->fh_post_attr); - if (err) { - fhp->fh_post_saved = false; - fhp->fh_post_attr.ctime = inode->i_ctime; - } else - fhp->fh_post_saved = true; - if (v4) - fhp->fh_post_change = - nfsd4_change_attribute(&fhp->fh_post_attr, inode); -} - -/** - * fh_fill_both_attrs - Fill pre-op and post-op attributes - * @fhp: file handle to be updated - * - * This is used when the directory wasn't changed, but wcc attributes - * are needed anyway. - */ -void fh_fill_both_attrs(struct svc_fh *fhp) -{ - fh_fill_post_attrs(fhp); - if (!fhp->fh_post_saved) - return; - fhp->fh_pre_change = fhp->fh_post_change; - fhp->fh_pre_mtime = fhp->fh_post_attr.mtime; - fhp->fh_pre_ctime = fhp->fh_post_attr.ctime; - fhp->fh_pre_size = fhp->fh_post_attr.size; - fhp->fh_pre_saved = true; -} - /* * Release a file handle. 
*/ @@ -698,16 +652,16 @@ fh_put(struct svc_fh *fhp) struct dentry * dentry = fhp->fh_dentry; struct svc_export * exp = fhp->fh_export; if (dentry) { + fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); - fh_clear_pre_post_attrs(fhp); + fh_clear_wcc(fhp); } fh_drop_write(fhp); if (exp) { exp_put(exp); fhp->fh_export = NULL; } - fhp->fh_no_wcc = false; return; } @@ -717,15 +671,20 @@ fh_put(struct svc_fh *fhp) char * SVCFH_fmt(struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - static char buf[2+1+1+64*3+1]; - if (fh->fh_size < 0 || fh->fh_size> 64) - return "bad-fh"; - sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); + static char buf[80]; + sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", + fh->fh_size, + fh->fh_base.fh_pad[0], + fh->fh_base.fh_pad[1], + fh->fh_base.fh_pad[2], + fh->fh_base.fh_pad[3], + fh->fh_base.fh_pad[4], + fh->fh_base.fh_pad[5]); return buf; } -enum fsid_source fsid_source(const struct svc_fh *fhp) +enum fsid_source fsid_source(struct svc_fh *fhp) { if (fhp->fh_handle.fh_version != 1) return FSIDSOURCE_DEV; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 513e028b0bbe..56cfbc361561 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -10,56 +10,8 @@ #include #include +#include #include -#include -#include - -/* - * The file handle starts with a sequence of four-byte words. - * The first word contains a version number (1) and three descriptor bytes - * that tell how the remaining 3 variable length fields should be handled. - * These three bytes are auth_type, fsid_type and fileid_type. - * - * All four-byte values are in host-byte-order. - * - * The auth_type field is deprecated and must be set to 0. - * - * The fsid_type identifies how the filesystem (or export point) is - * encoded. - * Current values: - * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number - * NOTE: we cannot use the kdev_t device id value, because kdev_t.h - * says we mustn't. We must break it up and reassemble. 
- * 1 - 4 byte user specified identifier - * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED - * 3 - 4 byte device id, encoded for user-space, 4 byte inode number - * 4 - 4 byte inode number and 4 byte uuid - * 5 - 8 byte uuid - * 6 - 16 byte uuid - * 7 - 8 byte inode number and 16 byte uuid - * - * The fileid_type identifies how the file within the filesystem is encoded. - * The values for this field are filesystem specific, exccept that - * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' - * in include/linux/exportfs.h for currently registered values. - */ - -struct knfsd_fh { - unsigned int fh_size; /* - * Points to the current size while - * building a new file handle. - */ - union { - char fh_raw[NFS4_FHSIZE]; - struct { - u8 fh_version; /* == 1 */ - u8 fh_auth_type; /* deprecated */ - u8 fh_fsid_type; - u8 fh_fileid_type; - u32 fh_fsid[]; /* flexible-array member */ - }; - }; -}; static inline __u32 ino_t_to_u32(ino_t ino) { @@ -81,18 +33,14 @@ typedef struct svc_fh { struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ + bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ - bool fh_no_wcc; /* no wcc data needed */ - bool fh_no_atomic_attr; - /* - * wcc data is not atomic with - * operation - */ int fh_flags; /* FH flags */ +#ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ - /* Pre-op attributes saved when inode is locked */ + /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ struct timespec64 fh_pre_mtime; /* mtime before oper */ struct timespec64 fh_pre_ctime; /* ctime before oper */ @@ -102,9 +50,11 @@ typedef struct svc_fh { */ u64 fh_pre_change; - /* Post-op attributes saved in fh_fill_post_attrs() */ + /* Post-op attributes saved in fh_unlock */ struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; 
/* nfsv4 change; see above */ +#endif /* CONFIG_NFSD_V3 */ + } svc_fh; #define NFSD4_FH_FOREIGN (1<<0) #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) @@ -126,7 +76,7 @@ enum fsid_source { FSIDSOURCE_FSID, FSIDSOURCE_UUID, }; -extern enum fsid_source fsid_source(const struct svc_fh *fhp); +extern enum fsid_source fsid_source(struct svc_fh *fhp); /* @@ -220,19 +170,19 @@ __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -fh_copy(struct svc_fh *dst, const struct svc_fh *src) +fh_copy(struct svc_fh *dst, struct svc_fh *src) { - WARN_ON(src->fh_dentry); - + WARN_ON(src->fh_dentry || src->fh_locked); + *dst = *src; return dst; } static inline void -fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src) +fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) { dst->fh_size = src->fh_size; - memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); + memcpy(&dst->fh_base, &src->fh_base, src->fh_size); } static __inline__ struct svc_fh * @@ -243,18 +193,16 @@ fh_init(struct svc_fh *fhp, int maxsize) return fhp; } -static inline bool fh_match(const struct knfsd_fh *fh1, - const struct knfsd_fh *fh2) +static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; - if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0) + if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) return false; return true; } -static inline bool fh_fsid_match(const struct knfsd_fh *fh1, - const struct knfsd_fh *fh2) +static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { if (fh1->fh_fsid_type != fh2->fh_fsid_type) return false; @@ -271,23 +219,27 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, * returns a crc32 hash for the filehandle that is compatible with * the one displayed by "wireshark". 
*/ -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) + +static inline u32 +knfsd_fh_hash(struct knfsd_fh *fh) { - return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); + return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); } #else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) +static inline u32 +knfsd_fh_hash(struct knfsd_fh *fh) { return 0; } #endif -/** - * fh_clear_pre_post_attrs - Reset pre/post attributes - * @fhp: file handle to be updated - * +#ifdef CONFIG_NFSD_V3 +/* + * The wcc data stored in current_fh should be cleared + * between compound ops. */ -static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) +static inline void +fh_clear_wcc(struct svc_fh *fhp) { fhp->fh_post_saved = false; fhp->fh_pre_saved = false; @@ -307,21 +259,68 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) static inline u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) { - if (inode->i_sb->s_export_op->fetch_iversion) - return inode->i_sb->s_export_op->fetch_iversion(inode); - else if (IS_I_VERSION(inode)) { - u64 chattr; + u64 chattr; - chattr = stat->ctime.tv_sec; - chattr <<= 30; - chattr += stat->ctime.tv_nsec; - chattr += inode_query_iversion(inode); - return chattr; - } else - return time_to_chattr(&stat->ctime); + chattr = stat->ctime.tv_sec; + chattr <<= 30; + chattr += stat->ctime.tv_nsec; + chattr += inode_query_iversion(inode); + return chattr; +} + +extern void fill_pre_wcc(struct svc_fh *fhp); +extern void fill_post_wcc(struct svc_fh *fhp); +#else +#define fh_clear_wcc(ignored) +#define fill_pre_wcc(ignored) +#define fill_post_wcc(notused) +#endif /* CONFIG_NFSD_V3 */ + + +/* + * Lock a file handle/inode + * NOTE: both fh_lock and fh_unlock are done "by hand" in + * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once + * so, any changes here should be reflected there. 
+ */ + +static inline void +fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) +{ + struct dentry *dentry = fhp->fh_dentry; + struct inode *inode; + + BUG_ON(!dentry); + + if (fhp->fh_locked) { + printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", + dentry); + return; + } + + inode = d_inode(dentry); + inode_lock_nested(inode, subclass); + fill_pre_wcc(fhp); + fhp->fh_locked = true; +} + +static inline void +fh_lock(struct svc_fh *fhp) +{ + fh_lock_nested(fhp, I_MUTEX_NORMAL); +} + +/* + * Unlock a file handle/inode + */ +static inline void +fh_unlock(struct svc_fh *fhp) +{ + if (fhp->fh_locked) { + fill_post_wcc(fhp); + inode_unlock(d_inode(fhp->fh_dentry)); + fhp->fh_locked = false; + } } -extern void fh_fill_pre_attrs(struct svc_fh *fhp); -extern void fh_fill_post_attrs(struct svc_fh *fhp); -extern void fh_fill_both_attrs(struct svc_fh *fhp); #endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 96426dea7d41..bbd01e8397f6 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -51,9 +51,6 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) struct nfsd_sattrargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; struct iattr *iap = &argp->attrs; - struct nfsd_attrs attrs = { - .na_iattr = iap, - }; struct svc_fh *fhp; dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", @@ -103,7 +100,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); + resp->status = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); if (resp->status != nfs_ok) goto out; @@ -152,16 +149,14 @@ out: static __be32 nfsd_proc_readlink(struct svc_rqst *rqstp) { - struct nfsd_fhandle *argp = rqstp->rq_argp; + struct nfsd_readlinkargs *argp = rqstp->rq_argp; struct nfsd_readlinkres *resp = rqstp->rq_resp; dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. 
*/ resp->len = NFS_MAXPATHLEN; - resp->page = *(rqstp->rq_next_page++); - resp->status = nfsd_readlink(rqstp, &argp->fh, - page_address(resp->page), &resp->len); + resp->status = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len); fh_put(&argp->fh); return rpc_success; @@ -176,42 +171,36 @@ nfsd_proc_read(struct svc_rqst *rqstp) { struct nfsd_readargs *argp = rqstp->rq_argp; struct nfsd_readres *resp = rqstp->rq_resp; - unsigned int len; u32 eof; - int v; dprintk("nfsd: READ %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, argp->offset); - argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); - argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); - - v = 0; - len = argp->count; - resp->pages = rqstp->rq_next_page; - while (len > 0) { - struct page *page = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(page); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } - /* Obtain buffer pointer for payload. 19 is 1 word for * status, 17 words for fattr, and 1 word for the byte count. 
*/ + + if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { + char buf[RPC_MAX_ADDRBUFLEN]; + printk(KERN_NOTICE + "oversized read request from %s (%d bytes)\n", + svc_print_addr(rqstp, buf, sizeof(buf)), + argp->count); + argp->count = NFSSVC_MAXBLKSIZE_V2; + } svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; - fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, v, &resp->count, &eof); + resp->status = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), + argp->offset, + rqstp->rq_vec, argp->vlen, + &resp->count, + &eof); if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - set_bit(RQ_DROPME, &rqstp->rq_flags); + return rpc_drop_reply; return rpc_success; } @@ -238,7 +227,12 @@ nfsd_proc_write(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, &argp->payload); + nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, + &argp->first, cnt); + if (!nvecs) { + resp->status = nfserr_io; + goto out; + } resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, nvecs, @@ -246,7 +240,8 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - set_bit(RQ_DROPME, &rqstp->rq_flags); + return rpc_drop_reply; +out: return rpc_success; } @@ -264,9 +259,6 @@ nfsd_proc_create(struct svc_rqst *rqstp) svc_fh *dirfhp = &argp->fh; svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; - struct nfsd_attrs attrs = { - .na_iattr = attr, - }; struct inode *inode; struct dentry *dchild; int type, mode; @@ -292,7 +284,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) goto done; } - inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); + fh_lock_nested(dirfhp, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, 
argp->len); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); @@ -391,8 +383,9 @@ nfsd_proc_create(struct svc_rqst *rqstp) resp->status = nfs_ok; if (!inode) { /* File doesn't exist. Create it and set attrs */ - resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type, - rdev, newfhp); + resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name, + argp->len, attr, type, rdev, + newfhp); } else if (type == S_IFREG) { dprintk("nfsd: existing %s, valid=%x, size=%ld\n", argp->name, attr->ia_valid, (long) attr->ia_size); @@ -402,12 +395,13 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, + resp->status = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0); } out_unlock: - inode_unlock(dirfhp->fh_dentry->d_inode); + /* We don't really need to unlock, as fh_put does it. */ + fh_unlock(dirfhp); fh_drop_write(dirfhp); done: fh_put(dirfhp); @@ -477,9 +471,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) { struct nfsd_symlinkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - struct nfsd_attrs attrs = { - .na_iattr = &argp->attrs, - }; struct svc_fh newfh; if (argp->tlen > NFS_MAXPATHLEN) { @@ -501,7 +492,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, &attrs, &newfh); + argp->tname, &newfh); kfree(argp->tname); fh_put(&argp->ffh); @@ -519,9 +510,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd_createargs *argp = rqstp->rq_argp; struct nfsd_diropres *resp = rqstp->rq_resp; - struct nfsd_attrs attrs = { - .na_iattr = &argp->attrs, - }; dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); @@ -533,7 +521,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) argp->attrs.ia_valid &= ~ATTR_SIZE; fh_init(&resp->fh, NFS_FHSIZE); resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, - &attrs, 
S_IFDIR, 0, &resp->fh); + &argp->attrs, S_IFDIR, 0, &resp->fh); fh_put(&argp->fh); if (resp->status != nfs_ok) goto out; @@ -560,24 +548,6 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) return rpc_success; } -static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, - struct nfsd_readdirres *resp, - u32 count) -{ - struct xdr_buf *buf = &resp->dirlist; - struct xdr_stream *xdr = &resp->xdr; - - memset(buf, 0, sizeof(*buf)); - - /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE); - buf->buflen -= XDR_UNIT * 2; - buf->pages = rqstp->rq_next_page; - rqstp->rq_next_page++; - - xdr_init_encode_pages(xdr, buf, buf->pages, NULL); -} - /* * Read a portion of a directory. */ @@ -586,20 +556,33 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) { struct nfsd_readdirargs *argp = rqstp->rq_argp; struct nfsd_readdirres *resp = rqstp->rq_resp; + int count; loff_t offset; dprintk("nfsd: READDIR %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, argp->cookie); - nfsd_init_dirlist_pages(rqstp, resp, argp->count); + /* Shrink to the client read size */ + count = (argp->count >> 2) - 2; + /* Make sure we've room for the NULL ptr & eof flag */ + count -= 2; + if (count < 0) + count = 0; + + resp->buffer = argp->buffer; + resp->offset = NULL; + resp->buflen = count; resp->common.err = nfs_ok; - resp->cookie_offset = 0; + /* Read directory and encode entries on the fly */ offset = argp->cookie; resp->status = nfsd_readdir(rqstp, &argp->fh, &offset, &resp->common, nfssvc_encode_entry); - nfssvc_encode_nfscookie(resp, offset); + + resp->count = resp->buffer - argp->buffer; + if (resp->offset) + *resp->offset = htonl(offset); fh_put(&argp->fh); return rpc_success; @@ -626,6 +609,7 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) * NFSv2 Server procedures. * Only the results of non-idempotent operations are cached. 
*/ +struct nfsd_void { int dummy; }; #define ST 1 /* status */ #define FH 8 /* filehandle */ @@ -634,49 +618,41 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_NULL] = { .pc_func = nfsd_proc_null, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfssvc_decode_void, + .pc_encode = nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, - .pc_name = "NULL", }, [NFSPROC_GETATTR] = { .pc_func = nfsd_proc_getattr, - .pc_decode = nfssvc_decode_fhandleargs, - .pc_encode = nfssvc_encode_attrstatres, + .pc_decode = nfssvc_decode_fhandle, + .pc_encode = nfssvc_encode_attrstat, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), - .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, - .pc_name = "GETATTR", }, [NFSPROC_SETATTR] = { .pc_func = nfsd_proc_setattr, .pc_decode = nfssvc_decode_sattrargs, - .pc_encode = nfssvc_encode_attrstatres, + .pc_encode = nfssvc_encode_attrstat, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_sattrargs), - .pc_argzero = sizeof(struct nfsd_sattrargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, - .pc_name = "SETATTR", }, [NFSPROC_ROOT] = { .pc_func = nfsd_proc_root, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfssvc_decode_void, + .pc_encode = nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct 
nfsd_void), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, - .pc_name = "ROOT", }, [NFSPROC_LOOKUP] = { .pc_func = nfsd_proc_lookup, @@ -684,22 +660,18 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_diropargs), - .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+AT, - .pc_name = "LOOKUP", }, [NFSPROC_READLINK] = { .pc_func = nfsd_proc_readlink, - .pc_decode = nfssvc_decode_fhandleargs, + .pc_decode = nfssvc_decode_readlinkargs, .pc_encode = nfssvc_encode_readlinkres, - .pc_argsize = sizeof(struct nfsd_fhandle), - .pc_argzero = sizeof(struct nfsd_fhandle), + .pc_argsize = sizeof(struct nfsd_readlinkargs), .pc_ressize = sizeof(struct nfsd_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, - .pc_name = "READLINK", }, [NFSPROC_READ] = { .pc_func = nfsd_proc_read, @@ -707,34 +679,28 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_readres, .pc_release = nfssvc_release_readres, .pc_argsize = sizeof(struct nfsd_readargs), - .pc_argzero = sizeof(struct nfsd_readargs), .pc_ressize = sizeof(struct nfsd_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, - .pc_name = "READ", }, [NFSPROC_WRITECACHE] = { .pc_func = nfsd_proc_writecache, - .pc_decode = nfssvc_decode_voidarg, - .pc_encode = nfssvc_encode_voidres, - .pc_argsize = sizeof(struct nfsd_voidargs), - .pc_argzero = sizeof(struct nfsd_voidargs), - .pc_ressize = sizeof(struct nfsd_voidres), + .pc_decode = nfssvc_decode_void, + .pc_encode = nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 0, - .pc_name = "WRITECACHE", }, [NFSPROC_WRITE] = { .pc_func = nfsd_proc_write, .pc_decode = nfssvc_decode_writeargs, - 
.pc_encode = nfssvc_encode_attrstatres, + .pc_encode = nfssvc_encode_attrstat, .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_writeargs), - .pc_argzero = sizeof(struct nfsd_writeargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, - .pc_name = "WRITE", }, [NFSPROC_CREATE] = { .pc_func = nfsd_proc_create, @@ -742,55 +708,45 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), - .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, - .pc_name = "CREATE", }, [NFSPROC_REMOVE] = { .pc_func = nfsd_proc_remove, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_statres, + .pc_encode = nfssvc_encode_stat, .pc_argsize = sizeof(struct nfsd_diropargs), - .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, - .pc_name = "REMOVE", }, [NFSPROC_RENAME] = { .pc_func = nfsd_proc_rename, .pc_decode = nfssvc_decode_renameargs, - .pc_encode = nfssvc_encode_statres, + .pc_encode = nfssvc_encode_stat, .pc_argsize = sizeof(struct nfsd_renameargs), - .pc_argzero = sizeof(struct nfsd_renameargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, - .pc_name = "RENAME", }, [NFSPROC_LINK] = { .pc_func = nfsd_proc_link, .pc_decode = nfssvc_decode_linkargs, - .pc_encode = nfssvc_encode_statres, + .pc_encode = nfssvc_encode_stat, .pc_argsize = sizeof(struct nfsd_linkargs), - .pc_argzero = sizeof(struct nfsd_linkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, - .pc_name = "LINK", }, [NFSPROC_SYMLINK] = { .pc_func = nfsd_proc_symlink, .pc_decode = nfssvc_decode_symlinkargs, - .pc_encode = nfssvc_encode_statres, + 
.pc_encode = nfssvc_encode_stat, .pc_argsize = sizeof(struct nfsd_symlinkargs), - .pc_argzero = sizeof(struct nfsd_symlinkargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, - .pc_name = "SYMLINK", }, [NFSPROC_MKDIR] = { .pc_func = nfsd_proc_mkdir, @@ -798,43 +754,35 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_encode = nfssvc_encode_diropres, .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), - .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, - .pc_name = "MKDIR", }, [NFSPROC_RMDIR] = { .pc_func = nfsd_proc_rmdir, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_statres, + .pc_encode = nfssvc_encode_stat, .pc_argsize = sizeof(struct nfsd_diropargs), - .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, - .pc_name = "RMDIR", }, [NFSPROC_READDIR] = { .pc_func = nfsd_proc_readdir, .pc_decode = nfssvc_decode_readdirargs, .pc_encode = nfssvc_encode_readdirres, .pc_argsize = sizeof(struct nfsd_readdirargs), - .pc_argzero = sizeof(struct nfsd_readdirargs), .pc_ressize = sizeof(struct nfsd_readdirres), .pc_cachetype = RC_NOCACHE, - .pc_name = "READDIR", }, [NFSPROC_STATFS] = { .pc_func = nfsd_proc_statfs, - .pc_decode = nfssvc_decode_fhandleargs, + .pc_decode = nfssvc_decode_fhandle, .pc_encode = nfssvc_encode_statfsres, .pc_argsize = sizeof(struct nfsd_fhandle), - .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_statfsres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+5, - .pc_name = "STATFS", }, }; @@ -848,3 +796,61 @@ const struct svc_version nfsd_version2 = { .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS2_SVC_XDRSIZE, }; + +/* + * Map errnos to NFS errnos. 
+ */ +__be32 +nfserrno (int errno) +{ + static struct { + __be32 nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, +#ifdef EDQUOT + { nfserr_dquot, -EDQUOT }, +#endif + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_jukebox, -ERESTARTSYS }, + { nfserr_jukebox, -EAGAIN }, + { nfserr_jukebox, -EWOULDBLOCK }, + { nfserr_jukebox, -ENOMEM }, + { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, + { nfserr_toosmall, -ETOOSMALL }, + { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 3d4fd40c987b..2e61a565cdbd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -30,10 +29,14 @@ #include "netns.h" #include "filecache.h" -#include "trace.h" - #define NFSDDBG_FACILITY NFSDDBG_SVC +bool inter_copy_offload_enable; +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. 
Default: false"); + extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) @@ -56,17 +59,18 @@ static __be32 nfsd_init_request(struct svc_rqst *, struct svc_process_info *); /* - * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members - * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members + * of the svc_serv struct. In particular, ->sv_nrthreads but also to some + * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt * * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a - * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless - * nn->keep_active is set). That number of nfsd threads must - * exist and each must be listed in ->sp_all_threads in some entry of - * ->sv_pools[]. + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number + * of nfsd threads must exist and each must listed in ->sp_all_threads in each + * entry of ->sv_pools[]. * - * Each active thread holds a counted reference on nn->nfsd_serv, as does - * the nn->keep_active flag and various transient calls to svc_get(). + * Transitions of the thread count between zero and non-zero are of particular + * interest since the svc_serv needs to be created and initialized at that + * point, or freed. * * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in @@ -84,19 +88,15 @@ DEFINE_MUTEX(nfsd_mutex); * version 4.1 DRC caches. * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
*/ -DEFINE_SPINLOCK(nfsd_drc_lock); +spinlock_t nfsd_drc_lock; unsigned long nfsd_drc_max_mem; unsigned long nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static const struct svc_version *nfsd_acl_version[] = { -# if defined(CONFIG_NFSD_V2_ACL) [2] = &nfsd_acl_version2, -# endif -# if defined(CONFIG_NFSD_V3_ACL) [3] = &nfsd_acl_version3, -# endif }; #define NFSD_ACL_MINVERS 2 @@ -120,10 +120,10 @@ static struct svc_stat nfsd_acl_svcstats = { #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ static const struct svc_version *nfsd_version[] = { -#if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, -#endif +#if defined(CONFIG_NFSD_V3) [3] = &nfsd_version3, +#endif #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, #endif @@ -297,13 +297,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred) if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; - error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; @@ -312,7 +312,7 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred) static int nfsd_users = 0; -static int nfsd_startup_generic(void) +static int nfsd_startup_generic(int nrservs) { int ret; @@ -349,60 +349,36 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -/** - * nfsd_copy_write_verifier - Atomically copy a write verifier - * @verf: buffer in which to receive the verifier cookie - * @nn: NFS net namespace - * - * This function provides a wait-free mechanism for 
copying the - * namespace's write verifier without tearing it. - */ -void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) +void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn) { int seq = 0; do { - read_seqbegin_or_lock(&nn->writeverf_lock, &seq); - memcpy(verf, nn->writeverf, sizeof(nn->writeverf)); - } while (need_seqretry(&nn->writeverf_lock, seq)); - done_seqretry(&nn->writeverf_lock, seq); + read_seqbegin_or_lock(&nn->boot_lock, &seq); + /* + * This is opaque to client, so no need to byte-swap. Use + * __force to keep sparse happy. y2038 time_t overflow is + * irrelevant in this usage + */ + verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; + } while (need_seqretry(&nn->boot_lock, seq)); + done_seqretry(&nn->boot_lock, seq); } -static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) +static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn) { - struct timespec64 now; - u64 verf; - - /* - * Because the time value is hashed, y2038 time_t overflow - * is irrelevant in this usage. - */ - ktime_get_raw_ts64(&now); - verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key); - memcpy(nn->writeverf, &verf, sizeof(nn->writeverf)); + ktime_get_real_ts64(&nn->nfssvc_boot); } -/** - * nfsd_reset_write_verifier - Generate a new write verifier - * @nn: NFS net namespace - * - * This function updates the ->writeverf field of @nn. This field - * contains an opaque cookie that, according to Section 18.32.3 of - * RFC 8881, "the client can use to determine whether a server has - * changed instance state (e.g., server restart) between a call to - * WRITE and a subsequent call to either WRITE or COMMIT. This - * cookie MUST be unchanged during a single instance of the NFSv4.1 - * server and MUST be unique between instances of the NFSv4.1 - * server." 
- */ -void nfsd_reset_write_verifier(struct nfsd_net *nn) +void nfsd_reset_boot_verifier(struct nfsd_net *nn) { - write_seqlock(&nn->writeverf_lock); - nfsd_reset_write_verifier_locked(nn); - write_sequnlock(&nn->writeverf_lock); + write_seqlock(&nn->boot_lock); + nfsd_reset_boot_verifier_locked(nn); + write_sequnlock(&nn->boot_lock); } -static int nfsd_startup_net(struct net *net, const struct cred *cred) +static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; @@ -410,7 +386,7 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (nn->nfsd_net_up) return 0; - ret = nfsd_startup_generic(); + ret = nfsd_startup_generic(nrservs); if (ret) return ret; ret = nfsd_init_socks(net, cred); @@ -431,9 +407,6 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_init_umount_work(nn); -#endif nn->nfsd_net_up = true; return 0; @@ -463,7 +436,6 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } -static DEFINE_SPINLOCK(nfsd_notifier_lock); static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -473,17 +445,18 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if (event != NETDEV_DOWN || !nn->nfsd_serv) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nn->ntf_refcnt)) goto out; - spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } - spin_unlock(&nfsd_notifier_lock); + atomic_dec(&nn->ntf_refcnt); + wake_up(&nn->ntf_wq); out: return NOTIFY_DONE; @@ -503,10 +476,10 @@ static int 
nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if (event != NETDEV_DOWN || !nn->nfsd_serv) + if ((event != NETDEV_DOWN) || + !atomic_inc_not_zero(&nn->ntf_refcnt)) goto out; - spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; @@ -515,8 +488,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - spin_unlock(&nfsd_notifier_lock); - + atomic_dec(&nn->ntf_refcnt); + wake_up(&nn->ntf_wq); out: return NOTIFY_DONE; } @@ -529,15 +502,11 @@ static struct notifier_block nfsd_inet6addr_notifier = { /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); -void nfsd_last_thread(struct net *net) +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct svc_serv *serv = nn->nfsd_serv; - - spin_lock(&nfsd_notifier_lock); - nn->nfsd_serv = NULL; - spin_unlock(&nfsd_notifier_lock); + atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -545,8 +514,7 @@ void nfsd_last_thread(struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - - svc_xprt_destroy_all(serv, net); + wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); /* * write_ports can create the server without actually starting @@ -599,6 +567,7 @@ static void set_max_drc(void) nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; + spin_lock_init(&nfsd_drc_lock); dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } @@ -623,6 +592,24 @@ static int 
nfsd_get_default_max_blksize(void) return ret; } +static const struct svc_serv_ops nfsd_thread_sv_ops = { + .svo_shutdown = nfsd_last_thread, + .svo_function = nfsd, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, + .svo_module = THIS_MODULE, +}; + +static void nfsd_complete_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + + nn->nfsd_serv = NULL; + complete(&nn->nfsd_shutdown_complete); +} + void nfsd_shutdown_threads(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -637,10 +624,11 @@ void nfsd_shutdown_threads(struct net *net) svc_get(serv); /* Kill outstanding nfsd threads */ - svc_set_num_threads(serv, NULL, 0); - nfsd_last_thread(net); - svc_put(serv); + serv->sv_ops->svo_setup(serv, NULL, 0); + nfsd_destroy(net); mutex_unlock(&nfsd_mutex); + /* Wait for shutdown of nfsd_serv to complete */ + wait_for_completion(&nn->nfsd_shutdown_complete); } bool i_am_nfsd(void) @@ -652,7 +640,6 @@ int nfsd_create_serv(struct net *net) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv) { @@ -662,19 +649,19 @@ int nfsd_create_serv(struct net *net) if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); - if (serv == NULL) + nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + &nfsd_thread_sv_ops); + if (nn->nfsd_serv == NULL) return -ENOMEM; + init_completion(&nn->nfsd_shutdown_complete); - serv->sv_maxconn = nn->max_connections; - error = svc_bind(serv, net); + nn->nfsd_serv->sv_maxconn = nn->max_connections; + error = svc_bind(nn->nfsd_serv, net); if (error < 0) { - svc_put(serv); + svc_destroy(nn->nfsd_serv); + nfsd_complete_shutdown(net); return error; } - spin_lock(&nfsd_notifier_lock); - nn->nfsd_serv = serv; - 
spin_unlock(&nfsd_notifier_lock); set_max_drc(); /* check if the notifier is already set */ @@ -684,7 +671,8 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - nfsd_reset_write_verifier(nn); + atomic_inc(&nn->ntf_refcnt); + nfsd_reset_boot_verifier(nn); return 0; } @@ -711,6 +699,18 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) return 0; } +void nfsd_destroy(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int destroy = (nn->nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nn->nfsd_serv, net); + svc_destroy(nn->nfsd_serv); + if (destroy) + nfsd_complete_shutdown(net); +} + int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; @@ -735,7 +735,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) if (tot > NFSD_MAXSERVS) { /* total too large: scale down requested numbers */ for (i = 0; i < n && tot > 0; i++) { - int new = nthreads[i] * NFSD_MAXSERVS / tot; + int new = nthreads[i] * NFSD_MAXSERVS / tot; tot -= (nthreads[i] - new); nthreads[i] = new; } @@ -755,13 +755,12 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nn->nfsd_serv, - &nn->nfsd_serv->sv_pools[i], - nthreads[i]); + err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; } - svc_put(nn->nfsd_serv); + nfsd_destroy(net); return err; } @@ -776,7 +775,6 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) int error; bool nfsd_up_before; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct svc_serv *serv; mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); @@ -788,7 +786,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; - strscpy(nn->nfsd_name, utsname()->nodename, + 
strlcpy(nn->nfsd_name, utsname()->nodename, sizeof(nn->nfsd_name)); error = nfsd_create_serv(net); @@ -796,25 +794,24 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) goto out; nfsd_up_before = nn->nfsd_net_up; - serv = nn->nfsd_serv; - error = nfsd_startup_net(net, cred); + error = nfsd_startup_net(nrservs, net, cred); if (error) - goto out_put; - error = svc_set_num_threads(serv, NULL, nrservs); + goto out_destroy; + error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, + NULL, nrservs); if (error) goto out_shutdown; - error = serv->sv_nrthreads; - if (error == 0) - nfsd_last_thread(net); + /* We are holding a reference to nn->nfsd_serv which + * we don't want to count in the return value, + * so subtract 1 + */ + error = nn->nfsd_serv->sv_nrthreads - 1; out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown_net(net); -out_put: - /* Threads now hold service active */ - if (xchg(&nn->keep_active, 0)) - svc_put(serv); - svc_put(serv); +out_destroy: + nfsd_destroy(net); /* Release server */ out: mutex_unlock(&nfsd_mutex); return error; @@ -928,6 +925,9 @@ nfsd(void *vrqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; + /* Lock module and set up kernel thread */ + mutex_lock(&nfsd_mutex); + /* At this point, the thread shares current->fs * with the init process. We need to create files with the * umask as defined by the client instead of init's umask. 
*/ @@ -938,7 +938,17 @@ nfsd(void *vrqstp) current->fs->umask = 0; - atomic_inc(&nfsdstats.th_cnt); + /* + * thread is spawned with all signals set to SIG_IGN, re-enable + * the ones that will bring down the thread + */ + allow_signal(SIGKILL); + allow_signal(SIGHUP); + allow_signal(SIGINT); + allow_signal(SIGQUIT); + + nfsdstats.th_cnt++; + mutex_unlock(&nfsd_mutex); set_freezable(); @@ -962,14 +972,57 @@ nfsd(void *vrqstp) validate_process_creds(); } - atomic_dec(&nfsdstats.th_cnt); + /* Clear signals before calling svc_exit_thread() */ + flush_signals(current); + + mutex_lock(&nfsd_mutex); + nfsdstats.th_cnt --; out: + rqstp->rq_server = NULL; + /* Release the thread */ svc_exit_thread(rqstp); + + nfsd_destroy(net); + + /* Release module */ + mutex_unlock(&nfsd_mutex); + module_put_and_exit(0); return 0; } +/* + * A write procedure can have a large argument, and a read procedure can + * have a large reply, but no NFSv2 or NFSv3 procedure has argument and + * reply that can both be larger than a page. The xdr code has taken + * advantage of this assumption to be a sloppy about bounds checking in + * some cases. 
Pending a rewrite of the NFSv2/v3 xdr code to fix that + * problem, we enforce these assumptions here: + */ +static bool nfs_request_too_big(struct svc_rqst *rqstp, + const struct svc_procedure *proc) +{ + /* + * The ACL code has more careful bounds-checking and is not + * susceptible to this problem: + */ + if (rqstp->rq_prog != NFS_PROGRAM) + return false; + /* + * Ditto NFSv4 (which can in theory have argument and reply both + * more than a page): + */ + if (rqstp->rq_vers >= 4) + return false; + /* The reply will be small, we're OK: */ + if (proc->pc_xdrressize > 0 && + proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE)) + return false; + + return rqstp->rq_arg.len > PAGE_SIZE; +} + /** * nfsd_dispatch - Process an NFS or NFSACL Request * @rqstp: incoming request @@ -984,15 +1037,22 @@ out: int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { const struct svc_procedure *proc = rqstp->rq_procinfo; + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + __be32 *p; + + dprintk("nfsd_dispatch: vers %d proc %d\n", + rqstp->rq_vers, rqstp->rq_proc); + + if (nfs_request_too_big(rqstp, proc)) + goto out_too_large; /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) */ rqstp->rq_cachetype = proc->pc_cachetype; - - svcxdr_init_decode(rqstp); - if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) + if (!proc->pc_decode(rqstp, argv->iov_base)) goto out_decode_err; switch (nfsd_cache_lookup(rqstp)) { @@ -1008,64 +1068,43 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) * Need to grab the location to store the status, as * NFSv4 does some encoding while processing */ - svcxdr_init_encode(rqstp); + p = resv->iov_base + resv->iov_len; + resv->iov_len += sizeof(__be32); *statp = proc->pc_func(rqstp); - if (test_bit(RQ_DROPME, &rqstp->rq_flags)) + if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; - if (!proc->pc_encode(rqstp, 
&rqstp->rq_res_stream)) + if (!proc->pc_encode(rqstp, p)) goto out_encode_err; nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); out_cached_reply: return 1; +out_too_large: + dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers); + *statp = rpc_garbage_args; + return 1; + out_decode_err: - trace_nfsd_garbage_args_err(rqstp); + dprintk("nfsd: failed to decode arguments!\n"); *statp = rpc_garbage_args; return 1; out_update_drop: + dprintk("nfsd: Dropping request; may be revisited later\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); out_dropit: return 0; out_encode_err: - trace_nfsd_cant_encode_err(rqstp); + dprintk("nfsd: failed to encode result!\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); *statp = rpc_system_err; return 1; } -/** - * nfssvc_decode_voidarg - Decode void arguments - * @rqstp: Server RPC transaction context - * @xdr: XDR stream positioned at arguments to decode - * - * Return values: - * %false: Arguments were not valid - * %true: Decoding was successful - */ -bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - return true; -} - -/** - * nfssvc_encode_voidres - Encode void results - * @rqstp: Server RPC transaction context - * @xdr: XDR stream into which to encode results - * - * Return values: - * %false: Local error while encoding - * %true: Encoding was successful - */ -bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) -{ - return true; -} - int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; @@ -1076,6 +1115,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) mutex_unlock(&nfsd_mutex); return -ENODEV; } + /* bump up the psudo refcount while traversing */ svc_get(nn->nfsd_serv); ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); @@ -1084,12 +1124,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { - struct seq_file *seq = 
file->private_data; - struct svc_serv *serv = seq->private; int ret = seq_release(inode, file); + struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); - svc_put(serv); + /* this function really, really should have been called svc_put() */ + nfsd_destroy(net); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index caf6355b18fa..8a288c8fcd57 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -9,10 +9,12 @@ #include "xdr.h" #include "auth.h" +#define NFSDDBG_FACILITY NFSDDBG_XDR + /* * Mapping of S_IF* types to NFS file types */ -static const u32 nfs_ftypes[] = { +static u32 nfs_ftypes[] = { NFNON, NFCHR, NFCHR, NFBAD, NFDIR, NFBAD, NFBLK, NFBAD, NFREG, NFBAD, NFLNK, NFBAD, @@ -21,168 +23,93 @@ static const u32 nfs_ftypes[] = { /* - * Basic NFSv2 data types (RFC 1094 Section 2.3) + * XDR functions for basic NFS types */ - -/** - * svcxdr_encode_stat - Encode an NFSv2 status code - * @xdr: XDR stream - * @status: status value to encode - * - * Return values: - * %false: Send buffer space was exhausted - * %true: Success - */ -bool -svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status) +static __be32 * +decode_fh(__be32 *p, struct svc_fh *fhp) { - __be32 *p; - - p = xdr_reserve_space(xdr, sizeof(status)); - if (!p) - return false; - *p = status; - - return true; -} - -/** - * svcxdr_decode_fhandle - Decode an NFSv2 file handle - * @xdr: XDR stream positioned at an encoded NFSv2 FH - * @fhp: OUT: filled-in server file handle - * - * Return values: - * %false: The encoded file handle was not valid - * %true: @fhp has been initialized - */ -bool -svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) -{ - __be32 *p; - - p = xdr_inline_decode(xdr, NFS_FHSIZE); - if (!p) - return false; fh_init(fhp, NFS_FHSIZE); - memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE); + memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; - return true; + /* FIXME: Look up export pointer here 
and verify + * Sun Secure RPC if requested */ + return p + (NFS_FHSIZE >> 2); } -static bool -svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) +/* Helper function for NFSv2 ACL code */ +__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp) { - __be32 *p; - - p = xdr_reserve_space(xdr, NFS_FHSIZE); - if (!p) - return false; - memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE); - - return true; + return decode_fh(p, fhp); } static __be32 * -encode_timeval(__be32 *p, const struct timespec64 *time) +encode_fh(__be32 *p, struct svc_fh *fhp) { - *p++ = cpu_to_be32((u32)time->tv_sec); - if (time->tv_nsec) - *p++ = cpu_to_be32(time->tv_nsec / NSEC_PER_USEC); - else - *p++ = xdr_zero; + memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); + return p + (NFS_FHSIZE>> 2); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. + */ +static __be32 * +decode_filename(__be32 *p, char **namp, unsigned int *lenp) +{ + char *name; + unsigned int i; + + if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + } + return p; } -static bool -svcxdr_decode_filename(struct xdr_stream *xdr, char **name, unsigned int *len) +static __be32 * +decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns) { - u32 size, i; - __be32 *p; - char *c; - - if (xdr_stream_decode_u32(xdr, &size) < 0) - return false; - if (size == 0 || size > NFS_MAXNAMLEN) - return false; - p = xdr_inline_decode(xdr, size); - if (!p) - return false; - - *len = size; - *name = (char *)p; - for (i = 0, c = *name; i < size; i++, c++) - if (*c == '\0' || *c == '/') - return false; - - return true; -} - -static bool -svcxdr_decode_diropargs(struct xdr_stream *xdr, struct svc_fh *fhp, - char **name, unsigned int *len) -{ - return svcxdr_decode_fhandle(xdr, fhp) && - svcxdr_decode_filename(xdr, name, len); -} - -static bool 
-svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, - struct iattr *iap) -{ - u32 tmp1, tmp2; - __be32 *p; - - p = xdr_inline_decode(xdr, XDR_UNIT * 8); - if (!p) - return false; + u32 tmp, tmp1; iap->ia_valid = 0; - /* - * Some Sun clients put 0xffff in the mode field when they - * mean 0xffffffff. + /* Sun client bug compatibility check: some sun clients seem to + * put 0xffff in the mode field when they mean 0xffffffff. + * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah. */ - tmp1 = be32_to_cpup(p++); - if (tmp1 != (u32)-1 && tmp1 != 0xffff) { + if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) { iap->ia_valid |= ATTR_MODE; - iap->ia_mode = tmp1; + iap->ia_mode = tmp; } - - tmp1 = be32_to_cpup(p++); - if (tmp1 != (u32)-1) { - iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), tmp1); + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_uid = make_kuid(userns, tmp); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } - - tmp1 = be32_to_cpup(p++); - if (tmp1 != (u32)-1) { - iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), tmp1); + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_gid = make_kgid(userns, tmp); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } - - tmp1 = be32_to_cpup(p++); - if (tmp1 != (u32)-1) { + if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; - iap->ia_size = tmp1; + iap->ia_size = tmp; } - - tmp1 = be32_to_cpup(p++); - tmp2 = be32_to_cpup(p++); - if (tmp1 != (u32)-1 && tmp2 != (u32)-1) { + tmp = ntohl(*p++); tmp1 = ntohl(*p++); + if (tmp != (u32)-1 && tmp1 != (u32)-1) { iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; - iap->ia_atime.tv_sec = tmp1; - iap->ia_atime.tv_nsec = tmp2 * NSEC_PER_USEC; + iap->ia_atime.tv_sec = tmp; + iap->ia_atime.tv_nsec = tmp1 * 1000; } - - tmp1 = be32_to_cpup(p++); - tmp2 = be32_to_cpup(p++); - if (tmp1 != (u32)-1 && tmp2 != (u32)-1) { + tmp = ntohl(*p++); tmp1 = ntohl(*p++); + if (tmp != (u32)-1 && tmp1 != (u32)-1) { iap->ia_valid |= ATTR_MTIME | 
ATTR_MTIME_SET; - iap->ia_mtime.tv_sec = tmp1; - iap->ia_mtime.tv_nsec = tmp2 * NSEC_PER_USEC; + iap->ia_mtime.tv_sec = tmp; + iap->ia_mtime.tv_nsec = tmp1 * 1000; /* * Passing the invalid value useconds=1000000 for mtime * is a Sun convention for "set both mtime and atime to @@ -192,447 +119,476 @@ svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, * sattr in section 6.1 of "NFS Illustrated" by * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 */ - if (tmp2 == 1000000) + if (tmp1 == 1000000) iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET); } - - return true; + return p; } -/** - * svcxdr_encode_fattr - Encode NFSv2 file attributes - * @rqstp: Context of a completed RPC transaction - * @xdr: XDR stream - * @fhp: File handle to encode - * @stat: Attributes to encode - * - * Return values: - * %false: Send buffer space was exhausted - * %true: Success - */ -bool -svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, - const struct svc_fh *fhp, const struct kstat *stat) +static __be32 * +encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, + struct kstat *stat) { struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct dentry *dentry = fhp->fh_dentry; - int type = stat->mode & S_IFMT; + struct dentry *dentry = fhp->fh_dentry; + int type; struct timespec64 time; - __be32 *p; - u32 fsid; + u32 f; - p = xdr_reserve_space(xdr, XDR_UNIT * 17); - if (!p) - return false; + type = (stat->mode & S_IFMT); - *p++ = cpu_to_be32(nfs_ftypes[type >> 12]); - *p++ = cpu_to_be32((u32)stat->mode); - *p++ = cpu_to_be32((u32)stat->nlink); - *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); - *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); + *p++ = htonl(nfs_ftypes[type >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); + *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(type) && stat->size > 
NFS_MAXPATHLEN) - *p++ = cpu_to_be32(NFS_MAXPATHLEN); - else - *p++ = cpu_to_be32((u32) stat->size); - *p++ = cpu_to_be32((u32) stat->blksize); + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { + *p++ = htonl(NFS_MAXPATHLEN); + } else { + *p++ = htonl((u32) stat->size); + } + *p++ = htonl((u32) stat->blksize); if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = cpu_to_be32(new_encode_dev(stat->rdev)); + *p++ = htonl(new_encode_dev(stat->rdev)); else - *p++ = cpu_to_be32(0xffffffff); - *p++ = cpu_to_be32((u32)stat->blocks); - + *p++ = htonl(0xffffffff); + *p++ = htonl((u32) stat->blocks); switch (fsid_source(fhp)) { + default: + case FSIDSOURCE_DEV: + *p++ = htonl(new_encode_dev(stat->dev)); + break; case FSIDSOURCE_FSID: - fsid = (u32)fhp->fh_export->ex_fsid; + *p++ = htonl((u32) fhp->fh_export->ex_fsid); break; case FSIDSOURCE_UUID: - fsid = ((u32 *)fhp->fh_export->ex_uuid)[0]; - fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[1]; - fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[2]; - fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[3]; - break; - default: - fsid = new_encode_dev(stat->dev); + f = ((u32*)fhp->fh_export->ex_uuid)[0]; + f ^= ((u32*)fhp->fh_export->ex_uuid)[1]; + f ^= ((u32*)fhp->fh_export->ex_uuid)[2]; + f ^= ((u32*)fhp->fh_export->ex_uuid)[3]; + *p++ = htonl(f); break; } - *p++ = cpu_to_be32(fsid); - - *p++ = cpu_to_be32((u32)stat->ino); - p = encode_timeval(p, &stat->atime); + *p++ = htonl((u32) stat->ino); + *p++ = htonl((u32) stat->atime.tv_sec); + *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); time = stat->mtime; - lease_get_mtime(d_inode(dentry), &time); - p = encode_timeval(p, &time); - encode_timeval(p, &stat->ctime); + lease_get_mtime(d_inode(dentry), &time); + *p++ = htonl((u32) time.tv_sec); + *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); + *p++ = htonl((u32) stat->ctime.tv_sec); + *p++ = htonl(stat->ctime.tv_nsec ? 
stat->ctime.tv_nsec / 1000 : 0); - return true; + return p; +} + +/* Helper function for NFSv2 ACL code */ +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) +{ + return encode_fattr(rqstp, p, fhp, stat); } /* * XDR decode functions */ +int +nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_argsize_check(rqstp, p); +} -bool -nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_fhandle *args = rqstp->rq_argp; - return svcxdr_decode_fhandle(xdr, &args->fh); + p = decode_fh(p, &args->fh); + if (!p) + return 0; + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_sattrargs *args = rqstp->rq_argp; - return svcxdr_decode_fhandle(xdr, &args->fh) && - svcxdr_decode_sattr(rqstp, xdr, &args->attrs); + p = decode_fh(p, &args->fh); + if (!p) + return 0; + p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); + + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_diropargs *args = rqstp->rq_argp; - return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len); + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readargs *args = rqstp->rq_argp; - u32 totalcount; + unsigned int len; + int v; + p = decode_fh(p, &args->fh); + if (!p) + return 0; - if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - 
return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; - /* totalcount is ignored */ - if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return false; + args->offset = ntohl(*p++); + len = args->count = ntohl(*p++); + p++; /* totalcount - unused */ - return true; + len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); + + /* set up somewhere to store response. + * We take pages, put them on reslist and include in iovec + */ + v=0; + while (len > 0) { + struct page *p = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(p); + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); + len -= rqstp->rq_vec[v].iov_len; + v++; + } + args->vlen = v; + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_writeargs *args = rqstp->rq_argp; - u32 beginoffset, totalcount; + unsigned int len, hdr, dlen; + struct kvec *head = rqstp->rq_arg.head; - if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return false; - /* beginoffset is ignored */ - if (xdr_stream_decode_u32(xdr, &beginoffset) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->offset) < 0) - return false; - /* totalcount is ignored */ - if (xdr_stream_decode_u32(xdr, &totalcount) < 0) - return false; + p = decode_fh(p, &args->fh); + if (!p) + return 0; - /* opaque data */ - if (xdr_stream_decode_u32(xdr, &args->len) < 0) - return false; - if (args->len > NFSSVC_MAXBLKSIZE_V2) - return false; + p++; /* beginoffset */ + args->offset = ntohl(*p++); /* offset */ + p++; /* totalcount */ + len = args->len = ntohl(*p++); + /* + * The protocol specifies a maximum of 8192 bytes. + */ + if (len > NFSSVC_MAXBLKSIZE_V2) + return 0; - return xdr_stream_subsegment(xdr, &args->payload, args->len); + /* + * Check to make sure that we got the right number of + * bytes. 
+ */ + hdr = (void*)p - head->iov_base; + if (hdr > head->iov_len) + return 0; + dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; + + /* + * Round the length of the data which was specified up to + * the next multiple of XDR units and then compare that + * against the length which was actually received. + * Note that when RPCSEC/GSS (for example) is used, the + * data buffer can be padded so dlen might be larger + * than required. It must never be smaller. + */ + if (dlen < XDR_QUADLEN(len)*4) + return 0; + + args->first.iov_base = (void *)p; + args->first.iov_len = head->iov_len - hdr; + return 1; } -bool -nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_createargs *args = rqstp->rq_argp; - return svcxdr_decode_diropargs(xdr, &args->fh, - &args->name, &args->len) && - svcxdr_decode_sattr(rqstp, xdr, &args->attrs); + if ( !(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); + + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_renameargs *args = rqstp->rq_argp; - return svcxdr_decode_diropargs(xdr, &args->ffh, - &args->fname, &args->flen) && - svcxdr_decode_diropargs(xdr, &args->tfh, - &args->tname, &args->tlen); + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p) +{ + struct nfsd_readlinkargs *args = rqstp->rq_argp; + + p = decode_fh(p, &args->fh); + if (!p) + return 0; 
+ args->buffer = page_address(*(rqstp->rq_next_page++)); + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_linkargs *args = rqstp->rq_argp; - return svcxdr_decode_fhandle(xdr, &args->ffh) && - svcxdr_decode_diropargs(xdr, &args->tfh, - &args->tname, &args->tlen); + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); } -bool -nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_symlinkargs *args = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; + char *base = (char *)p; + size_t xdrlen; - if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen)) - return false; - if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) - return false; + if ( !(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen))) + return 0; + + args->tlen = ntohl(*p++); if (args->tlen == 0) - return false; + return 0; - args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); - args->first.iov_base = xdr_inline_decode(xdr, args->tlen); - if (!args->first.iov_base) - return false; - return svcxdr_decode_sattr(rqstp, xdr, &args->attrs); + args->first.iov_base = p; + args->first.iov_len = rqstp->rq_arg.head[0].iov_len; + args->first.iov_len -= (char *)p - base; + + /* This request is never larger than a page. Therefore, + * transport will deliver either: + * 1. pathname in the pagelist -> sattr is in the tail. + * 2. everything in the head buffer -> sattr is in the head. 
+ */ + if (rqstp->rq_arg.page_len) { + if (args->tlen != rqstp->rq_arg.page_len) + return 0; + p = rqstp->rq_arg.tail[0].iov_base; + } else { + xdrlen = XDR_QUADLEN(args->tlen); + if (xdrlen > args->first.iov_len - (8 * sizeof(__be32))) + return 0; + p += xdrlen; + } + decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); + + return 1; } -bool -nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readdirargs *args = rqstp->rq_argp; - if (!svcxdr_decode_fhandle(xdr, &args->fh)) - return false; - if (xdr_stream_decode_u32(xdr, &args->cookie) < 0) - return false; - if (xdr_stream_decode_u32(xdr, &args->count) < 0) - return false; + p = decode_fh(p, &args->fh); + if (!p) + return 0; + args->cookie = ntohl(*p++); + args->count = ntohl(*p++); + args->count = min_t(u32, args->count, PAGE_SIZE); + args->buffer = page_address(*(rqstp->rq_next_page++)); - return true; + return xdr_argsize_check(rqstp, p); } /* * XDR encode functions */ +int +nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p) +{ + return xdr_ressize_check(rqstp, p); +} -bool -nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_stat *resp = rqstp->rq_resp; - return svcxdr_encode_stat(xdr, resp->status); + *p++ = resp->status; + return xdr_ressize_check(rqstp, p); } -bool -nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_attrstat *resp = rqstp->rq_resp; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return false; - break; - } - - return true; + *p++ = resp->status; + if (resp->status != nfs_ok) + goto out; + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); +out: + return 
xdr_ressize_check(rqstp, p); } -bool -nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_diropres *resp = rqstp->rq_resp; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_fhandle(xdr, &resp->fh)) - return false; - if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) - return false; - break; - } - - return true; + *p++ = resp->status; + if (resp->status != nfs_ok) + goto out; + p = encode_fh(p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); +out: + return xdr_ressize_check(rqstp, p); } -bool -nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readlinkres *resp = rqstp->rq_resp; - struct kvec *head = rqstp->rq_res.head; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (xdr_stream_encode_u32(xdr, resp->len) < 0) - return false; - xdr_write_pages(xdr, &resp->page, 0, resp->len); - if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) - return false; - break; + *p++ = resp->status; + if (resp->status != nfs_ok) + return xdr_ressize_check(rqstp, p); + + *p++ = htonl(resp->len); + xdr_ressize_check(rqstp, p); + rqstp->rq_res.page_len = resp->len; + if (resp->len & 3) { + /* need to pad the tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); } - - return true; + return 1; } -bool -nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readres *resp = rqstp->rq_resp; - struct kvec *head = rqstp->rq_res.head; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - if (!svcxdr_encode_fattr(rqstp, xdr, 
&resp->fh, &resp->stat)) - return false; - if (xdr_stream_encode_u32(xdr, resp->count) < 0) - return false; - xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, - resp->count); - if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) - return false; - break; + *p++ = resp->status; + if (resp->status != nfs_ok) + return xdr_ressize_check(rqstp, p); + + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); + *p++ = htonl(resp->count); + xdr_ressize_check(rqstp, p); + + /* now update rqstp->rq_res to reflect data as well */ + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to pad the tail */ + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); } - - return true; + return 1; } -bool -nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_readdirres *resp = rqstp->rq_resp; - struct xdr_buf *dirlist = &resp->dirlist; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch (resp->status) { - case nfs_ok: - xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); - /* no more entries */ - if (xdr_stream_encode_item_absent(xdr) < 0) - return false; - if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) - return false; - break; - } + *p++ = resp->status; + if (resp->status != nfs_ok) + return xdr_ressize_check(rqstp, p); - return true; + xdr_ressize_check(rqstp, p); + p = resp->buffer; + *p++ = 0; /* no more entries */ + *p++ = htonl((resp->common.err == nfserr_eof)); + rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1; + + return 1; } -bool -nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +int +nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd_statfsres *resp = rqstp->rq_resp; struct kstatfs *stat = &resp->stats; - __be32 *p; - if (!svcxdr_encode_stat(xdr, resp->status)) - return false; - switch 
(resp->status) { - case nfs_ok: - p = xdr_reserve_space(xdr, XDR_UNIT * 5); - if (!p) - return false; - *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); - *p++ = cpu_to_be32(stat->f_bsize); - *p++ = cpu_to_be32(stat->f_blocks); - *p++ = cpu_to_be32(stat->f_bfree); - *p = cpu_to_be32(stat->f_bavail); - break; + *p++ = resp->status; + if (resp->status != nfs_ok) + return xdr_ressize_check(rqstp, p); + + *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ + *p++ = htonl(stat->f_bsize); + *p++ = htonl(stat->f_blocks); + *p++ = htonl(stat->f_bfree); + *p++ = htonl(stat->f_bavail); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_entry(void *ccdv, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = ccdv; + struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); + __be32 *p = cd->buffer; + int buflen, slen; + + /* + dprintk("nfsd: entry(%.*s off %ld ino %ld)\n", + namlen, name, offset, ino); + */ + + if (offset > ~((u32) 0)) { + cd->common.err = nfserr_fbig; + return -EINVAL; } + if (cd->offset) + *cd->offset = htonl(offset); - return true; -} + /* truncate filename */ + namlen = min(namlen, NFS2_MAXNAMLEN); + slen = XDR_QUADLEN(namlen); -/** - * nfssvc_encode_nfscookie - Encode a directory offset cookie - * @resp: readdir result context - * @offset: offset cookie to encode - * - * The buffer space for the offset cookie has already been reserved - * by svcxdr_encode_entry_common(). 
- */ -void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset) -{ - __be32 cookie = cpu_to_be32(offset); + if ((buflen = cd->buflen - slen - 4) < 0) { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + if (ino > ~((u32) 0)) { + cd->common.err = nfserr_fbig; + return -EINVAL; + } + *p++ = xdr_one; /* mark entry present */ + *p++ = htonl((u32) ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ + cd->offset = p; /* remember pointer */ + *p++ = htonl(~0U); /* offset of next entry */ - if (!resp->cookie_offset) - return; - - write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, - sizeof(cookie)); - resp->cookie_offset = 0; -} - -static bool -svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name, - int namlen, loff_t offset, u64 ino) -{ - struct xdr_buf *dirlist = &resp->dirlist; - struct xdr_stream *xdr = &resp->xdr; - - if (xdr_stream_encode_item_present(xdr) < 0) - return false; - /* fileid */ - if (xdr_stream_encode_u32(xdr, (u32)ino) < 0) - return false; - /* name */ - if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS2_MAXNAMLEN)) < 0) - return false; - /* cookie */ - resp->cookie_offset = dirlist->len; - if (xdr_stream_encode_u32(xdr, ~0U) < 0) - return false; - - return true; -} - -/** - * nfssvc_encode_entry - encode one NFSv2 READDIR entry - * @data: directory context - * @name: name of the object to be encoded - * @namlen: length of that name, in bytes - * @offset: the offset of the previous entry - * @ino: the fileid of this entry - * @d_type: unused - * - * Return values: - * %0: Entry was successfully encoded. 
- * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err - * - * On exit, the following fields are updated: - * - resp->xdr - * - resp->common.err - * - resp->cookie_offset - */ -int nfssvc_encode_entry(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct readdir_cd *ccd = data; - struct nfsd_readdirres *resp = container_of(ccd, - struct nfsd_readdirres, - common); - unsigned int starting_length = resp->dirlist.len; - - /* The offset cookie for the previous entry */ - nfssvc_encode_nfscookie(resp, offset); - - if (!svcxdr_encode_entry_common(resp, name, namlen, offset, ino)) - goto out_toosmall; - - xdr_commit_encode(&resp->xdr); - resp->common.err = nfs_ok; + cd->buflen = buflen; + cd->buffer = p; + cd->common.err = nfs_ok; return 0; - -out_toosmall: - resp->cookie_offset = 0; - resp->common.err = nfserr_toosmall; - resp->dirlist.len = starting_length; - return -EINVAL; } /* diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e94634d30591..9eae11a9d21c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -57,11 +57,11 @@ typedef struct { } stateid_t; typedef struct { - stateid_t cs_stid; + stateid_t stid; #define NFS4_COPY_STID 1 #define NFS4_COPYNOTIFY_STID 2 - unsigned char cs_type; - refcount_t cs_count; + unsigned char sc_type; + refcount_t sc_count; } copy_stateid_t; struct nfsd4_callback { @@ -149,7 +149,6 @@ struct nfs4_delegation { /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; - bool dl_recalled; }; #define cb_to_delegation(cb) \ @@ -175,7 +174,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 
160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 50 +#define NFSD_MAX_OPS_PER_COMPOUND 16 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ @@ -283,28 +282,6 @@ struct nfsd4_sessionid { #define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ -/* - * State Meaning Where set - * -------------------------------------------------------------------------- - * | NFSD4_ACTIVE | Confirmed, active | Default | - * |------------------- ----------------------------------------------------| - * | NFSD4_COURTESY | Courtesy state. | nfs4_get_client_reaplist | - * | | Lease/lock/share | | - * | | reservation conflict | | - * | | can cause Courtesy | | - * | | client to be expired | | - * |------------------------------------------------------------------------| - * | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat | - * | | expired by Laundromat| try_to_expire_client | - * | | due to conflict | | - * |------------------------------------------------------------------------| - */ -enum { - NFSD4_ACTIVE = 0, - NFSD4_COURTESY, - NFSD4_EXPIRABLE, -}; - /* * struct nfs4_client - one per client. Clientids live here. * @@ -368,7 +345,6 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) -#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; @@ -395,10 +371,6 @@ struct nfs4_client { /* debugging info directory under nfsd/clients/ : */ struct dentry *cl_nfsd_dentry; - /* 'info' file within that directory. 
Ref is not counted, - * but will remain valid iff cl_nfsd_dentry != NULL - */ - struct dentry *cl_nfsd_info_dentry; /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ @@ -409,13 +381,6 @@ struct nfs4_client { struct list_head async_copies; /* list of async copies */ spinlock_t async_lock; /* lock for async copies */ atomic_t cl_cb_inflight; /* Outstanding callbacks */ - - unsigned int cl_state; - atomic_t cl_delegs_in_recall; - - struct nfsd4_cb_recall_any *cl_ra; - time64_t cl_ra_time; - struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset @@ -541,13 +506,14 @@ struct nfs4_clnt_odstate { * inode can have multiple filehandles associated with it, so there is * (potentially) a many to one relationship between this struct and struct * inode. + * + * These are hashed by filehandle in the file_hashtbl, which is protected by + * the global state_lock spinlock. */ struct nfs4_file { refcount_t fi_ref; - struct inode * fi_inode; - bool fi_aliased; spinlock_t fi_lock; - struct rhlist_head fi_rlist; + struct hlist_node fi_hash; /* hash on fi_fhandle */ struct list_head fi_stateids; union { struct list_head fi_delegations; @@ -596,10 +562,6 @@ struct nfs4_ol_stateid { struct list_head st_locks; struct nfs4_stateowner *st_stateowner; struct nfs4_clnt_odstate *st_clnt_odstate; -/* - * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the - * comment above bmap_to_share_mode() for explanation: - */ unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; @@ -641,7 +603,6 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, - NFSPROC4_CLNT_CB_RECALL_ANY, }; /* Returns true iff a is later than b: */ @@ -662,7 +623,6 @@ struct nfsd4_blocked_lock { struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; - struct kref nbl_kref; }; struct nfsd4_compound_state; @@ -689,22 +649,26 @@ void 
nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *) extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn); -extern __be32 nfs4_check_open_reclaim(struct nfs4_client *); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); -extern bool nfsd4_run_cb(struct nfsd4_callback *cb); +extern void nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); +extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); +struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); +extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, @@ -729,9 +693,4 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); extern void nfsd4_record_grace_done(struct nfsd_net *nn); -static inline bool try_to_expire_client(struct nfs4_client *clp) -{ - cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE); - return 
clp->cl_state == NFSD4_EXPIRABLE; -} #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 777e24e5da33..b1bc582b0493 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -7,14 +7,16 @@ * Format: * rc * Statistsics for the reply cache - * fh + * fh * statistics for filehandle lookup * io * statistics for IO throughput - * th - * number of threads - * ra - * + * th <10%-20%> <20%-30%> ... <90%-100%> <100%> + * time (seconds) when nfsd thread usage above thresholds + * and number of times that all threads were in use + * ra cache-size <10% <20% <30% ... <100% not-found + * number of times that read-ahead entry was found that deep in + * the cache. * plus generic RPC stats (see net/sunrpc/stats.c) * * Copyright (C) 1995, 1996, 1997 Olaf Kirch @@ -32,28 +34,35 @@ struct svc_stat nfsd_svcstats = { .program = &nfsd_program, }; -static int nfsd_show(struct seq_file *seq, void *v) +static int nfsd_proc_show(struct seq_file *seq, void *v) { int i; - seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); - + seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", + nfsdstats.rchits, + nfsdstats.rcmisses, + nfsdstats.rcnocache, + nfsdstats.fh_stale, + nfsdstats.fh_lookup, + nfsdstats.fh_anon, + nfsdstats.fh_nocache_dir, + nfsdstats.fh_nocache_nondir, + nfsdstats.io_read, + nfsdstats.io_write); /* thread usage: */ - seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); - - /* deprecated thread usage histogram stats */ - for (i = 0; i < 10; i++) - seq_puts(seq, " 0.000"); - - /* deprecated ra-cache 
stats */ - seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n"); + seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); + for (i=0; i<10; i++) { + unsigned int jifs = nfsdstats.th_usage[i]; + unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ; + seq_printf(seq, " %u.%03u", sec, msec); + } + /* newline and ra-cache */ + seq_printf(seq, "\nra %u", nfsdstats.ra_size); + for (i=0; i<11; i++) + seq_printf(seq, " %u", nfsdstats.ra_depth[i]); + seq_putc(seq, '\n'); + /* show my rpc info */ svc_seq_show(seq, &nfsd_svcstats); @@ -61,10 +70,8 @@ static int nfsd_show(struct seq_file *seq, void *v) /* Show count for individual nfsv4 operations */ /* Writing operation numbers 0 1 2 also for maintaining uniformity */ seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); - for (i = 0; i <= LAST_NFS4_OP; i++) { - seq_printf(seq, " %lld", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); - } + for (i = 0; i <= LAST_NFS4_OP; i++) + seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]); seq_putc(seq, '\n'); #endif @@ -72,65 +79,26 @@ static int nfsd_show(struct seq_file *seq, void *v) return 0; } -DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); - -int nfsd_percpu_counters_init(struct percpu_counter counters[], int num) +static int nfsd_proc_open(struct inode *inode, struct file *file) { - int i, err = 0; - - for (i = 0; !err && i < num; i++) - err = percpu_counter_init(&counters[i], 0, GFP_KERNEL); - - if (!err) - return 0; - - for (; i > 0; i--) - percpu_counter_destroy(&counters[i-1]); - - return err; + return single_open(file, nfsd_proc_show, NULL); } -void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num) +static const struct proc_ops nfsd_proc_ops = { + .proc_open = nfsd_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +void +nfsd_stat_init(void) { - int i; - - for (i = 0; i < num; i++) - percpu_counter_set(&counters[i], 0); -} - -void nfsd_percpu_counters_destroy(struct percpu_counter 
counters[], int num) -{ - int i; - - for (i = 0; i < num; i++) - percpu_counter_destroy(&counters[i]); -} - -static int nfsd_stat_counters_init(void) -{ - return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); -} - -static void nfsd_stat_counters_destroy(void) -{ - nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); -} - -int nfsd_stat_init(void) -{ - int err; - - err = nfsd_stat_counters_init(); - if (err) - return err; - svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); - - return 0; } -void nfsd_stat_shutdown(void) +void +nfsd_stat_shutdown(void) { - nfsd_stat_counters_destroy(); svc_proc_unregister(&init_net, "nfsd"); } diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index 9b43dc3d9991..b23fdac69820 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -8,89 +8,37 @@ #define _NFSD_STATS_H #include -#include -enum { - NFSD_STATS_RC_HITS, /* repcache hits */ - NFSD_STATS_RC_MISSES, /* repcache misses */ - NFSD_STATS_RC_NOCACHE, /* uncached reqs */ - NFSD_STATS_FH_STALE, /* FH stale error */ - NFSD_STATS_IO_READ, /* bytes returned to read requests */ - NFSD_STATS_IO_WRITE, /* bytes passed in write requests */ -#ifdef CONFIG_NFSD_V4 - NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ - NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, -#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) -#endif - NFSD_STATS_COUNTERS_NUM -}; - struct nfsd_stats { - struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; + unsigned int rchits; /* repcache hits */ + unsigned int rcmisses; /* repcache hits */ + unsigned int rcnocache; /* uncached reqs */ + unsigned int fh_stale; /* FH stale error */ + unsigned int fh_lookup; /* dentry cached */ + unsigned int fh_anon; /* anon file dentry returned */ + unsigned int fh_nocache_dir; /* filehandle not found in dcache */ + unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ + unsigned int io_read; /* bytes returned to 
read requests */ + unsigned int io_write; /* bytes passed in write requests */ + unsigned int th_cnt; /* number of available threads */ + unsigned int th_usage[10]; /* number of ticks during which n perdeciles + * of available threads were in use */ + unsigned int th_fullcnt; /* number of times last free thread was used */ + unsigned int ra_size; /* size of ra cache */ + unsigned int ra_depth[11]; /* number of times ra entry was found that deep + * in the cache (10percentiles). [10] = not found */ +#ifdef CONFIG_NFSD_V4 + unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ +#endif - atomic_t th_cnt; /* number of available threads */ }; -extern struct nfsd_stats nfsdstats; +extern struct nfsd_stats nfsdstats; extern struct svc_stat nfsd_svcstats; -int nfsd_percpu_counters_init(struct percpu_counter counters[], int num); -void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num); -void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num); -int nfsd_stat_init(void); -void nfsd_stat_shutdown(void); - -static inline void nfsd_stats_rc_hits_inc(void) -{ - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]); -} - -static inline void nfsd_stats_rc_misses_inc(void) -{ - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]); -} - -static inline void nfsd_stats_rc_nocache_inc(void) -{ - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]); -} - -static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp) -{ - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]); - if (exp) - percpu_counter_inc(&exp->ex_stats.counter[EXP_STATS_FH_STALE]); -} - -static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount) -{ - percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount); - if (exp) - percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_READ], amount); -} - -static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount) -{ - 
percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount); - if (exp) - percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_WRITE], amount); -} - -static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn) -{ - percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]); -} - -static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount) -{ - percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); -} - -static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount) -{ - percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); -} +void nfsd_stat_init(void); +void nfsd_stat_shutdown(void); #endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index f008b95ceec2..90967466a1e5 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,4 +1,3 @@ -// SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 445d00f00eab..a952f4a9b2a6 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -12,86 +12,6 @@ #include "export.h" #include "nfsfh.h" -#define NFSD_TRACE_PROC_ARG_FIELDS \ - __field(unsigned int, netns_ino) \ - __field(u32, xid) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) - -#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \ - do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ - } while (0); - -#define NFSD_TRACE_PROC_RES_FIELDS \ - __field(unsigned int, netns_ino) \ - __field(u32, xid) \ - __field(unsigned long, status) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) - -#define 
NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \ - do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ - __entry->status = be32_to_cpu(error); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ - } while (0); - -DECLARE_EVENT_CLASS(nfsd_xdr_err_class, - TP_PROTO( - const struct svc_rqst *rqstp - ), - TP_ARGS(rqstp), - TP_STRUCT__entry( - NFSD_TRACE_PROC_ARG_FIELDS - - __field(u32, vers) - __field(u32, proc) - ), - TP_fast_assign( - NFSD_TRACE_PROC_ARG_ASSIGNMENTS - - __entry->vers = rqstp->rq_vers; - __entry->proc = rqstp->rq_proc; - ), - TP_printk("xid=0x%08x vers=%u proc=%u", - __entry->xid, __entry->vers, __entry->proc - ) -); - -#define DEFINE_NFSD_XDR_ERR_EVENT(name) \ -DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \ - TP_PROTO(const struct svc_rqst *rqstp), \ - TP_ARGS(rqstp)) - -DEFINE_NFSD_XDR_ERR_EVENT(garbage_args); -DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); - -#define show_nfsd_may_flags(x) \ - __print_flags(x, "|", \ - { NFSD_MAY_EXEC, "EXEC" }, \ - { NFSD_MAY_WRITE, "WRITE" }, \ - { NFSD_MAY_READ, "READ" }, \ - { NFSD_MAY_SATTR, "SATTR" }, \ - { NFSD_MAY_TRUNC, "TRUNC" }, \ - { NFSD_MAY_LOCK, "LOCK" }, \ - { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \ - { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \ - { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \ - { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ - { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ - { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ - { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) - TRACE_EVENT(nfsd_compound, TP_PROTO(const struct svc_rqst *rqst, u32 args_opcnt), @@ -131,56 +51,6 @@ TRACE_EVENT(nfsd_compound_status, __get_str(name), __entry->status) ) -TRACE_EVENT(nfsd_compound_decode_err, - TP_PROTO( - const struct svc_rqst *rqstp, - u32 args_opcnt, - u32 resp_opcnt, - u32 opnum, - __be32 status - ), - 
TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status), - TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS - - __field(u32, args_opcnt) - __field(u32, resp_opcnt) - __field(u32, opnum) - ), - TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) - - __entry->args_opcnt = args_opcnt; - __entry->resp_opcnt = resp_opcnt; - __entry->opnum = opnum; - ), - TP_printk("op=%u/%u opnum=%u status=%lu", - __entry->resp_opcnt, __entry->args_opcnt, - __entry->opnum, __entry->status) -); - -TRACE_EVENT(nfsd_compound_encode_err, - TP_PROTO( - const struct svc_rqst *rqstp, - u32 opnum, - __be32 status - ), - TP_ARGS(rqstp, opnum, status), - TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS - - __field(u32, opnum) - ), - TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) - - __entry->opnum = opnum; - ), - TP_printk("opnum=%u status=%lu", - __entry->opnum, __entry->status) -); - - DECLARE_EVENT_CLASS(nfsd_fh_err_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -377,106 +247,10 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name, \ DEFINE_NFSD_ERR_EVENT(read_err); DEFINE_NFSD_ERR_EVENT(write_err); -TRACE_EVENT(nfsd_dirent, - TP_PROTO(struct svc_fh *fhp, - u64 ino, - const char *name, - int namlen), - TP_ARGS(fhp, ino, name, namlen), - TP_STRUCT__entry( - __field(u32, fh_hash) - __field(u64, ino) - __field(int, len) - __dynamic_array(unsigned char, name, namlen) - ), - TP_fast_assign( - __entry->fh_hash = fhp ? 
knfsd_fh_hash(&fhp->fh_handle) : 0; - __entry->ino = ino; - __entry->len = namlen; - memcpy(__get_str(name), name, namlen); - ), - TP_printk("fh_hash=0x%08x ino=%llu name=%.*s", - __entry->fh_hash, __entry->ino, - __entry->len, __get_str(name)) -) - -DECLARE_EVENT_CLASS(nfsd_copy_err_class, - TP_PROTO(struct svc_rqst *rqstp, - struct svc_fh *src_fhp, - loff_t src_offset, - struct svc_fh *dst_fhp, - loff_t dst_offset, - u64 count, - int status), - TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status), - TP_STRUCT__entry( - __field(u32, xid) - __field(u32, src_fh_hash) - __field(loff_t, src_offset) - __field(u32, dst_fh_hash) - __field(loff_t, dst_offset) - __field(u64, count) - __field(int, status) - ), - TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle); - __entry->src_offset = src_offset; - __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle); - __entry->dst_offset = dst_offset; - __entry->count = count; - __entry->status = status; - ), - TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld " - "dst_fh_hash=0x%08x dst_offset=%lld " - "count=%llu status=%d", - __entry->xid, __entry->src_fh_hash, __entry->src_offset, - __entry->dst_fh_hash, __entry->dst_offset, - (unsigned long long)__entry->count, - __entry->status) -) - -#define DEFINE_NFSD_COPY_ERR_EVENT(name) \ -DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \ - TP_PROTO(struct svc_rqst *rqstp, \ - struct svc_fh *src_fhp, \ - loff_t src_offset, \ - struct svc_fh *dst_fhp, \ - loff_t dst_offset, \ - u64 count, \ - int status), \ - TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \ - count, status)) - -DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); - #include "state.h" #include "filecache.h" #include "vfs.h" -TRACE_EVENT(nfsd_delegret_wakeup, - TP_PROTO( - const struct svc_rqst *rqstp, - const struct inode *inode, - long timeo - ), - TP_ARGS(rqstp, inode, timeo), - TP_STRUCT__entry( - __field(u32, xid) - 
__field(const void *, inode) - __field(long, timeo) - ), - TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->inode = inode; - __entry->timeo = timeo; - ), - TP_printk("xid=0x%08x inode=%p%s", - __entry->xid, __entry->inode, - __entry->timeo == 0 ? " (timed out)" : "" - ) -); - DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), @@ -517,7 +291,7 @@ DEFINE_STATEID_EVENT(layout_recall_release); DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); -DEFINE_STATEID_EVENT(deleg_return); +DEFINE_STATEID_EVENT(deleg_break); DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, @@ -550,61 +324,6 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ DEFINE_STATESEQID_EVENT(preprocess); DEFINE_STATESEQID_EVENT(open_confirm); -TRACE_DEFINE_ENUM(NFS4_OPEN_STID); -TRACE_DEFINE_ENUM(NFS4_LOCK_STID); -TRACE_DEFINE_ENUM(NFS4_DELEG_STID); -TRACE_DEFINE_ENUM(NFS4_CLOSED_STID); -TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID); -TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID); -TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID); - -#define show_stid_type(x) \ - __print_flags(x, "|", \ - { NFS4_OPEN_STID, "OPEN" }, \ - { NFS4_LOCK_STID, "LOCK" }, \ - { NFS4_DELEG_STID, "DELEG" }, \ - { NFS4_CLOSED_STID, "CLOSED" }, \ - { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \ - { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \ - { NFS4_LAYOUT_STID, "LAYOUT" }) - -DECLARE_EVENT_CLASS(nfsd_stid_class, - TP_PROTO( - const struct nfs4_stid *stid - ), - TP_ARGS(stid), - TP_STRUCT__entry( - __field(unsigned long, sc_type) - __field(int, sc_count) - __field(u32, cl_boot) - __field(u32, cl_id) - __field(u32, si_id) - __field(u32, si_generation) - ), - TP_fast_assign( - const stateid_t *stp = &stid->sc_stateid; - - __entry->sc_type = stid->sc_type; - __entry->sc_count = refcount_read(&stid->sc_count); - __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; - __entry->cl_id = stp->si_opaque.so_clid.cl_id; - __entry->si_id = stp->si_opaque.so_id; - 
__entry->si_generation = stp->si_generation; - ), - TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s", - __entry->cl_boot, __entry->cl_id, - __entry->si_id, __entry->si_generation, - __entry->sc_count, show_stid_type(__entry->sc_type) - ) -); - -#define DEFINE_STID_EVENT(name) \ -DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \ - TP_PROTO(const struct nfs4_stid *stid), \ - TP_ARGS(stid)) - -DEFINE_STID_EVENT(revoke); - DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), @@ -624,12 +343,7 @@ DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \ TP_PROTO(const clientid_t *clid), \ TP_ARGS(clid)) -DEFINE_CLIENTID_EVENT(expire_unconf); -DEFINE_CLIENTID_EVENT(reclaim_complete); -DEFINE_CLIENTID_EVENT(confirmed); -DEFINE_CLIENTID_EVENT(destroyed); -DEFINE_CLIENTID_EVENT(admin_expired); -DEFINE_CLIENTID_EVENT(replaced); +DEFINE_CLIENTID_EVENT(expired); DEFINE_CLIENTID_EVENT(purged); DEFINE_CLIENTID_EVENT(renew); DEFINE_CLIENTID_EVENT(stale); @@ -654,145 +368,56 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \ DEFINE_NET_EVENT(grace_start); DEFINE_NET_EVENT(grace_complete); -TRACE_EVENT(nfsd_writeverf_reset, - TP_PROTO( - const struct nfsd_net *nn, - const struct svc_rqst *rqstp, - int error - ), - TP_ARGS(nn, rqstp, error), - TP_STRUCT__entry( - __field(unsigned long long, boot_time) - __field(u32, xid) - __field(int, error) - __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - ), - TP_fast_assign( - __entry->boot_time = nn->boot_time; - __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->error = error; - - /* avoid seqlock inside TP_fast_assign */ - memcpy(__entry->verifier, nn->writeverf, - NFS4_VERIFIER_SIZE); - ), - TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s", - __entry->boot_time, __entry->xid, __entry->error, - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) - ) -); - -TRACE_EVENT(nfsd_clid_cred_mismatch, - TP_PROTO( - const struct nfs4_client *clp, - const struct svc_rqst *rqstp - 
), - TP_ARGS(clp, rqstp), - TP_STRUCT__entry( - __field(u32, cl_boot) - __field(u32, cl_id) - __field(unsigned long, cl_flavor) - __field(unsigned long, new_flavor) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) - ), - TP_fast_assign( - __entry->cl_boot = clp->cl_clientid.cl_boot; - __entry->cl_id = clp->cl_clientid.cl_id; - __entry->cl_flavor = clp->cl_cred.cr_flavor; - __entry->new_flavor = rqstp->rq_cred.cr_flavor; - memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote, - sizeof(struct sockaddr_in6)); - ), - TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc", - __entry->cl_boot, __entry->cl_id, - show_nfsd_authflavor(__entry->cl_flavor), - show_nfsd_authflavor(__entry->new_flavor), __entry->addr - ) -) - -TRACE_EVENT(nfsd_clid_verf_mismatch, - TP_PROTO( - const struct nfs4_client *clp, - const struct svc_rqst *rqstp, - const nfs4_verifier *verf - ), - TP_ARGS(clp, rqstp, verf), - TP_STRUCT__entry( - __field(u32, cl_boot) - __field(u32, cl_id) - __array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE) - __array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) - ), - TP_fast_assign( - __entry->cl_boot = clp->cl_clientid.cl_boot; - __entry->cl_id = clp->cl_clientid.cl_id; - memcpy(__entry->cl_verifier, (void *)&clp->cl_verifier, - NFS4_VERIFIER_SIZE); - memcpy(__entry->new_verifier, (void *)verf, - NFS4_VERIFIER_SIZE); - memcpy(__entry->addr, &rqstp->rq_xprt->xpt_remote, - sizeof(struct sockaddr_in6)); - ), - TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc", - __entry->cl_boot, __entry->cl_id, - __print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE), - __print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE), - __entry->addr - ) -); - -DECLARE_EVENT_CLASS(nfsd_clid_class, +TRACE_EVENT(nfsd_clid_inuse_err, TP_PROTO(const struct nfs4_client *clp), TP_ARGS(clp), TP_STRUCT__entry( __field(u32, cl_boot) __field(u32, cl_id) __array(unsigned char, addr, 
sizeof(struct sockaddr_in6)) - __field(unsigned long, flavor) - __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) - __dynamic_array(char, name, clp->cl_name.len + 1) + __field(unsigned int, namelen) + __dynamic_array(unsigned char, name, clp->cl_name.len) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; memcpy(__entry->addr, &clp->cl_addr, sizeof(struct sockaddr_in6)); - __entry->flavor = clp->cl_cred.cr_flavor; - memcpy(__entry->verifier, (void *)&clp->cl_verifier, - NFS4_VERIFIER_SIZE); - memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len); - __get_str(name)[clp->cl_name.len] = '\0'; + __entry->namelen = clp->cl_name.len; + memcpy(__get_dynamic_array(name), clp->cl_name.data, + clp->cl_name.len); ), - TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x", - __entry->addr, __get_str(name), - __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE), - show_nfsd_authflavor(__entry->flavor), + TP_printk("nfs4_clientid %.*s already in use by %pISpc, client %08x:%08x", + __entry->namelen, __get_str(name), __entry->addr, __entry->cl_boot, __entry->cl_id) -); +) -#define DEFINE_CLID_EVENT(name) \ -DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \ - TP_PROTO(const struct nfs4_client *clp), \ - TP_ARGS(clp)) +TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); +TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); +TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); -DEFINE_CLID_EVENT(fresh); -DEFINE_CLID_EVENT(confirmed_r); - -/* - * from fs/nfsd/filecache.h - */ #define show_nf_flags(val) \ __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ - { 1 << NFSD_FILE_GC, "GC" }) + { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ + { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) + +/* FIXME: This should 
probably be fleshed out in the future. */ +#define show_nf_may(val) \ + __print_flags(val, "|", \ + { NFSD_MAY_READ, "READ" }, \ + { NFSD_MAY_WRITE, "WRITE" }, \ + { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), TP_STRUCT__entry( + __field(unsigned int, nf_hashval) __field(void *, nf_inode) __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -800,17 +425,19 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __field(struct file *, nf_file) ), TP_fast_assign( + __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p", + TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p", + __entry->nf_hashval, __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), + show_nf_may(__entry->nf_may), __entry->nf_file) ) @@ -819,60 +446,34 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_free); +DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); +DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); - -TRACE_EVENT(nfsd_file_alloc, - TP_PROTO( - const struct nfsd_file *nf - ), - TP_ARGS(nf), - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - ), - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_flags = nf->nf_flags; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->nf_may = nf->nf_may; - ), - TP_printk("inode=%p ref=%u flags=%s may=%s", - __entry->nf_inode, __entry->nf_ref, - 
show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may) - ) -); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); TRACE_EVENT(nfsd_file_acquire, - TP_PROTO( - const struct svc_rqst *rqstp, - const struct inode *inode, - unsigned int may_flags, - const struct nfsd_file *nf, - __be32 status - ), + TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, + struct inode *inode, unsigned int may_flags, + struct nfsd_file *nf, __be32 status), - TP_ARGS(rqstp, inode, may_flags, nf, status), + TP_ARGS(rqstp, hash, inode, may_flags, nf, status), TP_STRUCT__entry( __field(u32, xid) - __field(const void *, inode) - __field(unsigned long, may_flags) - __field(unsigned int, nf_ref) + __field(unsigned int, hash) + __field(void *, inode) + __field(unsigned int, may_flags) + __field(int, nf_ref) __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(const void *, nf_file) + __field(unsigned char, nf_may) + __field(struct file *, nf_file) __field(u32, status) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? 
refcount_read(&nf->nf_ref) : 0; @@ -882,132 +483,40 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u", - __entry->xid, __entry->inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file, __entry->status - ) + TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", + __entry->xid, __entry->hash, __entry->inode, + show_nf_may(__entry->may_flags), __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nf_may(__entry->nf_may), __entry->nf_file, + __entry->status) ); -TRACE_EVENT(nfsd_file_insert_err, - TP_PROTO( - const struct svc_rqst *rqstp, - const struct inode *inode, - unsigned int may_flags, - long error - ), - TP_ARGS(rqstp, inode, may_flags, error), +DECLARE_EVENT_CLASS(nfsd_file_search_class, + TP_PROTO(struct inode *inode, unsigned int hash, int found), + TP_ARGS(inode, hash, found), TP_STRUCT__entry( - __field(u32, xid) - __field(const void *, inode) - __field(unsigned long, may_flags) - __field(long, error) - ), - TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->inode = inode; - __entry->may_flags = may_flags; - __entry->error = error; - ), - TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld", - __entry->xid, __entry->inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->error - ) -); - -TRACE_EVENT(nfsd_file_cons_err, - TP_PROTO( - const struct svc_rqst *rqstp, - const struct inode *inode, - unsigned int may_flags, - const struct nfsd_file *nf - ), - TP_ARGS(rqstp, inode, may_flags, nf), - TP_STRUCT__entry( - __field(u32, xid) - __field(const void *, inode) - __field(unsigned long, may_flags) - __field(unsigned int, nf_ref) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(const void *, nf_file) - ), - TP_fast_assign( - 
__entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->inode = inode; - __entry->may_flags = may_flags; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_file = nf->nf_file; - ), - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->inode, - show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, - show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - -DECLARE_EVENT_CLASS(nfsd_file_open_class, - TP_PROTO(const struct nfsd_file *nf, __be32 status), - TP_ARGS(nf, status), - TP_STRUCT__entry( - __field(void *, nf_inode) /* cannot be dereferenced */ - __field(int, nf_ref) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(void *, nf_file) /* cannot be dereferenced */ - ), - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_file = nf->nf_file; - ), - TP_printk("inode=%p ref=%d flags=%s may=%s file=%p", - __entry->nf_inode, - __entry->nf_ref, - show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file) -) - -#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ -DEFINE_EVENT(nfsd_file_open_class, name, \ - TP_PROTO( \ - const struct nfsd_file *nf, \ - __be32 status \ - ), \ - TP_ARGS(nf, status)) - -DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); -DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); - -TRACE_EVENT(nfsd_file_is_cached, - TP_PROTO( - const struct inode *inode, - int found - ), - TP_ARGS(inode, found), - TP_STRUCT__entry( - __field(const struct inode *, inode) + __field(struct inode *, inode) + __field(unsigned int, hash) __field(int, found) ), TP_fast_assign( __entry->inode = inode; + __entry->hash = hash; __entry->found = found; ), - TP_printk("inode=%p is %scached", - __entry->inode, - __entry->found 
? "" : "not " - ) + TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, + __entry->inode, __entry->found) ); +#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ +DEFINE_EVENT(nfsd_file_search_class, name, \ + TP_PROTO(struct inode *inode, unsigned int hash, int found), \ + TP_ARGS(inode, hash, found)) + +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); +DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); + TRACE_EVENT(nfsd_file_fsnotify_handle_event, TP_PROTO(struct inode *inode, u32 mask), TP_ARGS(inode, mask), @@ -1023,95 +532,10 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->mode = inode->i_mode; __entry->mask = mask; ), - TP_printk("inode=%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, + TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, __entry->nlink, __entry->mode, __entry->mask) ); -DECLARE_EVENT_CLASS(nfsd_file_gc_class, - TP_PROTO( - const struct nfsd_file *nf - ), - TP_ARGS(nf), - TP_STRUCT__entry( - __field(void *, nf_inode) - __field(void *, nf_file) - __field(int, nf_ref) - __field(unsigned long, nf_flags) - ), - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->nf_flags = nf->nf_flags; - ), - TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p", - __entry->nf_inode, __entry->nf_ref, - show_nf_flags(__entry->nf_flags), - __entry->nf_file - ) -); - -#define DEFINE_NFSD_FILE_GC_EVENT(name) \ -DEFINE_EVENT(nfsd_file_gc_class, name, \ - TP_PROTO( \ - const struct nfsd_file *nf \ - ), \ - TP_ARGS(nf)) - -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); 
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); - -DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, - TP_PROTO( - unsigned long removed, - unsigned long remaining - ), - TP_ARGS(removed, remaining), - TP_STRUCT__entry( - __field(unsigned long, removed) - __field(unsigned long, remaining) - ), - TP_fast_assign( - __entry->removed = removed; - __entry->remaining = remaining; - ), - TP_printk("%lu entries removed, %lu remaining", - __entry->removed, __entry->remaining) -); - -#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \ -DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ - TP_PROTO( \ - unsigned long removed, \ - unsigned long remaining \ - ), \ - TP_ARGS(removed, remaining)) - -DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); -DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); - -TRACE_EVENT(nfsd_file_close, - TP_PROTO( - const struct inode *inode - ), - TP_ARGS(inode), - TP_STRUCT__entry( - __field(const void *, inode) - ), - TP_fast_assign( - __entry->inode = inode; - ), - TP_printk("inode=%p", - __entry->inode - ) -); - #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); @@ -1192,9 +616,9 @@ TRACE_EVENT(nfsd_cb_args, memcpy(__entry->addr, &conn->cb_addr, sizeof(struct sockaddr_in6)); ), - TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u", - __entry->addr, __entry->cl_boot, __entry->cl_id, - __entry->prog, __entry->ident) + TP_printk("client %08x:%08x callback addr=%pISpc prog=%u ident=%u", + __entry->cl_boot, __entry->cl_id, + __entry->addr, __entry->prog, __entry->ident) ); TRACE_EVENT(nfsd_cb_nodelegs, @@ -1211,6 +635,11 @@ TRACE_EVENT(nfsd_cb_nodelegs, TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) ) +TRACE_DEFINE_ENUM(NFSD4_CB_UP); +TRACE_DEFINE_ENUM(NFSD4_CB_UNKNOWN); +TRACE_DEFINE_ENUM(NFSD4_CB_DOWN); +TRACE_DEFINE_ENUM(NFSD4_CB_FAULT); + #define show_cb_state(val) \ __print_symbolic(val, \ { NFSD4_CB_UP, "UP" }, \ @@ -1244,53 +673,10 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \ TP_PROTO(const struct nfs4_client *clp), \ 
TP_ARGS(clp)) +DEFINE_NFSD_CB_EVENT(setup); DEFINE_NFSD_CB_EVENT(state); -DEFINE_NFSD_CB_EVENT(probe); -DEFINE_NFSD_CB_EVENT(lost); DEFINE_NFSD_CB_EVENT(shutdown); -TRACE_DEFINE_ENUM(RPC_AUTH_NULL); -TRACE_DEFINE_ENUM(RPC_AUTH_UNIX); -TRACE_DEFINE_ENUM(RPC_AUTH_GSS); -TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5); -TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I); -TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P); - -#define show_nfsd_authflavor(val) \ - __print_symbolic(val, \ - { RPC_AUTH_NULL, "none" }, \ - { RPC_AUTH_UNIX, "sys" }, \ - { RPC_AUTH_GSS, "gss" }, \ - { RPC_AUTH_GSS_KRB5, "krb5" }, \ - { RPC_AUTH_GSS_KRB5I, "krb5i" }, \ - { RPC_AUTH_GSS_KRB5P, "krb5p" }) - -TRACE_EVENT(nfsd_cb_setup, - TP_PROTO(const struct nfs4_client *clp, - const char *netid, - rpc_authflavor_t authflavor - ), - TP_ARGS(clp, netid, authflavor), - TP_STRUCT__entry( - __field(u32, cl_boot) - __field(u32, cl_id) - __field(unsigned long, authflavor) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) - __array(unsigned char, netid, 8) - ), - TP_fast_assign( - __entry->cl_boot = clp->cl_clientid.cl_boot; - __entry->cl_id = clp->cl_clientid.cl_id; - strlcpy(__entry->netid, netid, sizeof(__entry->netid)); - __entry->authflavor = authflavor; - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); - ), - TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s", - __entry->addr, __entry->cl_boot, __entry->cl_id, - __entry->netid, show_nfsd_authflavor(__entry->authflavor)) -); - TRACE_EVENT(nfsd_cb_setup_err, TP_PROTO( const struct nfs4_client *clp, @@ -1314,138 +700,54 @@ TRACE_EVENT(nfsd_cb_setup_err, __entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error) ); -TRACE_EVENT(nfsd_cb_recall, - TP_PROTO( - const struct nfs4_stid *stid - ), - TP_ARGS(stid), - TP_STRUCT__entry( - __field(u32, cl_boot) - __field(u32, cl_id) - __field(u32, si_id) - __field(u32, si_generation) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) - ), - TP_fast_assign( - const stateid_t *stp = 
&stid->sc_stateid; - const struct nfs4_client *clp = stid->sc_client; - - __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; - __entry->cl_id = stp->si_opaque.so_clid.cl_id; - __entry->si_id = stp->si_opaque.so_id; - __entry->si_generation = stp->si_generation; - if (clp) - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); - else - memset(__entry->addr, 0, sizeof(struct sockaddr_in6)); - ), - TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x", - __entry->addr, __entry->cl_boot, __entry->cl_id, - __entry->si_id, __entry->si_generation) -); - -TRACE_EVENT(nfsd_cb_notify_lock, - TP_PROTO( - const struct nfs4_lockowner *lo, - const struct nfsd4_blocked_lock *nbl - ), - TP_ARGS(lo, nbl), - TP_STRUCT__entry( - __field(u32, cl_boot) - __field(u32, cl_id) - __field(u32, fh_hash) - __array(unsigned char, addr, sizeof(struct sockaddr_in6)) - ), - TP_fast_assign( - const struct nfs4_client *clp = lo->lo_owner.so_client; - - __entry->cl_boot = clp->cl_clientid.cl_boot; - __entry->cl_id = clp->cl_clientid.cl_id; - __entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh); - memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, - sizeof(struct sockaddr_in6)); - ), - TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x", - __entry->addr, __entry->cl_boot, __entry->cl_id, - __entry->fh_hash) -); - -TRACE_EVENT(nfsd_cb_offload, +TRACE_EVENT(nfsd_cb_work, TP_PROTO( const struct nfs4_client *clp, - const stateid_t *stp, - const struct knfsd_fh *fh, - u64 count, - __be32 status + const char *procedure ), - TP_ARGS(clp, stp, fh, count, status), + TP_ARGS(clp, procedure), TP_STRUCT__entry( __field(u32, cl_boot) __field(u32, cl_id) - __field(u32, si_id) - __field(u32, si_generation) - __field(u32, fh_hash) - __field(int, status) - __field(u64, count) + __string(procedure, procedure) __array(unsigned char, addr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( - __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; - __entry->cl_id = stp->si_opaque.so_clid.cl_id; - 
__entry->si_id = stp->si_opaque.so_id; - __entry->si_generation = stp->si_generation; - __entry->fh_hash = knfsd_fh_hash(fh); - __entry->status = be32_to_cpu(status); - __entry->count = count; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_str(procedure, procedure) memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, sizeof(struct sockaddr_in6)); ), - TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d", + TP_printk("addr=%pISpc client %08x:%08x procedure=%s", __entry->addr, __entry->cl_boot, __entry->cl_id, - __entry->si_id, __entry->si_generation, - __entry->fh_hash, __entry->count, __entry->status) + __get_str(procedure)) ); -DECLARE_EVENT_CLASS(nfsd_cb_done_class, +TRACE_EVENT(nfsd_cb_done, TP_PROTO( - const stateid_t *stp, - const struct rpc_task *task + const struct nfs4_client *clp, + int status ), - TP_ARGS(stp, task), + TP_ARGS(clp, status), TP_STRUCT__entry( __field(u32, cl_boot) __field(u32, cl_id) - __field(u32, si_id) - __field(u32, si_generation) __field(int, status) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( - __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; - __entry->cl_id = stp->si_opaque.so_clid.cl_id; - __entry->si_id = stp->si_opaque.so_id; - __entry->si_generation = stp->si_generation; - __entry->status = task->tk_status; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->status = status; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); ), - TP_printk("client %08x:%08x stateid %08x:%08x status=%d", - __entry->cl_boot, __entry->cl_id, __entry->si_id, - __entry->si_generation, __entry->status - ) + TP_printk("addr=%pISpc client %08x:%08x status=%d", + __entry->addr, __entry->cl_boot, __entry->cl_id, + __entry->status) ); -#define DEFINE_NFSD_CB_DONE_EVENT(name) \ -DEFINE_EVENT(nfsd_cb_done_class, name, \ - TP_PROTO( \ - const stateid_t 
*stp, \ - const struct rpc_task *task \ - ), \ - TP_ARGS(stp, task)) - -DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done); -DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); -DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); -DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); - #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0ea05ddff0d0..31edb883afd0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -32,13 +32,14 @@ #include #include +#ifdef CONFIG_NFSD_V3 #include "xdr3.h" +#endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 #include "../internal.h" #include "acl.h" #include "idmap.h" -#include "xdr4.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" @@ -48,69 +49,6 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP -/** - * nfserrno - Map Linux errnos to NFS errnos - * @errno: POSIX(-ish) error code to be mapped - * - * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If - * it's an error we don't expect, log it once and return nfserr_io. 
- */ -__be32 -nfserrno (int errno) -{ - static struct { - __be32 nfserr; - int syserr; - } nfs_errtbl[] = { - { nfs_ok, 0 }, - { nfserr_perm, -EPERM }, - { nfserr_noent, -ENOENT }, - { nfserr_io, -EIO }, - { nfserr_nxio, -ENXIO }, - { nfserr_fbig, -E2BIG }, - { nfserr_stale, -EBADF }, - { nfserr_acces, -EACCES }, - { nfserr_exist, -EEXIST }, - { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, - { nfserr_nodev, -ENODEV }, - { nfserr_notdir, -ENOTDIR }, - { nfserr_isdir, -EISDIR }, - { nfserr_inval, -EINVAL }, - { nfserr_fbig, -EFBIG }, - { nfserr_nospc, -ENOSPC }, - { nfserr_rofs, -EROFS }, - { nfserr_mlink, -EMLINK }, - { nfserr_nametoolong, -ENAMETOOLONG }, - { nfserr_notempty, -ENOTEMPTY }, - { nfserr_dquot, -EDQUOT }, - { nfserr_stale, -ESTALE }, - { nfserr_jukebox, -ETIMEDOUT }, - { nfserr_jukebox, -ERESTARTSYS }, - { nfserr_jukebox, -EAGAIN }, - { nfserr_jukebox, -EWOULDBLOCK }, - { nfserr_jukebox, -ENOMEM }, - { nfserr_io, -ETXTBSY }, - { nfserr_notsupp, -EOPNOTSUPP }, - { nfserr_toosmall, -ETOOSMALL }, - { nfserr_serverfault, -ESERVERFAULT }, - { nfserr_serverfault, -ENFILE }, - { nfserr_io, -EREMOTEIO }, - { nfserr_stale, -EOPENSTALE }, - { nfserr_io, -EUCLEAN }, - { nfserr_perm, -ENOKEY }, - { nfserr_no_grace, -ENOGRACE}, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { - if (nfs_errtbl[i].syserr == errno) - return nfs_errtbl[i].nfserr; - } - WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno); - return nfserr_io; -} - /* * Called from nfsd_lookup and encode_dirent. Check if we have crossed * a mount point. 
@@ -261,13 +199,27 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - dentry = lookup_one_len_unlocked(name, dparent, len); + /* + * In the nfsd4_open() case, this may be held across + * subsequent open and delegation acquisition which may + * need to take the child's i_mutex: + */ + fh_lock_nested(fhp, I_MUTEX_PARENT); + dentry = lookup_one_len(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; if (nfsd_mountpoint(dentry, exp)) { - host_err = nfsd_cross_mnt(rqstp, &dentry, &exp); - if (host_err) { + /* + * We don't need the i_mutex after all. It's + * still possible we could open this (regular + * files can be mountpoints too), but the + * i_mutex is just there to prevent renames of + * something that we might be about to delegate, + * and a mountpoint won't be renamed: + */ + fh_unlock(fhp); + if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { dput(dentry); goto out_nfserr; } @@ -282,15 +234,7 @@ out_nfserr: return nfserrno(host_err); } -/** - * nfsd_lookup - look up a single path component for nfsd - * - * @rqstp: the request context - * @fhp: the file handle of the directory - * @name: the component name, or %NULL to look up parent - * @len: length of name to examine - * @resfh: pointer to pre-initialised filehandle to hold result. - * +/* * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * @@ -300,11 +244,11 @@ out_nfserr: * returned. Otherwise the covered directory is returned. 
* NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 - * + * NeilBrown */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, - unsigned int len, struct svc_fh *resfh) + unsigned int len, struct svc_fh *resfh) { struct svc_export *exp; struct dentry *dentry; @@ -362,10 +306,6 @@ commit_metadata(struct svc_fh *fhp) static void nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - /* Ignore mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; @@ -419,77 +359,21 @@ out_nfserrno: return nfserrno(host_err); } -static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) -{ - int host_err; - - if (iap->ia_valid & ATTR_SIZE) { - /* - * RFC5661, Section 18.30.4: - * Changing the size of a file with SETATTR indirectly - * changes the time_modify and change attributes. - * - * (and similar for the older RFCs) - */ - struct iattr size_attr = { - .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, - .ia_size = iap->ia_size, - }; - - if (iap->ia_size < 0) - return -EFBIG; - - host_err = notify_change(dentry, &size_attr, NULL); - if (host_err) - return host_err; - iap->ia_valid &= ~ATTR_SIZE; - - /* - * Avoid the additional setattr call below if the only other - * attribute that the client sends is the mtime, as we update - * it as part of the size change above. - */ - if ((iap->ia_valid & ~ATTR_MTIME) == 0) - return 0; - } - - if (!iap->ia_valid) - return 0; - - iap->ia_valid |= ATTR_CTIME; - return notify_change(dentry, iap, NULL); -} - -/** - * nfsd_setattr - Set various file attributes. 
- * @rqstp: controlling RPC transaction - * @fhp: filehandle of target - * @attr: attributes to set - * @check_guard: set to 1 if guardtime is a valid timestamp - * @guardtime: do not act if ctime.tv_sec does not match this timestamp - * - * This call may adjust the contents of @attr (in particular, this - * call may change the bits in the na_iattr.ia_valid field). - * - * Returns nfs_ok on success, otherwise an NFS status code is - * returned. Caller must release @fhp by calling fh_put in either - * case. +/* + * Set various file attributes. After this call fhp needs an fh_put. */ __be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfsd_attrs *attr, +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; - struct iattr *iap = attr->na_iattr; int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; - int host_err = 0; + int host_err; bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); - int retries; if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -525,6 +409,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode = d_inode(dentry); + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + return 0; + nfsd_sanitize_attrs(inode, iap); if (check_guard && guardtime != inode->i_ctime.tv_sec) @@ -543,41 +434,45 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } - inode_lock(inode); - fh_fill_pre_attrs(fhp); - for (retries = 1;;) { - struct iattr attrs; + fh_lock(fhp); + if (size_change) { + /* + * RFC5661, Section 18.30.4: + * Changing the size of a file with SETATTR indirectly + * changes the time_modify and change attributes. 
+ * + * (and similar for the older RFCs) + */ + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; + + host_err = notify_change(dentry, &size_attr, NULL); + if (host_err) + goto out_unlock; + iap->ia_valid &= ~ATTR_SIZE; /* - * notify_change() can alter its iattr argument, making - * @iap unsuitable for submission multiple times. Make a - * copy for every loop iteration. + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. */ - attrs = *iap; - host_err = __nfsd_setattr(dentry, &attrs); - if (host_err != -EAGAIN || !retries--) - break; - if (!nfsd_wait_for_delegreturn(rqstp, inode)) - break; + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + goto out_unlock; } - if (attr->na_seclabel && attr->na_seclabel->len) - attr->na_labelerr = security_inode_setsecctx(dentry, - attr->na_seclabel->data, attr->na_seclabel->len); - if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) - attr->na_aclerr = set_posix_acl(inode, ACL_TYPE_ACCESS, - attr->na_pacl); - if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && - !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) - attr->na_aclerr = set_posix_acl(inode, ACL_TYPE_DEFAULT, - attr->na_dpacl); - fh_fill_post_attrs(fhp); - inode_unlock(inode); + + iap->ia_valid |= ATTR_CTIME; + host_err = notify_change(dentry, iap, NULL); + +out_unlock: + fh_unlock(fhp); if (size_change) put_write_access(inode); out: if (!host_err) host_err = commit_metadata(fhp); - return err != 0 ? 
err : nfserrno(host_err); + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) @@ -608,16 +503,35 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } - -static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) { - return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate; -} + __be32 error; + int host_error; + struct dentry *dentry; -__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, - struct nfsd_file *nf_src, u64 src_pos, - struct nfsd_file *nf_dst, u64 dst_pos, - u64 count, bool sync) + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + + inode_lock(d_inode(dentry)); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + inode_unlock(d_inode(dentry)); + return nfserrno(host_error); +} +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + return nfserr_notsupp; +} +#endif + +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) { struct file *src = nf_src->nf_file; struct file *dst = nf_dst->nf_file; @@ -644,17 +558,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, if (!status) status = commit_inode_metadata(file_inode(src)); if (status < 0) { - struct nfsd_net *nn = net_generic(nf_dst->nf_net, - nfsd_net_id); - - trace_nfsd_clone_file_range_err(rqstp, - &nfsd4_get_cstate(rqstp)->save_fh, - src_pos, - &nfsd4_get_cstate(rqstp)->current_fh, - dst_pos, - count, status); - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, status); + nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, + nfsd_net_id)); ret = nfserrno(status); } } @@ -701,6 +606,7 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, } 
#endif /* defined(CONFIG_NFSD_V4) */ +#ifdef CONFIG_NFSD_V3 /* * Check server access rights to a file system object */ @@ -812,6 +718,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor out: return error; } +#endif /* CONFIG_NFSD_V3 */ int nfsd_open_break_lease(struct inode *inode, int access) { @@ -844,6 +751,9 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, path.dentry = fhp->fh_dentry; inode = d_inode(path.dentry); + /* Disallow write access to files with the append-only bit set + * or any access when mandatory locking enabled + */ err = nfserr_perm; if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; @@ -898,7 +808,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { __be32 err; - bool retried = false; validate_process_creds(); /* @@ -914,37 +823,21 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, */ if (type == S_IFREG) may_flags |= NFSD_MAY_OWNER_OVERRIDE; -retry: err = fh_verify(rqstp, fhp, type, may_flags); - if (!err) { + if (!err) err = __nfsd_open(rqstp, fhp, type, may_flags, filp); - if (err == nfserr_stale && !retried) { - retried = true; - fh_put(fhp); - goto retry; - } - } validate_process_creds(); return err; } -/** - * nfsd_open_verified - Open a regular file for the filecache - * @rqstp: RPC request - * @fhp: NFS filehandle of the file to open - * @may_flags: internal permission flags - * @filp: OUT: open "struct file *" - * - * Returns an nfsstat value in network byte order. 
- */ __be32 -nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, - struct file **filp) +nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, + int may_flags, struct file **filp) { __be32 err; validate_process_creds(); - err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); + err = __nfsd_open(rqstp, fhp, type, may_flags, filp); validate_process_creds(); return err; } @@ -959,24 +852,28 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page *page = buf->page; // may be a compound one - unsigned offset = buf->offset; - struct page *last_page; + struct page **pp = rqstp->rq_next_page; + struct page *page = buf->page; + size_t size; - last_page = page + (offset + sd->len - 1) / PAGE_SIZE; - for (page += offset / PAGE_SIZE; page <= last_page; page++) { - /* - * Skip page replacement when extending the contents - * of the current page. - */ - if (page == *(rqstp->rq_next_page - 1)) - continue; - svc_rqst_replace_page(rqstp, page); - } - if (rqstp->rq_res.page_len == 0) // first call - rqstp->rq_res.page_base = offset % PAGE_SIZE; - rqstp->rq_res.page_len += sd->len; - return sd->len; + size = sd->len; + + if (rqstp->rq_res.page_len == 0) { + get_page(page); + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; + rqstp->rq_res.page_base = buf->offset; + rqstp->rq_res.page_len = size; + } else if (page != pp[-1]) { + get_page(page); + if (*rqstp->rq_next_page) + put_page(*rqstp->rq_next_page); + *(rqstp->rq_next_page++) = page; + rqstp->rq_res.page_len += size; + } else + rqstp->rq_res.page_len += size; + + return size; } static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, @@ -1000,7 +897,7 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long *count, u32 *eof, ssize_t host_err) { if (host_err >= 0) { - nfsd_stats_io_read_add(fhp->fh_export, host_err); + 
nfsdstats.io_read += host_err; *eof = nfsd_eof_on_read(file, offset, host_err, *count); *count = host_err; fsnotify_access(file); @@ -1088,9 +985,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, unsigned long *cnt, int stable, __be32 *verf) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; - struct super_block *sb = file_inode(file)->i_sb; struct svc_export *exp; struct iov_iter iter; errseq_t since; @@ -1098,18 +993,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, int host_err; int use_wgather; loff_t pos = offset; - unsigned long exp_op_flags = 0; unsigned int pflags = current->flags; rwf_t flags = 0; - bool restore_flags = false; trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); - if (sb->s_export_op) - exp_op_flags = sb->s_export_op->flags; - - if (test_bit(RQ_LOCAL, &rqstp->rq_flags) && - !(exp_op_flags & EXPORT_OP_REMOTE_FS)) { + if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* * We want throttling in balance_dirty_pages() * and shrink_inactive_list() to only consider @@ -1118,8 +1007,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, * the client's dirty pages or its congested queue. 
*/ current->flags |= PF_LOCAL_THROTTLE; - restore_flags = true; - } exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); @@ -1132,18 +1019,29 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); since = READ_ONCE(file->f_wb_err); - if (verf) - nfsd_copy_write_verifier(verf, nn); - file_start_write(file); - host_err = vfs_iter_write(file, &iter, &pos, flags); - file_end_write(file); + if (flags & RWF_SYNC) { + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); + } else { + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); + } if (host_err < 0) { - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, host_err); + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); goto out_nfserr; } *cnt = host_err; - nfsd_stats_io_write_add(exp, *cnt); + nfsdstats.io_write += *cnt; fsnotify_modify(file); host_err = filemap_check_wb_err(file->f_mapping, since); if (host_err < 0) @@ -1151,10 +1049,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (stable && use_wgather) { host_err = wait_for_concurrent_writes(file); - if (host_err < 0) { - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, host_err); - } + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); } out_nfserr: @@ -1165,7 +1062,7 @@ out_nfserr: trace_nfsd_write_err(rqstp, fhp, offset, host_err); nfserr = nfserrno(host_err); } - if (restore_flags) + if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) current_restore_flags(pflags, PF_LOCAL_THROTTLE); return nfserr; } @@ -1184,7 +1081,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, 
struct svc_fh *fhp, __be32 err; trace_nfsd_read_start(rqstp, fhp, offset, *count); - err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); + err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; @@ -1216,7 +1113,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, trace_nfsd_write_start(rqstp, fhp, offset, *cnt); - err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); + err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out; @@ -1228,59 +1125,45 @@ out: return err; } -/** - * nfsd_commit - Commit pending writes to stable storage - * @rqstp: RPC request being processed - * @fhp: NFS filehandle - * @nf: target file - * @offset: raw offset from beginning of file - * @count: raw count of bytes to sync - * @verf: filled in with the server's current write verifier +#ifdef CONFIG_NFSD_V3 +/* + * Commit all pending writes to stable storage. * - * Note: we guarantee that data that lies within the range specified - * by the 'offset' and 'count' parameters will be synced. The server - * is permitted to sync data that lies outside this range at the - * same time. + * Note: we only guarantee that data that lies within the range specified + * by the 'offset' and 'count' parameters will be synced. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. - * - * Return values: - * An nfsstat value in network byte order. */ __be32 -nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - u64 offset, u32 count, __be32 *verf) +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, unsigned long count, __be32 *verf) { - __be32 err = nfs_ok; - u64 maxbytes; - loff_t start, end; - struct nfsd_net *nn; + struct nfsd_file *nf; + loff_t end = LLONG_MAX; + __be32 err = nfserr_inval; - /* - * Convert the client-provided (offset, count) range to a - * (start, end) range. 
If the client-provided range falls - * outside the maximum file size of the underlying FS, - * clamp the sync range appropriately. - */ - start = 0; - end = LLONG_MAX; - maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes; - if (offset < maxbytes) { - start = offset; - if (count && (offset + count - 1 < maxbytes)) - end = offset + count - 1; + if (offset < 0) + goto out; + if (count != 0) { + end = offset + (loff_t)count - 1; + if (end < offset) + goto out; } - nn = net_generic(nf->nf_net, nfsd_net_id); + err = nfsd_file_acquire(rqstp, fhp, + NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (err) + goto out; if (EX_ISSYNC(fhp->fh_export)) { errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); int err2; - err2 = vfs_fsync_range(nf->nf_file, start, end, 0); + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: - nfsd_copy_write_verifier(verf, nn); + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); err2 = filemap_check_wb_err(nf->nf_file->f_mapping, since); err = nfserrno(err2); @@ -1289,37 +1172,28 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, err = nfserr_notsupp; break; default: - nfsd_reset_write_verifier(nn); - trace_nfsd_writeverf_reset(nn, rqstp, err2); + nfsd_reset_boot_verifier(net_generic(nf->nf_net, + nfsd_net_id)); err = nfserrno(err2); } } else - nfsd_copy_write_verifier(verf, nn); + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); + nfsd_file_put(nf); +out: return err; } +#endif /* CONFIG_NFSD_V3 */ -/** - * nfsd_create_setattr - Set a created file's attributes - * @rqstp: RPC transaction being executed - * @fhp: NFS filehandle of parent directory - * @resfhp: NFS filehandle of new object - * @attrs: requested attributes of new object - * - * Returns nfs_ok on success, or an nfsstat in network byte order. 
- */ -__be32 -nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct nfsd_attrs *attrs) +static __be32 +nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, + struct iattr *iap) { - struct iattr *iap = attrs->na_iattr; - __be32 status; - /* - * Mode has already been set by file creation. + * Mode has already been set earlier in create: */ iap->ia_valid &= ~ATTR_MODE; - /* * Setting uid/gid works only for root. Irix appears to * send along the gid on create when it tries to implement @@ -1327,31 +1201,10 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, */ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); - - /* - * Callers expect new file metadata to be committed even - * if the attributes have not changed. - */ if (iap->ia_valid) - status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); - else - status = nfserrno(commit_metadata(resfhp)); - - /* - * Transactional filesystems had a chance to commit changes - * for both parent and child simultaneously making the - * following commit_metadata a noop in many cases. - */ - if (!status) - status = nfserrno(commit_metadata(fhp)); - - /* - * Update the new filehandle to pick up the new attributes. - */ - if (!status) - status = fh_update(resfhp); - - return status; + return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); + /* Callers expect file metadata to be committed here */ + return nfserrno(commit_metadata(resfhp)); } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. 
@@ -1372,19 +1225,26 @@ nfsd_check_ignore_resizing(struct iattr *iap) /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfsd_attrs *attrs, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct iattr *iap, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild; struct inode *dirp; - struct iattr *iap = attrs->na_iattr; __be32 err; + __be32 err2; int host_err; dentry = fhp->fh_dentry; dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); + if (!fhp->fh_locked) { + WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n", + dentry); + err = nfserr_io; + goto out; + } + err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); if (err) goto out; @@ -1397,6 +1257,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); err = 0; + host_err = 0; switch (type) { case S_IFREG: host_err = vfs_create(dirp, dchild, iap->ia_mode, true); @@ -1442,8 +1303,22 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err < 0) goto out_nfserr; - err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); + err = nfsd_create_setattr(rqstp, resfhp, iap); + /* + * nfsd_create_setattr already committed the child. Transactional + * filesystems had a chance to commit changes for both parent and + * child simultaneously making the following commit_metadata a + * noop. + */ + err2 = nfserrno(commit_metadata(fhp)); + if (err2) + err = err2; + /* + * Update the file handle to get the new inode info. 
+ */ + if (!err) + err = fh_update(resfhp); out: dput(dchild); return err; @@ -1461,8 +1336,8 @@ out_nfserr: */ __be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct nfsd_attrs *attrs, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct iattr *iap, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; __be32 err; @@ -1481,13 +1356,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); + fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) { - err = nfserrno(host_err); - goto out_unlock; - } + if (IS_ERR(dchild)) + return nfserrno(host_err); err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); /* * We unconditionally drop our ref to dchild as fh_compose will have @@ -1495,15 +1368,179 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ dput(dchild); if (err) - goto out_unlock; - fh_fill_pre_attrs(fhp); - err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); - fh_fill_post_attrs(fhp); -out_unlock: - inode_unlock(dentry->d_inode); - return err; + return err; + return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, + rdev, resfhp); } +#ifdef CONFIG_NFSD_V3 + +/* + * NFSv3 and NFSv4 version of nfsd_create + */ +__be32 +do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, struct iattr *iap, + struct svc_fh *resfhp, int createmode, u32 *verifier, + bool *truncp, bool *created) +{ + struct dentry *dentry, *dchild = NULL; + struct inode *dirp; + __be32 err; + int host_err; + __u32 v_mtime=0, v_atime=0; + + err = nfserr_perm; + if (!flen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + 
goto out; + + dentry = fhp->fh_dentry; + dirp = d_inode(dentry); + + host_err = fh_want_write(fhp); + if (host_err) + goto out_nfserr; + + fh_lock_nested(fhp, I_MUTEX_PARENT); + + /* + * Compose the response file handle. + */ + dchild = lookup_one_len(fname, dentry, flen); + host_err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; + + /* If file doesn't exist, check for permissions to create one */ + if (d_really_is_negative(dchild)) { + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (err) + goto out; + } + + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); + if (err) + goto out; + + if (nfsd_create_is_exclusive(createmode)) { + /* solaris7 gets confused (bugid 4218508) if these have + * the high bit set, so just clear the high bits. If this is + * ever changed to use different attrs for storing the + * verifier, then do_open_lookup() will also need to be fixed + * accordingly. + */ + v_mtime = verifier[0]&0x7fffffff; + v_atime = verifier[1]&0x7fffffff; + } + + if (d_really_is_positive(dchild)) { + err = 0; + + switch (createmode) { + case NFS3_CREATE_UNCHECKED: + if (! d_is_reg(dchild)) + goto out; + else if (truncp) { + /* in nfsv4, we need to treat this case a little + * differently. we don't want to truncate the + * file now; this would be wrong if the OPEN + * fails for some other reason. furthermore, + * if the size is nonzero, we should ignore it + * according to spec! 
+ */ + *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; + } + else { + iap->ia_valid &= ATTR_SIZE; + goto set_attr; + } + break; + case NFS3_CREATE_EXCLUSIVE: + if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime + && d_inode(dchild)->i_atime.tv_sec == v_atime + && d_inode(dchild)->i_size == 0 ) { + if (created) + *created = true; + break; + } + fallthrough; + case NFS4_CREATE_EXCLUSIVE4_1: + if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime + && d_inode(dchild)->i_atime.tv_sec == v_atime + && d_inode(dchild)->i_size == 0 ) { + if (created) + *created = true; + goto set_attr; + } + fallthrough; + case NFS3_CREATE_GUARDED: + err = nfserr_exist; + } + fh_drop_write(fhp); + goto out; + } + + if (!IS_POSIXACL(dirp)) + iap->ia_mode &= ~current_umask(); + + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); + if (host_err < 0) { + fh_drop_write(fhp); + goto out_nfserr; + } + if (created) + *created = true; + + nfsd_check_ignore_resizing(iap); + + if (nfsd_create_is_exclusive(createmode)) { + /* Cram the verifier into atime/mtime */ + iap->ia_valid = ATTR_MTIME|ATTR_ATIME + | ATTR_MTIME_SET|ATTR_ATIME_SET; + /* XXX someone who knows this better please fix it for nsec */ + iap->ia_mtime.tv_sec = v_mtime; + iap->ia_atime.tv_sec = v_atime; + iap->ia_mtime.tv_nsec = 0; + iap->ia_atime.tv_nsec = 0; + } + + set_attr: + err = nfsd_create_setattr(rqstp, resfhp, iap); + + /* + * nfsd_create_setattr already committed the child + * (and possibly also the parent). + */ + if (!err) + err = nfserrno(commit_metadata(fhp)); + + /* + * Update the filehandle to get the new inode info. + */ + if (!err) + err = fh_update(resfhp); + + out: + fh_unlock(fhp); + if (dchild && !IS_ERR(dchild)) + dput(dchild); + fh_drop_write(fhp); + return err; + + out_nfserr: + err = nfserrno(host_err); + goto out; +} +#endif /* CONFIG_NFSD_V3 */ + /* * Read a symlink. On entry, *lenp must contain the maximum path length that * fits into the buffer. On return, it contains the true length. 
@@ -1542,25 +1579,15 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) return 0; } -/** - * nfsd_symlink - Create a symlink and look up its inode - * @rqstp: RPC transaction being executed - * @fhp: NFS filehandle of parent directory - * @fname: filename of the new symlink - * @flen: length of @fname - * @path: content of the new symlink (NUL-terminated) - * @attrs: requested attributes of new object - * @resfhp: NFS filehandle of new object - * +/* + * Create a symlink and look up its inode * N.B. After this call _both_ fhp and resfhp need an fh_put - * - * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, - char *path, struct nfsd_attrs *attrs, - struct svc_fh *resfhp) + char *fname, int flen, + char *path, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; @@ -1578,35 +1605,33 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; host_err = fh_want_write(fhp); - if (host_err) { - err = nfserrno(host_err); - goto out; - } + if (host_err) + goto out_nfserr; + fh_lock(fhp); dentry = fhp->fh_dentry; - inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dnew = lookup_one_len(fname, dentry, flen); - if (IS_ERR(dnew)) { - err = nfserrno(PTR_ERR(dnew)); - inode_unlock(dentry->d_inode); - goto out_drop_write; - } - fh_fill_pre_attrs(fhp); + host_err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; + host_err = vfs_symlink(d_inode(dentry), dnew, path); err = nfserrno(host_err); - cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); - if (!err) - nfsd_create_setattr(rqstp, fhp, resfhp, attrs); - fh_fill_post_attrs(fhp); - inode_unlock(dentry->d_inode); if (!err) err = nfserrno(commit_metadata(fhp)); + fh_unlock(fhp); + + fh_drop_write(fhp); + + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; -out_drop_write: - fh_drop_write(fhp); out: return err; + 
+out_nfserr: + err = nfserrno(host_err); + goto out; } /* @@ -1644,25 +1669,21 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out; } + fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = d_inode(ddir); - inode_lock_nested(dirp, I_MUTEX_PARENT); dnew = lookup_one_len(name, ddir, len); - if (IS_ERR(dnew)) { - err = nfserrno(PTR_ERR(dnew)); - goto out_unlock; - } + host_err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; dold = tfhp->fh_dentry; err = nfserr_noent; if (d_really_is_negative(dold)) goto out_dput; - fh_fill_pre_attrs(ffhp); host_err = vfs_link(dold, dirp, dnew, NULL); - fh_fill_post_attrs(ffhp); - inode_unlock(dirp); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1673,17 +1694,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } +out_dput: dput(dnew); -out_drop_write: +out_unlock: + fh_unlock(ffhp); fh_drop_write(tfhp); out: return err; -out_dput: - dput(dnew); -out_unlock: - inode_unlock(dirp); - goto out_drop_write; +out_nfserr: + err = nfserrno(host_err); + goto out_unlock; } static void @@ -1718,7 +1739,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct inode *fdir, *tdir; __be32 err; int host_err; - bool close_cached = false; + bool has_cached = false; err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) @@ -1750,9 +1771,12 @@ retry: goto out; } + /* cannot use fh_lock as we need deadlock protective ordering + * so do it by hand */ trap = lock_rename(tdentry, fdentry); - fh_fill_pre_attrs(ffhp); - fh_fill_pre_attrs(tfhp); + ffhp->fh_locked = tfhp->fh_locked = true; + fill_pre_wcc(ffhp); + fill_pre_wcc(tfhp); odentry = lookup_one_len(fname, fdentry, flen); host_err = PTR_ERR(odentry); @@ -1774,26 +1798,11 @@ retry: if (ndentry == trap) goto out_dput_new; - if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) && - nfsd_has_cached_files(ndentry)) { - close_cached = true; + if 
(nfsd_has_cached_files(ndentry)) { + has_cached = true; goto out_dput_old; } else { - struct renamedata rd = { - .old_dir = fdir, - .old_dentry = odentry, - .new_dir = tdir, - .new_dentry = ndentry, - }; - int retries; - - for (retries = 1;;) { - host_err = vfs_rename(&rd); - if (host_err != -EAGAIN || !retries--) - break; - if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry))) - break; - } + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1806,12 +1815,17 @@ retry: dput(odentry); out_nfserr: err = nfserrno(host_err); - - if (!close_cached) { - fh_fill_post_attrs(ffhp); - fh_fill_post_attrs(tfhp); + /* + * We cannot rely on fh_unlock on the two filehandles, + * as that would do the wrong thing if the two directories + * were the same, so again we do it by hand. + */ + if (!has_cached) { + fill_post_wcc(ffhp); + fill_post_wcc(tfhp); } unlock_rename(tdentry, fdentry); + ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); /* @@ -1820,8 +1834,8 @@ retry: * shouldn't be done with locks held however, so we delay it until this * point and then reattempt the whole shebang. 
*/ - if (close_cached) { - close_cached = false; + if (has_cached) { + has_cached = false; nfsd_close_cached_files(ndentry); dput(ndentry); goto retry; @@ -1840,7 +1854,6 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, { struct dentry *dentry, *rdentry; struct inode *dirp; - struct inode *rinode; __be32 err; int host_err; @@ -1855,50 +1868,34 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; + fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = d_inode(dentry); - inode_lock_nested(dirp, I_MUTEX_PARENT); rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_unlock; + goto out_drop_write; if (d_really_is_negative(rdentry)) { dput(rdentry); host_err = -ENOENT; - goto out_unlock; + goto out_drop_write; } - rinode = d_inode(rdentry); - ihold(rinode); if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; - fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { - int retries; - - if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) - nfsd_close_cached_files(rdentry); - - for (retries = 1;;) { - host_err = vfs_unlink(dirp, rdentry, NULL); - if (host_err != -EAGAIN || !retries--) - break; - if (!nfsd_wait_for_delegreturn(rqstp, rinode)) - break; - } + nfsd_close_cached_files(rdentry); + host_err = vfs_unlink(dirp, rdentry, NULL); } else { host_err = vfs_rmdir(dirp, rdentry); } - fh_fill_post_attrs(fhp); - inode_unlock(dirp); if (!host_err) host_err = commit_metadata(fhp); dput(rdentry); - iput(rinode); /* truncate the inode here */ out_drop_write: fh_drop_write(fhp); @@ -1916,9 +1913,6 @@ out_nfserr: } out: return err; -out_unlock: - inode_unlock(dirp); - goto out_drop_write; } /* @@ -1968,9 +1962,8 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, return 0; } -static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, - nfsd_filldir_t func, struct readdir_cd *cdp, - loff_t 
*offsetp) +static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) { struct buffered_dirent *de; int host_err; @@ -2016,8 +2009,6 @@ static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, if (cdp->err != nfs_ok) break; - trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen); - reclen = ALIGN(sizeof(*de) + de->namlen, sizeof(u64)); size -= reclen; @@ -2065,7 +2056,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, goto out_close; } - err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp); + err = nfsd_buffered_readdir(file, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ @@ -2272,16 +2263,13 @@ out: return err; } -/** - * nfsd_removexattr - Remove an extended attribute - * @rqstp: RPC transaction being executed - * @fhp: NFS filehandle of object with xattr to remove - * @name: name of xattr to remove (NUL-terminate) - * - * Pass in a NULL pointer for delegated_inode, and let the client deal - * with NFS4ERR_DELAY (same as with e.g. setattr and remove). - * - * Returns nfs_ok on success, or an nfsstat in network byte order. +/* + * Removexattr and setxattr need to call fh_lock to both lock the inode + * and set the change attribute. Since the top-level vfs_removexattr + * and vfs_setxattr calls already do their own inode_lock calls, call + * the _locked variant. Pass in a NULL pointer for delegated_inode, + * and let the client deal with NFS4ERR_DELAY (same as with e.g. + * setattr and remove). 
*/ __be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) @@ -2297,13 +2285,11 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) if (ret) return nfserrno(ret); - inode_lock(fhp->fh_dentry->d_inode); - fh_fill_pre_attrs(fhp); + fh_lock(fhp); ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL); - fh_fill_post_attrs(fhp); - inode_unlock(fhp->fh_dentry->d_inode); + fh_unlock(fhp); fh_drop_write(fhp); return nfsd_xattr_errno(ret); @@ -2323,13 +2309,12 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, ret = fh_want_write(fhp); if (ret) return nfserrno(ret); - inode_lock(fhp->fh_dentry->d_inode); - fh_fill_pre_attrs(fhp); + fh_lock(fhp); ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags, NULL); - fh_fill_post_attrs(fhp); - inode_unlock(fhp->fh_dentry->d_inode); + + fh_unlock(fhp); fh_drop_write(fhp); return nfsd_xattr_errno(ret); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index dbdfef7ae85b..a2442ebe5acf 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,8 +6,6 @@ #ifndef LINUX_NFSD_VFS_H #define LINUX_NFSD_VFS_H -#include -#include #include "nfsfh.h" #include "nfsd.h" @@ -44,23 +42,6 @@ struct nfsd_file; typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ -struct nfsd_attrs { - struct iattr *na_iattr; /* input */ - struct xdr_netobj *na_seclabel; /* input */ - struct posix_acl *na_pacl; /* input */ - struct posix_acl *na_dpacl; /* input */ - - int na_labelerr; /* output */ - int na_aclerr; /* output */ -}; - -static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) -{ - posix_acl_release(attrs->na_pacl); - posix_acl_release(attrs->na_dpacl); -} - -__be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, @@ -69,28 +50,32 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned 
int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct nfsd_attrs *, int, time64_t); + struct iattr *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, - struct nfsd_file *nf_src, u64 src_pos, +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, - struct nfsd_attrs *attrs, int type, dev_t rdev, - struct svc_fh *res); -__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct nfsd_attrs *attrs, + char *name, int len, struct iattr *attrs, int type, dev_t rdev, struct svc_fh *res); +__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + int type, dev_t rdev, struct svc_fh *res); +#ifdef CONFIG_NFSD_V3 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct svc_fh *resfhp, struct nfsd_attrs *iap); -__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, - struct nfsd_file *nf, u64 offset, u32 count, - __be32 *verf); +__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + struct svc_fh *res, int createmode, + u32 *verifier, bool *truncp, bool *created); +__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, + loff_t, unsigned long, __be32 *verf); +#endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void **bufp, int *lenp); @@ -104,7 +89,7 @@ __be32 nfsd_setxattr(struct 
svc_rqst *rqstp, struct svc_fh *fhp, int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, @@ -128,9 +113,8 @@ __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, - struct nfsd_attrs *attrs, - struct svc_fh *res); + char *name, int len, char *path, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); ssize_t nfsd_copy_file_range(struct file *, u64, @@ -168,7 +152,7 @@ static inline void fh_drop_write(struct svc_fh *fh) } } -static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) +static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; @@ -176,4 +160,10 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) AT_STATX_SYNC_AS_STAT)); } +static inline int nfsd_create_is_exclusive(int createmode) +{ + return createmode == NFS3_CREATE_EXCLUSIVE + || createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index 852f71580bd0..b8cc6a4b2e0e 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -27,13 +27,14 @@ struct nfsd_readargs { struct svc_fh fh; __u32 offset; __u32 count; + int vlen; }; struct nfsd_writeargs { svc_fh fh; __u32 offset; __u32 len; - struct xdr_buf payload; + struct kvec first; }; struct nfsd_createargs { @@ -52,6 +53,11 @@ struct nfsd_renameargs { unsigned int tlen; }; +struct nfsd_readlinkargs { + 
struct svc_fh fh; + char * buffer; +}; + struct nfsd_linkargs { struct svc_fh ffh; struct svc_fh tfh; @@ -73,6 +79,7 @@ struct nfsd_readdirargs { struct svc_fh fh; __u32 cookie; __u32 count; + __be32 * buffer; }; struct nfsd_stat { @@ -94,7 +101,6 @@ struct nfsd_diropres { struct nfsd_readlinkres { __be32 status; int len; - struct page *page; }; struct nfsd_readres { @@ -102,20 +108,17 @@ struct nfsd_readres { struct svc_fh fh; unsigned long count; struct kstat stat; - struct page **pages; }; struct nfsd_readdirres { - /* Components of the reply */ __be32 status; int count; - /* Used to encode the reply's entry list */ - struct xdr_stream xdr; - struct xdr_buf dirlist; struct readdir_cd common; - unsigned int cookie_offset; + __be32 * buffer; + int buflen; + __be32 * offset; }; struct nfsd_statfsres { @@ -141,37 +144,36 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfssvc_decode_void(struct svc_rqst *, __be32 *); +int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *); +int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_readargs(struct svc_rqst *, __be32 *); +int 
nfssvc_decode_writeargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_createargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); +int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); +int nfssvc_encode_void(struct svc_rqst *, __be32 *); +int nfssvc_encode_stat(struct svc_rqst *, __be32 *); +int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); +int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); +int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); +int nfssvc_encode_readres(struct svc_rqst *, __be32 *); +int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); +int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); -bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); -int nfssvc_encode_entry(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type); +int nfssvc_encode_entry(void *, const char *name, + int namlen, loff_t offset, u64 ino, unsigned int); void nfssvc_release_attrstat(struct svc_rqst *rqstp); void nfssvc_release_diropres(struct svc_rqst *rqstp); void nfssvc_release_readres(struct svc_rqst *rqstp); /* Helper functions for NFSv2 ACL code */ -bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp); 
-bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status); -bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, - const struct svc_fh *fhp, const struct kstat *stat); +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); +__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); #endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 03fe4e21306c..ae6fa6c9cb46 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -25,13 +25,14 @@ struct nfsd3_diropargs { struct nfsd3_accessargs { struct svc_fh fh; - __u32 access; + unsigned int access; }; struct nfsd3_readargs { struct svc_fh fh; __u64 offset; __u32 count; + int vlen; }; struct nfsd3_writeargs { @@ -40,7 +41,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - struct xdr_buf payload; + struct kvec first; }; struct nfsd3_createargs { @@ -70,6 +71,11 @@ struct nfsd3_renameargs { unsigned int tlen; }; +struct nfsd3_readlinkargs { + struct svc_fh fh; + char * buffer; +}; + struct nfsd3_linkargs { struct svc_fh ffh; struct svc_fh tfh; @@ -90,8 +96,10 @@ struct nfsd3_symlinkargs { struct nfsd3_readdirargs { struct svc_fh fh; __u64 cookie; + __u32 dircount; __u32 count; __be32 * verf; + __be32 * buffer; }; struct nfsd3_commitargs { @@ -102,13 +110,13 @@ struct nfsd3_commitargs { struct nfsd3_getaclargs { struct svc_fh fh; - __u32 mask; + int mask; }; struct posix_acl; struct nfsd3_setaclargs { struct svc_fh fh; - __u32 mask; + int mask; struct posix_acl *acl_access; struct posix_acl *acl_default; }; @@ -137,7 +145,6 @@ struct nfsd3_readlinkres { __be32 status; struct svc_fh fh; __u32 len; - struct page **pages; }; struct nfsd3_readres { @@ -145,7 +152,6 @@ struct nfsd3_readres { struct svc_fh fh; unsigned long count; __u32 eof; - struct page **pages; }; struct nfsd3_writeres { @@ -169,17 +175,19 @@ struct nfsd3_linkres { }; struct nfsd3_readdirres { - /* Components of the reply */ __be32 status; struct svc_fh 
fh; + /* Just to save kmalloc on every readdirplus entry (svc_fh is a + * little large for the stack): */ + struct svc_fh scratch; + int count; __be32 verf[2]; - /* Used to encode the reply's entry list */ - struct xdr_stream xdr; - struct xdr_buf dirlist; - struct svc_fh scratch; struct readdir_cd common; - unsigned int cookie_offset; + __be32 * buffer; + int buflen; + __be32 * offset; + __be32 * offset1; struct svc_rqst * rqstp; }; @@ -265,50 +273,52 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool 
nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs3svc_decode_voidarg(struct svc_rqst *, __be32 *); +int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *); +int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); +int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); +int 
nfs3svc_encode_voidres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *); +int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); +int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_readres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_createres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *); +int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *); +int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); - -void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset); -int nfs3svc_encode_entry3(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type); -int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type); +int nfs3svc_encode_entry(void *, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int); +int nfs3svc_encode_entry_plus(void *, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int); /* Helper functions for NFSv3 ACL code */ -bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp); -bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status); -bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, - const struct svc_fh *fhp); +__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, + struct svc_fh *fhp); +__be32 
*nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp); + #endif /* _LINUX_NFSD_XDR3_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index a034b9b62137..679d40af1bbb 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -76,7 +76,12 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) struct nfsd4_change_info { u32 atomic; + bool change_supported; + u32 before_ctime_sec; + u32 before_ctime_nsec; u64 before_change; + u32 after_ctime_sec; + u32 after_ctime_nsec; u64 after_change; }; @@ -247,8 +252,7 @@ struct nfsd4_listxattrs { struct nfsd4_open { u32 op_claim_type; /* request */ - u32 op_fnamelen; - char * op_fname; /* request - everything but CLAIM_PREV */ + struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ u32 op_delegate_type; /* request - CLAIM_PREV only */ stateid_t op_delegate_stateid; /* request - response */ u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ @@ -273,13 +277,11 @@ struct nfsd4_open { bool op_truncate; /* used during processing */ bool op_created; /* used during processing */ struct nfs4_openowner *op_openowner; /* used during processing */ - struct file *op_filp; /* used during processing */ struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; - struct svc_rqst *op_rqstp; }; struct nfsd4_open_confirm { @@ -303,10 +305,9 @@ struct nfsd4_read { u32 rd_length; /* request */ int rd_vlen; struct nfsd_file *rd_nf; - + struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh *rd_fhp; /* response */ - u32 rd_eof; /* response */ + struct svc_fh *rd_fhp; /* response */ }; struct nfsd4_readdir { @@ -384,6 +385,13 @@ struct nfsd4_setclientid_confirm { nfs4_verifier sc_confirm; }; +struct nfsd4_saved_compoundargs { + __be32 *p; + __be32 *end; + int pagelen; + struct page **pagelist; +}; + struct nfsd4_test_stateid_id { __be32 
ts_id_status; stateid_t ts_id_stateid; @@ -411,7 +419,8 @@ struct nfsd4_write { u64 wr_offset; /* request */ u32 wr_stable_how; /* request */ u32 wr_buflen; /* request */ - struct xdr_buf wr_payload; /* request */ + struct kvec wr_head; + struct page ** wr_pagelist; /* request */ u32 wr_bytes_written; /* response */ u32 wr_how_written; /* response */ @@ -424,7 +433,7 @@ struct nfsd4_exchange_id { u32 flags; clientid_t clientid; u32 seqid; - u32 spa_how; + int spa_how; u32 spo_must_enforce[3]; u32 spo_must_allow[3]; struct xdr_netobj nii_domain; @@ -534,13 +543,6 @@ struct nfsd42_write_res { stateid_t cb_stateid; }; -struct nfsd4_cb_offload { - struct nfsd4_callback co_cb; - struct nfsd42_write_res co_res; - __be32 co_nfserr; - struct knfsd_fh co_fh; -}; - struct nfsd4_copy { /* request */ stateid_t cp_src_stateid; @@ -548,16 +550,18 @@ struct nfsd4_copy { u64 cp_src_pos; u64 cp_dst_pos; u64 cp_count; - struct nl4_server *cp_src; + struct nl4_server cp_src; + bool cp_intra; - unsigned long cp_flags; -#define NFSD4_COPY_F_STOPPED (0) -#define NFSD4_COPY_F_INTRA (1) -#define NFSD4_COPY_F_SYNCHRONOUS (2) -#define NFSD4_COPY_F_COMMITTED (3) + /* both */ + bool cp_synchronous; /* response */ struct nfsd42_write_res cp_res; + + /* for cb_offload */ + struct nfsd4_callback cp_cb; + __be32 nfserr; struct knfsd_fh fh; struct nfs4_client *cp_clp; @@ -570,34 +574,13 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; + bool stopped; - struct nfsd4_ssc_umount_item *ss_nsui; + struct vfsmount *ss_mnt; struct nfs_fh c_fh; nfs4_stateid stateid; }; - -static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync) -{ - if (sync) - set_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); - else - clear_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); -} - -static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy) -{ - return test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); -} - -static inline bool nfsd4_copy_is_async(const 
struct nfsd4_copy *copy) -{ - return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); -} - -static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy) -{ - return !test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); -} +extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ @@ -622,20 +605,19 @@ struct nfsd4_offload_status { struct nfsd4_copy_notify { /* request */ stateid_t cpn_src_stateid; - struct nl4_server *cpn_dst; + struct nl4_server cpn_dst; /* response */ stateid_t cpn_cnr_stateid; u64 cpn_sec; u32 cpn_nsec; - struct nl4_server *cpn_src; + struct nl4_server cpn_src; }; struct nfsd4_op { - u32 opnum; + int opnum; + const struct nfsd4_operation * opdesc; __be32 status; - const struct nfsd4_operation *opdesc; - struct nfs4_replay *replay; union nfsd4_op_u { struct nfsd4_access access; struct nfsd4_close close; @@ -699,6 +681,7 @@ struct nfsd4_op { struct nfsd4_listxattrs listxattrs; struct nfsd4_removexattr removexattr; } u; + struct nfs4_replay * replay; }; bool nfsd4_cache_this_op(struct nfsd4_op *); @@ -713,29 +696,35 @@ struct svcxdr_tmpbuf { struct nfsd4_compoundargs { /* scratch variables for XDR decode */ - struct xdr_stream *xdr; + __be32 * p; + __be32 * end; + struct page ** pagelist; + int pagelen; + bool tail; + __be32 tmp[8]; + __be32 * tmpp; struct svcxdr_tmpbuf *to_free; + struct svc_rqst *rqstp; - char * tag; u32 taglen; + char * tag; u32 minorversion; - u32 client_opcnt; u32 opcnt; struct nfsd4_op *ops; struct nfsd4_op iops[8]; + int cachetype; }; struct nfsd4_compoundres { /* scratch variables for XDR encode */ - struct xdr_stream *xdr; + struct xdr_stream xdr; struct svc_rqst * rqstp; - __be32 *statusp; - char * tag; u32 taglen; + char * tag; u32 opcnt; - + __be32 * tagp; /* tag, opcount encode location */ struct nfsd4_compound_state cstate; }; @@ -778,16 +767,24 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = 
(u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr); + cinfo->atomic = (u32)fhp->fh_post_saved; + cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); cinfo->before_change = fhp->fh_pre_change; cinfo->after_change = fhp->fh_post_change; + cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; + cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; + cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; + cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; + } bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nfs4svc_decode_voidarg(struct svc_rqst *, __be32 *); +int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *); +int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); +int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); @@ -888,19 +885,13 @@ struct nfsd4_operation { u32 op_flags; char *op_name; /* Try to get response size before operation */ - u32 (*op_rsize_bop)(const struct svc_rqst *rqstp, - const struct nfsd4_op *op); + u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); void (*op_get_currentstateid)(struct nfsd4_compound_state *, union nfsd4_op_u *); void (*op_set_currentstateid)(struct nfsd4_compound_state *, union nfsd4_op_u *); }; -struct nfsd4_cb_recall_any { - struct nfsd4_callback ra_cb; - u32 ra_keep; - u32 ra_bmval[1]; -}; #endif diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 0d39af1b00a0..547cf07cf4e0 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -48,9 +48,3 @@ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) -#define NFS4_enc_cb_recall_any_sz 
(cb_compound_enc_hdr_sz + \ - cb_sequence_enc_sz + \ - 1 + 1 + 1) -#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + \ - op_dec_sz) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index fa81c59a2ad4..e45ca6ecba95 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -150,7 +150,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); - fsnotify_group_lock(dnotify_group); + mutex_lock(&dnotify_group->mark_mutex); spin_lock(&fsn_mark->lock); prev = &dn_mark->dn; @@ -173,7 +173,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) free = true; } - fsnotify_group_unlock(dnotify_group); + mutex_unlock(&dnotify_group->mark_mutex); if (free) fsnotify_free_mark(fsn_mark); @@ -196,7 +196,7 @@ static __u32 convert_arg(unsigned long arg) if (arg & DN_ATTRIB) new_mask |= FS_ATTRIB; if (arg & DN_RENAME) - new_mask |= FS_RENAME; + new_mask |= FS_DN_RENAME; if (arg & DN_CREATE) new_mask |= (FS_CREATE | FS_MOVED_TO); @@ -306,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) new_dn_mark->dn = NULL; /* this is needed to prevent the fcntl/close race described below */ - fsnotify_group_lock(dnotify_group); + mutex_lock(&dnotify_group->mark_mutex); /* add the new_fsn_mark or find an old one. 
*/ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); @@ -316,7 +316,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) } else { error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0); if (error) { - fsnotify_group_unlock(dnotify_group); + mutex_unlock(&dnotify_group->mark_mutex); goto out_err; } spin_lock(&new_fsn_mark->lock); @@ -327,7 +327,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) } rcu_read_lock(); - f = lookup_fd_rcu(fd); + f = fcheck(fd); rcu_read_unlock(); /* if (f != filp) means that we lost a race and another task/thread @@ -365,7 +365,7 @@ out: if (destroy) fsnotify_detach_mark(fsn_mark); - fsnotify_group_unlock(dnotify_group); + mutex_unlock(&dnotify_group->mark_mutex); if (destroy) fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); @@ -383,8 +383,7 @@ static int __init dnotify_init(void) SLAB_PANIC|SLAB_ACCOUNT); dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); if (IS_ERR(dnotify_group)) panic("unable to allocate fsnotify group for dnotify\n"); return 0; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index a2a15bc4df28..c3af99e94f1d 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -14,33 +14,20 @@ #include #include #include -#include #include "fanotify.h" -static bool fanotify_path_equal(const struct path *p1, const struct path *p2) +static bool fanotify_path_equal(struct path *p1, struct path *p2) { return p1->mnt == p2->mnt && p1->dentry == p2->dentry; } -static unsigned int fanotify_hash_path(const struct path *path) -{ - return hash_ptr(path->dentry, FANOTIFY_EVENT_HASH_BITS) ^ - hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS); -} - static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, __kernel_fsid_t *fsid2) { return 
fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; } -static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) -{ - return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ - hash_32(fsid->val[1], FANOTIFY_EVENT_HASH_BITS); -} - static bool fanotify_fh_equal(struct fanotify_fh *fh1, struct fanotify_fh *fh2) { @@ -51,16 +38,6 @@ static bool fanotify_fh_equal(struct fanotify_fh *fh1, !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); } -static unsigned int fanotify_hash_fh(struct fanotify_fh *fh) -{ - long salt = (long)fh->type | (long)fh->len << 8; - - /* - * full_name_hash() works long by long, so it handles fh buf optimally. - */ - return full_name_hash((void *)salt, fanotify_fh_buf(fh), fh->len); -} - static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, struct fanotify_fid_event *ffe2) { @@ -76,10 +53,8 @@ static bool fanotify_info_equal(struct fanotify_info *info1, struct fanotify_info *info2) { if (info1->dir_fh_totlen != info2->dir_fh_totlen || - info1->dir2_fh_totlen != info2->dir2_fh_totlen || info1->file_fh_totlen != info2->file_fh_totlen || - info1->name_len != info2->name_len || - info1->name2_len != info2->name2_len) + info1->name_len != info2->name_len) return false; if (info1->dir_fh_totlen && @@ -87,24 +62,14 @@ static bool fanotify_info_equal(struct fanotify_info *info1, fanotify_info_dir_fh(info2))) return false; - if (info1->dir2_fh_totlen && - !fanotify_fh_equal(fanotify_info_dir2_fh(info1), - fanotify_info_dir2_fh(info2))) - return false; - if (info1->file_fh_totlen && !fanotify_fh_equal(fanotify_info_file_fh(info1), fanotify_info_file_fh(info2))) return false; - if (info1->name_len && - memcmp(fanotify_info_name(info1), fanotify_info_name(info2), - info1->name_len)) - return false; - - return !info1->name2_len || - !memcmp(fanotify_info_name2(info1), fanotify_info_name2(info2), - info1->name2_len); + return !info1->name_len || + !memcmp(fanotify_info_name(info1), fanotify_info_name(info2), + 
info1->name_len); } static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, @@ -123,22 +88,16 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, return fanotify_info_equal(info1, info2); } -static bool fanotify_error_event_equal(struct fanotify_error_event *fee1, - struct fanotify_error_event *fee2) +static bool fanotify_should_merge(struct fsnotify_event *old_fsn, + struct fsnotify_event *new_fsn) { - /* Error events against the same file system are always merged. */ - if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid)) - return false; + struct fanotify_event *old, *new; - return true; -} + pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); + old = FANOTIFY_E(old_fsn); + new = FANOTIFY_E(new_fsn); -static bool fanotify_should_merge(struct fanotify_event *old, - struct fanotify_event *new) -{ - pr_debug("%s: old=%p new=%p\n", __func__, old, new); - - if (old->hash != new->hash || + if (old_fsn->objectid != new_fsn->objectid || old->type != new->type || old->pid != new->pid) return false; @@ -153,13 +112,6 @@ static bool fanotify_should_merge(struct fanotify_event *old, if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) return false; - /* - * FAN_RENAME event is reported with special info record types, - * so we cannot merge it with other events. 
- */ - if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) - return false; - switch (old->type) { case FANOTIFY_EVENT_TYPE_PATH: return fanotify_path_equal(fanotify_event_path(old), @@ -170,9 +122,6 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FID_NAME: return fanotify_name_event_equal(FANOTIFY_NE(old), FANOTIFY_NE(new)); - case FANOTIFY_EVENT_TYPE_FS_ERROR: - return fanotify_error_event_equal(FANOTIFY_EE(old), - FANOTIFY_EE(new)); default: WARN_ON_ONCE(1); } @@ -184,16 +133,14 @@ static bool fanotify_should_merge(struct fanotify_event *old, #define FANOTIFY_MAX_MERGE_EVENTS 128 /* and the list better be locked by something too! */ -static int fanotify_merge(struct fsnotify_group *group, - struct fsnotify_event *event) +static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { - struct fanotify_event *old, *new = FANOTIFY_E(event); - unsigned int bucket = fanotify_event_hash_bucket(group, new); - struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; + struct fsnotify_event *test_event; + struct fanotify_event *new; int i = 0; - pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, - group, event, bucket); + pr_debug("%s: list=%p event=%p\n", __func__, list, event); + new = FANOTIFY_E(event); /* * Don't merge a permission event with any other event so that we know @@ -203,15 +150,11 @@ static int fanotify_merge(struct fsnotify_group *group, if (fanotify_is_perm_event(new->mask)) return 0; - hlist_for_each_entry(old, hlist, merge_list) { + list_for_each_entry_reverse(test_event, list, list) { if (++i > FANOTIFY_MAX_MERGE_EVENTS) break; - if (fanotify_should_merge(old, new)) { - old->mask |= new->mask; - - if (fanotify_is_error_event(old->mask)) - FANOTIFY_EE(old)->err_count++; - + if (fanotify_should_merge(test_event, event)) { + FANOTIFY_E(test_event)->mask |= new->mask; return 1; } } @@ -247,11 +190,8 @@ static int fanotify_get_response(struct fsnotify_group *group, return 
ret; } /* Event not yet reported? Just remove it. */ - if (event->state == FAN_EVENT_INIT) { + if (event->state == FAN_EVENT_INIT) fsnotify_remove_queued_event(group, &event->fae.fse); - /* Permission events are not supposed to be hashed */ - WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list)); - } /* * Event may be also answered in case signal delivery raced * with wakeup. In that case we have nothing to do besides @@ -291,17 +231,15 @@ out: */ static u32 fanotify_group_event_mask(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info, - u32 *match_mask, u32 event_mask, - const void *data, int data_type, - struct inode *dir) + u32 event_mask, const void *data, + int data_type, struct inode *dir) { - __u32 marks_mask = 0, marks_ignore_mask = 0; + __u32 marks_mask = 0, marks_ignored_mask = 0; __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS | FANOTIFY_EVENT_FLAGS; const struct path *path = fsnotify_data_path(data, data_type); unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); struct fsnotify_mark *mark; - bool ondir = event_mask & FAN_ONDIR; int type; pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", @@ -316,30 +254,37 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, return 0; } else if (!(fid_mode & FAN_REPORT_FID)) { /* Do we have a directory inode to report? */ - if (!dir && !ondir) + if (!dir && !(event_mask & FS_ISDIR)) return 0; } - fsnotify_foreach_iter_mark_type(iter_info, mark, type) { - /* - * Apply ignore mask depending on event flags in ignore mask. - */ - marks_ignore_mask |= - fsnotify_effective_ignore_mask(mark, ondir, type); + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + + /* Apply ignore mask regardless of ISDIR and ON_CHILD flags */ + marks_ignored_mask |= mark->ignored_mask; /* - * Send the event depending on event flags in mark mask. 
+ * If the event is on dir and this mark doesn't care about + * events on dir, don't send it! */ - if (!fsnotify_mask_applicable(mark->mask, ondir, type)) + if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR)) + continue; + + /* + * If the event is on a child and this mark is on a parent not + * watching children, don't send it! + */ + if (type == FSNOTIFY_OBJ_TYPE_PARENT && + !(mark->mask & FS_EVENT_ON_CHILD)) continue; marks_mask |= mark->mask; - - /* Record the mark types of this group that matched the event */ - *match_mask |= 1U << type; } - test_mask = event_mask & marks_mask & ~marks_ignore_mask; + test_mask = event_mask & marks_mask & ~marks_ignored_mask; /* * For dirent modification events (create/delete/move) that do not carry @@ -374,23 +319,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, static int fanotify_encode_fh_len(struct inode *inode) { int dwords = 0; - int fh_len; if (!inode) return 0; exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); - fh_len = dwords << 2; - /* - * struct fanotify_error_event might be preallocated and is - * limited to MAX_HANDLE_SZ. This should never happen, but - * safeguard by forcing an invalid file handle. - */ - if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) - return 0; - - return fh_len; + return dwords << 2; } /* @@ -400,8 +335,7 @@ static int fanotify_encode_fh_len(struct inode *inode) * Return 0 on failure to encode. */ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, - unsigned int fh_len, unsigned int *hash, - gfp_t gfp) + unsigned int fh_len, gfp_t gfp) { int dwords, type = 0; char *ext_buf = NULL; @@ -411,21 +345,15 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = FILEID_ROOT; fh->len = 0; fh->flags = 0; - - /* - * Invalid FHs are used by FAN_FS_ERROR for errors not - * linked to any inode. The f_handle won't be reported - * back to userspace. 
- */ if (!inode) - goto out; + return 0; /* * !gpf means preallocated variable size fh, but fh_len could * be zero in that case if encoding fh len failed. */ err = -ENOENT; - if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ) + if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4)) goto out_err; /* No external buffer in a variable size allocated fh */ @@ -450,14 +378,6 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, fh->type = type; fh->len = fh_len; -out: - /* - * Mix fh into event merge key. Hash might be NULL in case of - * unhashed FID events (i.e. FAN_FS_ERROR). - */ - if (hash) - *hash ^= fanotify_hash_fh(fh); - return FANOTIFY_FH_HDR_LEN + fh_len; out_err: @@ -472,41 +392,17 @@ out_err: } /* - * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for - * some events and the fid of the parent for create/delete/move events. - * - * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported - * also in create/delete/move events in addition to the fid of the parent - * and the name of the child. - */ -static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask) -{ - if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) - return (fid_mode & FAN_REPORT_TARGET_FID); - - return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR); -} - -/* - * The inode to use as identifier when reporting fid depends on the event - * and the group flags. - * - * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. - * - * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory - * fid on dirent events and the child fid otherwise. - * + * The inode to use as identifier when reporting fid depends on the event. + * Report the modified directory inode on dirent modification events. + * Report the "victim" inode otherwise. * For example: - * FS_ATTRIB reports the child fid even if reported on a watched parent. - * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. 
- * and reports the created child fid with FAN_REPORT_TARGET_FID. + * FS_ATTRIB reports the child inode even if reported on a watched parent. + * FS_CREATE reports the modified dir inode and not the created inode. */ static struct inode *fanotify_fid_inode(u32 event_mask, const void *data, - int data_type, struct inode *dir, - unsigned int fid_mode) + int data_type, struct inode *dir) { - if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) && - !(fid_mode & FAN_REPORT_TARGET_FID)) + if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return dir; return fsnotify_data_inode(data, data_type); @@ -528,14 +424,13 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return dir; - if (inode && S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) return inode; return dir; } static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, - unsigned int *hash, gfp_t gfp) { struct fanotify_path_event *pevent; @@ -546,7 +441,6 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH; pevent->path = *path; - *hash ^= fanotify_hash_path(path); path_get(path); return &pevent->fae; @@ -572,7 +466,6 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, __kernel_fsid_t *fsid, - unsigned int *hash, gfp_t gfp) { struct fanotify_fid_event *ffe; @@ -583,153 +476,78 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; ffe->fsid = *fsid; - *hash ^= fanotify_hash_fsid(fsid); fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id), - hash, gfp); + gfp); return &ffe->fae; } -static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, +static struct fanotify_event *fanotify_alloc_name_event(struct inode *id, __kernel_fsid_t *fsid, - const struct qstr *name, + 
const struct qstr *file_name, struct inode *child, - struct dentry *moved, - unsigned int *hash, gfp_t gfp) { struct fanotify_name_event *fne; struct fanotify_info *info; struct fanotify_fh *dfh, *ffh; - struct inode *dir2 = moved ? d_inode(moved->d_parent) : NULL; - const struct qstr *name2 = moved ? &moved->d_name : NULL; - unsigned int dir_fh_len = fanotify_encode_fh_len(dir); - unsigned int dir2_fh_len = fanotify_encode_fh_len(dir2); + unsigned int dir_fh_len = fanotify_encode_fh_len(id); unsigned int child_fh_len = fanotify_encode_fh_len(child); - unsigned long name_len = name ? name->len : 0; - unsigned long name2_len = name2 ? name2->len : 0; - unsigned int len, size; + unsigned int size; - /* Reserve terminating null byte even for empty name */ - size = sizeof(*fne) + name_len + name2_len + 2; - if (dir_fh_len) - size += FANOTIFY_FH_HDR_LEN + dir_fh_len; - if (dir2_fh_len) - size += FANOTIFY_FH_HDR_LEN + dir2_fh_len; + size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len; if (child_fh_len) size += FANOTIFY_FH_HDR_LEN + child_fh_len; + if (file_name) + size += file_name->len + 1; fne = kmalloc(size, gfp); if (!fne) return NULL; fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME; fne->fsid = *fsid; - *hash ^= fanotify_hash_fsid(fsid); info = &fne->info; fanotify_info_init(info); - if (dir_fh_len) { - dfh = fanotify_info_dir_fh(info); - len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0); - fanotify_info_set_dir_fh(info, len); - } - if (dir2_fh_len) { - dfh = fanotify_info_dir2_fh(info); - len = fanotify_encode_fh(dfh, dir2, dir2_fh_len, hash, 0); - fanotify_info_set_dir2_fh(info, len); - } + dfh = fanotify_info_dir_fh(info); + info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0); if (child_fh_len) { ffh = fanotify_info_file_fh(info); - len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0); - fanotify_info_set_file_fh(info, len); - } - if (name_len) { - fanotify_info_copy_name(info, name); - *hash ^= full_name_hash((void *)name_len, 
name->name, name_len); - } - if (name2_len) { - fanotify_info_copy_name2(info, name2); - *hash ^= full_name_hash((void *)name2_len, name2->name, - name2_len); + info->file_fh_totlen = fanotify_encode_fh(ffh, child, child_fh_len, 0); } + if (file_name) + fanotify_info_copy_name(info, file_name); - pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", - __func__, size, dir_fh_len, child_fh_len, + pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n", + __func__, id->i_ino, size, dir_fh_len, child_fh_len, info->name_len, info->name_len, fanotify_info_name(info)); - if (dir2_fh_len) { - pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n", - __func__, dir2_fh_len, info->name2_len, - info->name2_len, fanotify_info_name2(info)); - } - return &fne->fae; } -static struct fanotify_event *fanotify_alloc_error_event( - struct fsnotify_group *group, - __kernel_fsid_t *fsid, - const void *data, int data_type, - unsigned int *hash) -{ - struct fs_error_report *report = - fsnotify_data_error_report(data, data_type); - struct inode *inode; - struct fanotify_error_event *fee; - int fh_len; - - if (WARN_ON_ONCE(!report)) - return NULL; - - fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS); - if (!fee) - return NULL; - - fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR; - fee->error = report->error; - fee->err_count = 1; - fee->fsid = *fsid; - - inode = report->inode; - fh_len = fanotify_encode_fh_len(inode); - - /* Bad fh_len. Fallback to using an invalid fh. Should never happen. 
*/ - if (!fh_len && inode) - inode = NULL; - - fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0); - - *hash ^= fanotify_hash_fsid(fsid); - - return &fee->fae; -} - -static struct fanotify_event *fanotify_alloc_event( - struct fsnotify_group *group, - u32 mask, const void *data, int data_type, - struct inode *dir, const struct qstr *file_name, - __kernel_fsid_t *fsid, u32 match_mask) +static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, + u32 mask, const void *data, + int data_type, struct inode *dir, + const struct qstr *file_name, + __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; - unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - struct inode *id = fanotify_fid_inode(mask, data, data_type, dir, - fid_mode); + struct inode *id = fanotify_fid_inode(mask, data, data_type, dir); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); + unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); struct mem_cgroup *old_memcg; - struct dentry *moved = NULL; struct inode *child = NULL; bool name_event = false; - unsigned int hash = 0; - bool ondir = mask & FAN_ONDIR; - struct pid *pid; if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { /* - * For certain events and group flags, report the child fid + * With both flags FAN_REPORT_DIR_FID and FAN_REPORT_FID, we + * report the child fid for events reported on a non-dir child * in addition to reporting the parent fid and maybe child name. 
*/ - if (fanotify_report_child_fid(fid_mode, mask) && id != dirid) + if ((fid_mode & FAN_REPORT_FID) && + id != dirid && !(mask & FAN_ONDIR)) child = id; id = dirid; @@ -750,41 +568,10 @@ static struct fanotify_event *fanotify_alloc_event( if (!(fid_mode & FAN_REPORT_NAME)) { name_event = !!child; file_name = NULL; - } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { + } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || + !(mask & FAN_ONDIR)) { name_event = true; } - - /* - * In the special case of FAN_RENAME event, use the match_mask - * to determine if we need to report only the old parent+name, - * only the new parent+name or both. - * 'dirid' and 'file_name' are the old parent+name and - * 'moved' has the new parent+name. - */ - if (mask & FAN_RENAME) { - bool report_old, report_new; - - if (WARN_ON_ONCE(!match_mask)) - return NULL; - - /* Report both old and new parent+name if sb watching */ - report_old = report_new = - match_mask & (1U << FSNOTIFY_ITER_TYPE_SB); - report_old |= - match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE); - report_new |= - match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2); - - if (!report_old) { - /* Do not report old parent+name */ - dirid = NULL; - file_name = NULL; - } - if (report_new) { - /* Report new parent+name */ - moved = fsnotify_data_dentry(data, data_type); - } - } } /* @@ -803,30 +590,28 @@ static struct fanotify_event *fanotify_alloc_event( if (fanotify_is_perm_event(mask)) { event = fanotify_alloc_perm_event(path, gfp); - } else if (fanotify_is_error_event(mask)) { - event = fanotify_alloc_error_event(group, fsid, data, - data_type, &hash); - } else if (name_event && (file_name || moved || child)) { - event = fanotify_alloc_name_event(dirid, fsid, file_name, child, - moved, &hash, gfp); + } else if (name_event && (file_name || child)) { + event = fanotify_alloc_name_event(id, fsid, file_name, child, + gfp); } else if (fid_mode) { - event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); + event = 
fanotify_alloc_fid_event(id, fsid, gfp); } else { - event = fanotify_alloc_path_event(path, &hash, gfp); + event = fanotify_alloc_path_event(path, gfp); } if (!event) goto out; + /* + * Use the victim inode instead of the watching inode as the id for + * event queue, so event reported on parent is merged with event + * reported on child when both directory and child watches exist. + */ + fanotify_init_event(event, (unsigned long)id, mask); if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) - pid = get_pid(task_pid(current)); + event->pid = get_pid(task_pid(current)); else - pid = get_pid(task_tgid(current)); - - /* Mix event info, FAN_ONDIR flag and pid into event merge key */ - hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS); - fanotify_init_event(event, hash, mask); - event->pid = pid; + event->pid = get_pid(task_tgid(current)); out: set_active_memcg(old_memcg); @@ -840,14 +625,16 @@ out: */ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) { - struct fsnotify_mark *mark; int type; __kernel_fsid_t fsid = {}; - fsnotify_foreach_iter_mark_type(iter_info, mark, type) { + fsnotify_foreach_obj_type(type) { struct fsnotify_mark_connector *conn; - conn = READ_ONCE(mark->connector); + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + + conn = READ_ONCE(iter_info->marks[type]->connector); /* Mark is just getting destroyed or created? */ if (!conn) continue; @@ -864,27 +651,6 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) return fsid; } -/* - * Add an event to hash table for faster merge. 
- */ -static void fanotify_insert_event(struct fsnotify_group *group, - struct fsnotify_event *fsn_event) -{ - struct fanotify_event *event = FANOTIFY_E(fsn_event); - unsigned int bucket = fanotify_event_hash_bucket(group, event); - struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; - - assert_spin_locked(&group->notification_lock); - - if (!fanotify_is_hashed_event(event->mask)) - return; - - pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, - group, event, bucket); - - hlist_add_head(&event->merge_list, hlist); -} - static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, const void *data, int data_type, struct inode *dir, @@ -895,7 +661,6 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, struct fanotify_event *event; struct fsnotify_event *fsn_event; __kernel_fsid_t fsid = {}; - u32 match_mask = 0; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); @@ -916,18 +681,15 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); - BUILD_BUG_ON(FAN_RENAME != FS_RENAME); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); - mask = fanotify_group_event_mask(group, iter_info, &match_mask, - mask, data, data_type, dir); + mask = fanotify_group_event_mask(group, iter_info, mask, data, + data_type, dir); if (!mask) return 0; - pr_debug("%s: group=%p mask=%x report_mask=%x\n", __func__, - group, mask, match_mask); + pr_debug("%s: group=%p mask=%x\n", __func__, group, mask); if (fanotify_is_perm_event(mask)) { /* @@ -946,7 +708,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, } event = fanotify_alloc_event(group, mask, data, data_type, dir, - file_name, &fsid, match_mask); + file_name, &fsid); ret = 
-ENOMEM; if (unlikely(!event)) { /* @@ -959,8 +721,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, } fsn_event = &event->fse; - ret = fsnotify_insert_event(group, fsn_event, fanotify_merge, - fanotify_insert_event); + ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); @@ -981,13 +742,11 @@ finish: static void fanotify_free_group_priv(struct fsnotify_group *group) { - kfree(group->fanotify_data.merge_hash); - if (group->fanotify_data.ucounts) - dec_ucount(group->fanotify_data.ucounts, - UCOUNT_FANOTIFY_GROUPS); + struct user_struct *user; - if (mempool_initialized(&group->fanotify_data.error_events_pool)) - mempool_exit(&group->fanotify_data.error_events_pool); + user = group->fanotify_data.user; + atomic_dec(&user->fanotify_listeners); + free_uid(user); } static void fanotify_free_path_event(struct fanotify_event *event) @@ -1016,16 +775,7 @@ static void fanotify_free_name_event(struct fanotify_event *event) kfree(FANOTIFY_NE(event)); } -static void fanotify_free_error_event(struct fsnotify_group *group, - struct fanotify_event *event) -{ - struct fanotify_error_event *fee = FANOTIFY_EE(event); - - mempool_free(fee, &group->fanotify_data.error_events_pool); -} - -static void fanotify_free_event(struct fsnotify_group *group, - struct fsnotify_event *fsn_event) +static void fanotify_free_event(struct fsnotify_event *fsn_event) { struct fanotify_event *event; @@ -1047,21 +797,11 @@ static void fanotify_free_event(struct fsnotify_group *group, case FANOTIFY_EVENT_TYPE_OVERFLOW: kfree(event); break; - case FANOTIFY_EVENT_TYPE_FS_ERROR: - fanotify_free_error_event(group, event); - break; default: WARN_ON_ONCE(1); } } -static void fanotify_freeing_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group) -{ - if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) - dec_ucount(group->fanotify_data.ucounts, UCOUNT_FANOTIFY_MARKS); 
-} - static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) { kmem_cache_free(fanotify_mark_cache, fsn_mark); @@ -1071,6 +811,5 @@ const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .free_group_priv = fanotify_free_group_priv, .free_event = fanotify_free_event, - .freeing_mark = fanotify_freeing_mark, .free_mark = fanotify_free_mark, }; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 57f51a9a3015..896c819a1786 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -3,7 +3,6 @@ #include #include #include -#include extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_fid_event_cachep; @@ -40,45 +39,15 @@ struct fanotify_fh { struct fanotify_info { /* size of dir_fh/file_fh including fanotify_fh hdr size */ u8 dir_fh_totlen; - u8 dir2_fh_totlen; u8 file_fh_totlen; u8 name_len; - u8 name2_len; - u8 pad[3]; + u8 pad; unsigned char buf[]; /* * (struct fanotify_fh) dir_fh starts at buf[0] - * (optional) dir2_fh starts at buf[dir_fh_totlen] - * (optional) file_fh starts at buf[dir_fh_totlen + dir2_fh_totlen] - * name starts at buf[dir_fh_totlen + dir2_fh_totlen + file_fh_totlen] - * ... 
+ * (optional) file_fh starts at buf[dir_fh_totlen] + * name starts at buf[dir_fh_totlen + file_fh_totlen] */ -#define FANOTIFY_DIR_FH_SIZE(info) ((info)->dir_fh_totlen) -#define FANOTIFY_DIR2_FH_SIZE(info) ((info)->dir2_fh_totlen) -#define FANOTIFY_FILE_FH_SIZE(info) ((info)->file_fh_totlen) -#define FANOTIFY_NAME_SIZE(info) ((info)->name_len + 1) -#define FANOTIFY_NAME2_SIZE(info) ((info)->name2_len + 1) - -#define FANOTIFY_DIR_FH_OFFSET(info) 0 -#define FANOTIFY_DIR2_FH_OFFSET(info) \ - (FANOTIFY_DIR_FH_OFFSET(info) + FANOTIFY_DIR_FH_SIZE(info)) -#define FANOTIFY_FILE_FH_OFFSET(info) \ - (FANOTIFY_DIR2_FH_OFFSET(info) + FANOTIFY_DIR2_FH_SIZE(info)) -#define FANOTIFY_NAME_OFFSET(info) \ - (FANOTIFY_FILE_FH_OFFSET(info) + FANOTIFY_FILE_FH_SIZE(info)) -#define FANOTIFY_NAME2_OFFSET(info) \ - (FANOTIFY_NAME_OFFSET(info) + FANOTIFY_NAME_SIZE(info)) - -#define FANOTIFY_DIR_FH_BUF(info) \ - ((info)->buf + FANOTIFY_DIR_FH_OFFSET(info)) -#define FANOTIFY_DIR2_FH_BUF(info) \ - ((info)->buf + FANOTIFY_DIR2_FH_OFFSET(info)) -#define FANOTIFY_FILE_FH_BUF(info) \ - ((info)->buf + FANOTIFY_FILE_FH_OFFSET(info)) -#define FANOTIFY_NAME_BUF(info) \ - ((info)->buf + FANOTIFY_NAME_OFFSET(info)) -#define FANOTIFY_NAME2_BUF(info) \ - ((info)->buf + FANOTIFY_NAME2_OFFSET(info)) } __aligned(4); static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh) @@ -117,21 +86,7 @@ static inline struct fanotify_fh *fanotify_info_dir_fh(struct fanotify_info *inf { BUILD_BUG_ON(offsetof(struct fanotify_info, buf) % 4); - return (struct fanotify_fh *)FANOTIFY_DIR_FH_BUF(info); -} - -static inline int fanotify_info_dir2_fh_len(struct fanotify_info *info) -{ - if (!info->dir2_fh_totlen || - WARN_ON_ONCE(info->dir2_fh_totlen < FANOTIFY_FH_HDR_LEN)) - return 0; - - return info->dir2_fh_totlen - FANOTIFY_FH_HDR_LEN; -} - -static inline struct fanotify_fh *fanotify_info_dir2_fh(struct fanotify_info *info) -{ - return (struct fanotify_fh *)FANOTIFY_DIR2_FH_BUF(info); + return (struct fanotify_fh 
*)info->buf; } static inline int fanotify_info_file_fh_len(struct fanotify_info *info) @@ -145,90 +100,27 @@ static inline int fanotify_info_file_fh_len(struct fanotify_info *info) static inline struct fanotify_fh *fanotify_info_file_fh(struct fanotify_info *info) { - return (struct fanotify_fh *)FANOTIFY_FILE_FH_BUF(info); + return (struct fanotify_fh *)(info->buf + info->dir_fh_totlen); } -static inline char *fanotify_info_name(struct fanotify_info *info) +static inline const char *fanotify_info_name(struct fanotify_info *info) { - if (!info->name_len) - return NULL; - - return FANOTIFY_NAME_BUF(info); -} - -static inline char *fanotify_info_name2(struct fanotify_info *info) -{ - if (!info->name2_len) - return NULL; - - return FANOTIFY_NAME2_BUF(info); + return info->buf + info->dir_fh_totlen + info->file_fh_totlen; } static inline void fanotify_info_init(struct fanotify_info *info) { - BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN + MAX_HANDLE_SZ > U8_MAX); - BUILD_BUG_ON(NAME_MAX > U8_MAX); - info->dir_fh_totlen = 0; - info->dir2_fh_totlen = 0; info->file_fh_totlen = 0; info->name_len = 0; - info->name2_len = 0; -} - -/* These set/copy helpers MUST be called by order */ -static inline void fanotify_info_set_dir_fh(struct fanotify_info *info, - unsigned int totlen) -{ - if (WARN_ON_ONCE(info->dir2_fh_totlen > 0) || - WARN_ON_ONCE(info->file_fh_totlen > 0) || - WARN_ON_ONCE(info->name_len > 0) || - WARN_ON_ONCE(info->name2_len > 0)) - return; - - info->dir_fh_totlen = totlen; -} - -static inline void fanotify_info_set_dir2_fh(struct fanotify_info *info, - unsigned int totlen) -{ - if (WARN_ON_ONCE(info->file_fh_totlen > 0) || - WARN_ON_ONCE(info->name_len > 0) || - WARN_ON_ONCE(info->name2_len > 0)) - return; - - info->dir2_fh_totlen = totlen; -} - -static inline void fanotify_info_set_file_fh(struct fanotify_info *info, - unsigned int totlen) -{ - if (WARN_ON_ONCE(info->name_len > 0) || - WARN_ON_ONCE(info->name2_len > 0)) - return; - - info->file_fh_totlen = totlen; } 
static inline void fanotify_info_copy_name(struct fanotify_info *info, const struct qstr *name) { - if (WARN_ON_ONCE(name->len > NAME_MAX) || - WARN_ON_ONCE(info->name2_len > 0)) - return; - info->name_len = name->len; - strcpy(fanotify_info_name(info), name->name); -} - -static inline void fanotify_info_copy_name2(struct fanotify_info *info, - const struct qstr *name) -{ - if (WARN_ON_ONCE(name->len > NAME_MAX)) - return; - - info->name2_len = name->len; - strcpy(fanotify_info_name2(info), name->name); + strcpy(info->buf + info->dir_fh_totlen + info->file_fh_totlen, + name->name); } /* @@ -243,48 +135,29 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH, FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ - FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ - __FANOTIFY_EVENT_TYPE_NUM }; -#define FANOTIFY_EVENT_TYPE_BITS \ - (ilog2(__FANOTIFY_EVENT_TYPE_NUM - 1) + 1) -#define FANOTIFY_EVENT_HASH_BITS \ - (32 - FANOTIFY_EVENT_TYPE_BITS) - struct fanotify_event { struct fsnotify_event fse; - struct hlist_node merge_list; /* List for hashed merge */ u32 mask; - struct { - unsigned int type : FANOTIFY_EVENT_TYPE_BITS; - unsigned int hash : FANOTIFY_EVENT_HASH_BITS; - }; + enum fanotify_event_type type; struct pid *pid; }; static inline void fanotify_init_event(struct fanotify_event *event, - unsigned int hash, u32 mask) + unsigned long id, u32 mask) { - fsnotify_init_event(&event->fse); - INIT_HLIST_NODE(&event->merge_list); - event->hash = hash; + fsnotify_init_event(&event->fse, id); event->mask = mask; event->pid = NULL; } -#define FANOTIFY_INLINE_FH(name, size) \ -struct { \ - struct fanotify_fh (name); \ - /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \ - unsigned char _inline_fh_buf[(size)]; \ -} - struct fanotify_fid_event { struct fanotify_event fae; __kernel_fsid_t fsid; - - FANOTIFY_INLINE_FH(object_fh, FANOTIFY_INLINE_FH_LEN); + struct fanotify_fh object_fh; + /* Reserve space in 
object_fh.buf[] - access with fanotify_fh_buf() */ + unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN]; }; static inline struct fanotify_fid_event * @@ -305,30 +178,12 @@ FANOTIFY_NE(struct fanotify_event *event) return container_of(event, struct fanotify_name_event, fae); } -struct fanotify_error_event { - struct fanotify_event fae; - s32 error; /* Error reported by the Filesystem. */ - u32 err_count; /* Suppressed errors count */ - - __kernel_fsid_t fsid; /* FSID this error refers to. */ - - FANOTIFY_INLINE_FH(object_fh, MAX_HANDLE_SZ); -}; - -static inline struct fanotify_error_event * -FANOTIFY_EE(struct fanotify_event *event) -{ - return container_of(event, struct fanotify_error_event, fae); -} - static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_FID) return &FANOTIFY_FE(event)->fsid; else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) return &FANOTIFY_NE(event)->fsid; - else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) - return &FANOTIFY_EE(event)->fsid; else return NULL; } @@ -340,8 +195,6 @@ static inline struct fanotify_fh *fanotify_event_object_fh( return &FANOTIFY_FE(event)->object_fh; else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) return fanotify_info_file_fh(&FANOTIFY_NE(event)->info); - else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) - return &FANOTIFY_EE(event)->object_fh; else return NULL; } @@ -373,37 +226,6 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event) return info ? fanotify_info_dir_fh_len(info) : 0; } -static inline int fanotify_event_dir2_fh_len(struct fanotify_event *event) -{ - struct fanotify_info *info = fanotify_event_info(event); - - return info ? fanotify_info_dir2_fh_len(info) : 0; -} - -static inline bool fanotify_event_has_object_fh(struct fanotify_event *event) -{ - /* For error events, even zeroed fh are reported. 
*/ - if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR) - return true; - return fanotify_event_object_fh_len(event) > 0; -} - -static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event) -{ - return fanotify_event_dir_fh_len(event) > 0; -} - -static inline bool fanotify_event_has_dir2_fh(struct fanotify_event *event) -{ - return fanotify_event_dir2_fh_len(event) > 0; -} - -static inline bool fanotify_event_has_any_dir_fh(struct fanotify_event *event) -{ - return fanotify_event_has_dir_fh(event) || - fanotify_event_has_dir2_fh(event); -} - struct fanotify_path_event { struct fanotify_event fae; struct path path; @@ -447,12 +269,13 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event, fse); } -static inline bool fanotify_is_error_event(u32 mask) +static inline bool fanotify_event_has_path(struct fanotify_event *event) { - return mask & FAN_FS_ERROR; + return event->type == FANOTIFY_EVENT_TYPE_PATH || + event->type == FANOTIFY_EVENT_TYPE_PATH_PERM; } -static inline const struct path *fanotify_event_path(struct fanotify_event *event) +static inline struct path *fanotify_event_path(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_PATH) return &FANOTIFY_PE(event)->path; @@ -461,40 +284,3 @@ static inline const struct path *fanotify_event_path(struct fanotify_event *even else return NULL; } - -/* - * Use 128 size hash table to speed up events merge. - */ -#define FANOTIFY_HTABLE_BITS (7) -#define FANOTIFY_HTABLE_SIZE (1 << FANOTIFY_HTABLE_BITS) -#define FANOTIFY_HTABLE_MASK (FANOTIFY_HTABLE_SIZE - 1) - -/* - * Permission events and overflow event do not get merged - don't hash them. 
- */ -static inline bool fanotify_is_hashed_event(u32 mask) -{ - return !(fanotify_is_perm_event(mask) || - fsnotify_is_overflow_event(mask)); -} - -static inline unsigned int fanotify_event_hash_bucket( - struct fsnotify_group *group, - struct fanotify_event *event) -{ - return event->hash & FANOTIFY_HTABLE_MASK; -} - -static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) -{ - unsigned int mflags = 0; - - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) - mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF) - mflags |= FAN_MARK_EVICTABLE; - if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) - mflags |= FAN_MARK_IGNORE; - - return mflags; -} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 5302313f28be..84de9f97bbc0 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include @@ -28,62 +27,8 @@ #include "fanotify.h" #define FANOTIFY_DEFAULT_MAX_EVENTS 16384 -#define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192 -#define FANOTIFY_DEFAULT_MAX_GROUPS 128 -#define FANOTIFY_DEFAULT_FEE_POOL_SIZE 32 - -/* - * Legacy fanotify marks limits (8192) is per group and we introduced a tunable - * limit of marks per user, similar to inotify. Effectively, the legacy limit - * of fanotify marks per user is * . - * This default limit (1M) also happens to match the increased limit of inotify - * max_user_watches since v5.10. - */ -#define FANOTIFY_DEFAULT_MAX_USER_MARKS \ - (FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS) - -/* - * Most of the memory cost of adding an inode mark is pinning the marked inode. - * The size of the filesystem inode struct is not uniform across filesystems, - * so double the size of a VFS inode is used as a conservative approximation. 
- */ -#define INODE_MARK_COST (2 * sizeof(struct inode)) - -/* configurable via /proc/sys/fs/fanotify/ */ -static int fanotify_max_queued_events __read_mostly; - -#ifdef CONFIG_SYSCTL - -#include - -struct ctl_table fanotify_table[] = { - { - .procname = "max_user_groups", - .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, - { - .procname = "max_user_marks", - .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS], - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, - { - .procname = "max_queued_events", - .data = &fanotify_max_queued_events, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO - }, - { } -}; -#endif /* CONFIG_SYSCTL */ +#define FANOTIFY_DEFAULT_MAX_MARKS 8192 +#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 /* * All flags that may be specified in parameter event_f_flags of fanotify_init. 
@@ -106,12 +51,8 @@ struct kmem_cache *fanotify_path_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; #define FANOTIFY_EVENT_ALIGN 4 -#define FANOTIFY_FID_INFO_HDR_LEN \ +#define FANOTIFY_INFO_HDR_LEN \ (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) -#define FANOTIFY_PIDFD_INFO_HDR_LEN \ - sizeof(struct fanotify_event_info_pidfd) -#define FANOTIFY_ERROR_INFO_LEN \ - (sizeof(struct fanotify_event_info_error)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -120,45 +61,21 @@ static int fanotify_fid_info_len(int fh_len, int name_len) if (name_len) info_len += name_len + 1; - return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len, - FANOTIFY_EVENT_ALIGN); + return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN); } -/* FAN_RENAME may have one or two dir+name info records */ -static int fanotify_dir_name_info_len(struct fanotify_event *event) +static int fanotify_event_info_len(unsigned int fid_mode, + struct fanotify_event *event) { struct fanotify_info *info = fanotify_event_info(event); int dir_fh_len = fanotify_event_dir_fh_len(event); - int dir2_fh_len = fanotify_event_dir2_fh_len(event); + int fh_len = fanotify_event_object_fh_len(event); int info_len = 0; - - if (dir_fh_len) - info_len += fanotify_fid_info_len(dir_fh_len, - info->name_len); - if (dir2_fh_len) - info_len += fanotify_fid_info_len(dir2_fh_len, - info->name2_len); - - return info_len; -} - -static size_t fanotify_event_len(unsigned int info_mode, - struct fanotify_event *event) -{ - size_t event_len = FAN_EVENT_METADATA_LEN; - int fh_len; int dot_len = 0; - if (!info_mode) - return event_len; - - if (fanotify_is_error_event(event->mask)) - event_len += FANOTIFY_ERROR_INFO_LEN; - - if (fanotify_event_has_any_dir_fh(event)) { - event_len += fanotify_dir_name_info_len(event); - } else if ((info_mode & FAN_REPORT_NAME) && - (event->mask & FAN_ONDIR)) { + if (dir_fh_len) { + info_len += 
fanotify_fid_info_len(dir_fh_len, info->name_len); + } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) { /* * With group flag FAN_REPORT_NAME, if name was not recorded in * event on a directory, we will report the name ".". @@ -166,32 +83,10 @@ static size_t fanotify_event_len(unsigned int info_mode, dot_len = 1; } - if (info_mode & FAN_REPORT_PIDFD) - event_len += FANOTIFY_PIDFD_INFO_HDR_LEN; + if (fh_len) + info_len += fanotify_fid_info_len(fh_len, dot_len); - if (fanotify_event_has_object_fh(event)) { - fh_len = fanotify_event_object_fh_len(event); - event_len += fanotify_fid_info_len(fh_len, dot_len); - } - - return event_len; -} - -/* - * Remove an hashed event from merge hash table. - */ -static void fanotify_unhash_event(struct fsnotify_group *group, - struct fanotify_event *event) -{ - assert_spin_locked(&group->notification_lock); - - pr_debug("%s: group=%p event=%p bucket=%u\n", __func__, - group, event, fanotify_event_hash_bucket(group, event)); - - if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list))) - return; - - hlist_del_init(&event->merge_list); + return info_len; } /* @@ -203,41 +98,34 @@ static void fanotify_unhash_event(struct fsnotify_group *group, static struct fanotify_event *get_one_event(struct fsnotify_group *group, size_t count) { - size_t event_size; + size_t event_size = FAN_EVENT_METADATA_LEN; struct fanotify_event *event = NULL; - struct fsnotify_event *fsn_event; - unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); + unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); pr_debug("%s: group=%p count=%zd\n", __func__, group, count); spin_lock(&group->notification_lock); - fsn_event = fsnotify_peek_first_event(group); - if (!fsn_event) + if (fsnotify_notify_queue_is_empty(group)) goto out; - event = FANOTIFY_E(fsn_event); - event_size = fanotify_event_len(info_mode, event); + if (fid_mode) { + event_size += fanotify_event_info_len(fid_mode, + 
FANOTIFY_E(fsnotify_peek_first_event(group))); + } if (event_size > count) { event = ERR_PTR(-EINVAL); goto out; } - - /* - * Held the notification_lock the whole time, so this is the - * same event we peeked above. - */ - fsnotify_remove_first_event(group); + event = FANOTIFY_E(fsnotify_remove_first_event(group)); if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; - if (fanotify_is_hashed_event(event->mask)) - fanotify_unhash_event(group, event); out: spin_unlock(&group->notification_lock); return event; } -static int create_fd(struct fsnotify_group *group, const struct path *path, +static int create_fd(struct fsnotify_group *group, struct path *path, struct file **file) { int client_fd; @@ -252,7 +140,7 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, * originally opened O_WRONLY. */ new_file = dentry_open(path, - group->fanotify_data.f_flags | __FMODE_NONOTIFY, + group->fanotify_data.f_flags | FMODE_NONOTIFY, current_cred()); if (IS_ERR(new_file)) { /* @@ -337,31 +225,9 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } -static size_t copy_error_info_to_user(struct fanotify_event *event, - char __user *buf, int count) -{ - struct fanotify_event_info_error info = { }; - struct fanotify_error_event *fee = FANOTIFY_EE(event); - - info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR; - info.hdr.len = FANOTIFY_ERROR_INFO_LEN; - - if (WARN_ON(count < info.hdr.len)) - return -EFAULT; - - info.error = fee->error; - info.error_count = fee->err_count; - - if (copy_to_user(buf, &info, sizeof(info))) - return -EFAULT; - - return info.hdr.len; -} - -static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, - int info_type, const char *name, - size_t name_len, - char __user *buf, size_t count) +static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, + int info_type, const char *name, size_t name_len, + char __user *buf, size_t count) { 
struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; @@ -373,6 +239,9 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", __func__, fh_len, name_len, info_len, count); + if (!fh_len) + return 0; + if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; @@ -387,8 +256,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, return -EFAULT; break; case FAN_EVENT_INFO_TYPE_DFID_NAME: - case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME: - case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME: if (WARN_ON_ONCE(!name || !name_len)) return -EFAULT; break; @@ -409,11 +276,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, handle.handle_type = fh->type; handle.handle_bytes = fh_len; - - /* Mangle handle_type for bad file_handle */ - if (!fh_len) - handle.handle_type = FILEID_INVALID; - if (copy_to_user(buf, &handle, sizeof(handle))) return -EFAULT; @@ -458,79 +320,68 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, return info_len; } -static int copy_pidfd_info_to_user(int pidfd, - char __user *buf, - size_t count) +static ssize_t copy_event_to_user(struct fsnotify_group *group, + struct fanotify_event *event, + char __user *buf, size_t count) { - struct fanotify_event_info_pidfd info = { }; - size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN; + struct fanotify_event_metadata metadata; + struct path *path = fanotify_event_path(event); + struct fanotify_info *info = fanotify_event_info(event); + unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); + struct file *f = NULL; + int ret, fd = FAN_NOFD; + int info_type = 0; - if (WARN_ON_ONCE(info_len > count)) - return -EFAULT; + pr_debug("%s: group=%p event=%p\n", __func__, group, event); - info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD; - info.hdr.len = info_len; - info.pidfd = pidfd; + metadata.event_len = 
FAN_EVENT_METADATA_LEN + + fanotify_event_info_len(fid_mode, event); + metadata.metadata_len = FAN_EVENT_METADATA_LEN; + metadata.vers = FANOTIFY_METADATA_VERSION; + metadata.reserved = 0; + metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata.pid = pid_vnr(event->pid); - if (copy_to_user(buf, &info, info_len)) - return -EFAULT; - - return info_len; -} - -static int copy_info_records_to_user(struct fanotify_event *event, - struct fanotify_info *info, - unsigned int info_mode, int pidfd, - char __user *buf, size_t count) -{ - int ret, total_bytes = 0, info_type = 0; - unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS; - unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; + if (path && path->mnt && path->dentry) { + fd = create_fd(group, path, &f); + if (fd < 0) + return fd; + } + metadata.fd = fd; + ret = -EFAULT; /* - * Event info records order is as follows: - * 1. dir fid + name - * 2. (optional) new dir fid + new name - * 3. (optional) child fid + * Sanity check copy size in case get_one_event() and + * event_len sizes ever get out of sync. */ - if (fanotify_event_has_dir_fh(event)) { + if (WARN_ON_ONCE(metadata.event_len > count)) + goto out_close_fd; + + if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) + goto out_close_fd; + + buf += FAN_EVENT_METADATA_LEN; + count -= FAN_EVENT_METADATA_LEN; + + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PERM(event)->fd = fd; + + /* Event info records order is: dir fid + name, child fid */ + if (fanotify_event_dir_fh_len(event)) { info_type = info->name_len ? 
FAN_EVENT_INFO_TYPE_DFID_NAME : FAN_EVENT_INFO_TYPE_DFID; - - /* FAN_RENAME uses special info types */ - if (event->mask & FAN_RENAME) - info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME; - - ret = copy_fid_info_to_user(fanotify_event_fsid(event), - fanotify_info_dir_fh(info), - info_type, - fanotify_info_name(info), - info->name_len, buf, count); + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_info_dir_fh(info), + info_type, fanotify_info_name(info), + info->name_len, buf, count); if (ret < 0) - return ret; + goto out_close_fd; buf += ret; count -= ret; - total_bytes += ret; } - /* New dir fid+name may be reported in addition to old dir fid+name */ - if (fanotify_event_has_dir2_fh(event)) { - info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME; - ret = copy_fid_info_to_user(fanotify_event_fsid(event), - fanotify_info_dir2_fh(info), - info_type, - fanotify_info_name2(info), - info->name2_len, buf, count); - if (ret < 0) - return ret; - - buf += ret; - count -= ret; - total_bytes += ret; - } - - if (fanotify_event_has_object_fh(event)) { + if (fanotify_event_object_fh_len(event)) { const char *dot = NULL; int dot_len = 0; @@ -544,8 +395,8 @@ static int copy_info_records_to_user(struct fanotify_event *event, (event->mask & FAN_ONDIR)) { /* * With group flag FAN_REPORT_NAME, if name was not - * recorded in an event on a directory, report the name - * "." with info type DFID_NAME. + * recorded in an event on a directory, report the + * name "." with info type DFID_NAME. 
*/ info_type = FAN_EVENT_INFO_TYPE_DFID_NAME; dot = "."; @@ -568,132 +419,14 @@ static int copy_info_records_to_user(struct fanotify_event *event, info_type = FAN_EVENT_INFO_TYPE_FID; } - ret = copy_fid_info_to_user(fanotify_event_fsid(event), - fanotify_event_object_fh(event), - info_type, dot, dot_len, - buf, count); - if (ret < 0) - return ret; - - buf += ret; - count -= ret; - total_bytes += ret; - } - - if (pidfd_mode) { - ret = copy_pidfd_info_to_user(pidfd, buf, count); - if (ret < 0) - return ret; - - buf += ret; - count -= ret; - total_bytes += ret; - } - - if (fanotify_is_error_event(event->mask)) { - ret = copy_error_info_to_user(event, buf, count); - if (ret < 0) - return ret; - buf += ret; - count -= ret; - total_bytes += ret; - } - - return total_bytes; -} - -static ssize_t copy_event_to_user(struct fsnotify_group *group, - struct fanotify_event *event, - char __user *buf, size_t count) -{ - struct fanotify_event_metadata metadata; - const struct path *path = fanotify_event_path(event); - struct fanotify_info *info = fanotify_event_info(event); - unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); - unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; - struct file *f = NULL; - int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD; - - pr_debug("%s: group=%p event=%p\n", __func__, group, event); - - metadata.event_len = fanotify_event_len(info_mode, event); - metadata.metadata_len = FAN_EVENT_METADATA_LEN; - metadata.vers = FANOTIFY_METADATA_VERSION; - metadata.reserved = 0; - metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; - metadata.pid = pid_vnr(event->pid); - /* - * For an unprivileged listener, event->pid can be used to identify the - * events generated by the listener process itself, without disclosing - * the pids of other processes. 
- */ - if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && - task_tgid(current) != event->pid) - metadata.pid = 0; - - /* - * For now, fid mode is required for an unprivileged listener and - * fid mode does not report fd in events. Keep this check anyway - * for safety in case fid mode requirement is relaxed in the future - * to allow unprivileged listener to get events with no fd and no fid. - */ - if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && - path && path->mnt && path->dentry) { - fd = create_fd(group, path, &f); - if (fd < 0) - return fd; - } - metadata.fd = fd; - - if (pidfd_mode) { - /* - * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual - * exclusion is ever lifted. At the time of incoporating pidfd - * support within fanotify, the pidfd API only supported the - * creation of pidfds for thread-group leaders. - */ - WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID)); - - /* - * The PIDTYPE_TGID check for an event->pid is performed - * preemptively in an attempt to catch out cases where the event - * listener reads events after the event generating process has - * already terminated. Report FAN_NOPIDFD to the event listener - * in those cases, with all other pidfd creation errors being - * reported as FAN_EPIDFD. - */ - if (metadata.pid == 0 || - !pid_has_task(event->pid, PIDTYPE_TGID)) { - pidfd = FAN_NOPIDFD; - } else { - pidfd = pidfd_create(event->pid, 0); - if (pidfd < 0) - pidfd = FAN_EPIDFD; - } - } - - ret = -EFAULT; - /* - * Sanity check copy size in case get_one_event() and - * event_len sizes ever get out of sync. 
- */ - if (WARN_ON_ONCE(metadata.event_len > count)) - goto out_close_fd; - - if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) - goto out_close_fd; - - buf += FAN_EVENT_METADATA_LEN; - count -= FAN_EVENT_METADATA_LEN; - - if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PERM(event)->fd = fd; - - if (info_mode) { - ret = copy_info_records_to_user(event, info, info_mode, pidfd, - buf, count); + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_object_fh(event), + info_type, dot, dot_len, buf, count); if (ret < 0) goto out_close_fd; + + buf += ret; + count -= ret; } if (f) @@ -706,10 +439,6 @@ out_close_fd: put_unused_fd(fd); fput(f); } - - if (pidfd >= 0) - close_fd(pidfd); - return ret; } @@ -844,7 +573,6 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fsnotify_event *fsn_event; /* * Stop new events from arriving in the notification queue. since @@ -873,12 +601,13 @@ static int fanotify_release(struct inode *ignored, struct file *file) * dequeue them and set the response. They will be freed once the * response is consumed and fanotify_get_response() returns. 
*/ - while ((fsn_event = fsnotify_remove_first_event(group))) { - struct fanotify_event *event = FANOTIFY_E(fsn_event); + while (!fsnotify_notify_queue_is_empty(group)) { + struct fanotify_event *event; + event = FANOTIFY_E(fsnotify_remove_first_event(group)); if (!(event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); - fsnotify_destroy_event(group, fsn_event); + fsnotify_destroy_event(group, &event->fse); } else { finish_permission_event(group, FANOTIFY_PERM(event), FAN_ALLOW); @@ -973,7 +702,7 @@ static int fanotify_find_path(int dfd, const char __user *filename, } /* you can only watch an inode if you have read permissions on it */ - ret = path_permission(path, MAY_READ); + ret = inode_permission(path->dentry->d_inode, MAY_READ); if (ret) { path_put(path); goto out; @@ -991,28 +720,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask, unsigned int flags, __u32 umask, int *destroy) { - __u32 oldmask, newmask; + __u32 oldmask = 0; /* umask bits cannot be removed by user */ mask &= ~umask; spin_lock(&fsn_mark->lock); - oldmask = fsnotify_calc_mask(fsn_mark); - if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) { + if (!(flags & FAN_MARK_IGNORED_MASK)) { + oldmask = fsn_mark->mask; fsn_mark->mask &= ~mask; } else { - fsn_mark->ignore_mask &= ~mask; + fsn_mark->ignored_mask &= ~mask; } - newmask = fsnotify_calc_mask(fsn_mark); /* * We need to keep the mark around even if remaining mask cannot * result in any events (e.g. mask == FAN_ONDIR) to support incremenal * changes to the mask. * Destroy mark when only umask bits remain. 
*/ - *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask); + *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask); spin_unlock(&fsn_mark->lock); - return oldmask & ~newmask; + return mask & oldmask; } static int fanotify_remove_mark(struct fsnotify_group *group, @@ -1023,10 +751,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group, __u32 removed; int destroy_mark; - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); return -ENOENT; } @@ -1036,7 +764,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group, fsnotify_recalc_mask(fsn_mark->connector); if (destroy_mark) fsnotify_detach_mark(fsn_mark); - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); if (destroy_mark) fsnotify_free_mark(fsn_mark); @@ -1069,199 +797,76 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, flags, umask); } -static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, - unsigned int fan_flags) +static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, + __u32 mask, + unsigned int flags) { - bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE); - unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS; - bool recalc = false; - - /* - * When using FAN_MARK_IGNORE for the first time, mark starts using - * independent event flags in ignore mask. After that, trying to - * update the ignore mask with the old FAN_MARK_IGNORED_MASK API - * will result in EEXIST error. - */ - if (ignore == FAN_MARK_IGNORE) - fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS; - - /* - * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to - * the removal of the FS_MODIFY bit in calculated mask if it was set - * because of an ignore mask that is now going to survive FS_MODIFY. 
- */ - if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && - !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { - fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; - if (!(fsn_mark->mask & FS_MODIFY)) - recalc = true; - } - - if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE || - want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) - return recalc; - - /* - * NO_IREF may be removed from a mark, but not added. - * When removed, fsnotify_recalc_mask() will take the inode ref. - */ - WARN_ON_ONCE(!want_iref); - fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF; - - return true; -} - -static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, - __u32 mask, unsigned int fan_flags) -{ - bool recalc; + __u32 oldmask = -1; spin_lock(&fsn_mark->lock); - if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS)) + if (!(flags & FAN_MARK_IGNORED_MASK)) { + oldmask = fsn_mark->mask; fsn_mark->mask |= mask; - else - fsn_mark->ignore_mask |= mask; - - recalc = fsnotify_calc_mask(fsn_mark) & - ~fsnotify_conn_mask(fsn_mark->connector); - - recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags); + } else { + fsn_mark->ignored_mask |= mask; + if (flags & FAN_MARK_IGNORED_SURV_MODIFY) + fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; + } spin_unlock(&fsn_mark->lock); - return recalc; + return mask & ~oldmask; } static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int obj_type, - unsigned int fan_flags, + unsigned int type, __kernel_fsid_t *fsid) { - struct ucounts *ucounts = group->fanotify_data.ucounts; struct fsnotify_mark *mark; int ret; - /* - * Enforce per user marks limits per user in all containing user ns. - * A group with FAN_UNLIMITED_MARKS does not contribute to mark count - * in the limited groups account. 
- */ - if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && - !inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_FANOTIFY_MARKS)) + if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) return ERR_PTR(-ENOSPC); mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); - if (!mark) { - ret = -ENOMEM; - goto out_dec_ucounts; - } + if (!mark) + return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - if (fan_flags & FAN_MARK_EVICTABLE) - mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; - - ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid); + ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); if (ret) { fsnotify_put_mark(mark); - goto out_dec_ucounts; + return ERR_PTR(ret); } return mark; - -out_dec_ucounts: - if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) - dec_ucount(ucounts, UCOUNT_FANOTIFY_MARKS); - return ERR_PTR(ret); } -static int fanotify_group_init_error_pool(struct fsnotify_group *group) -{ - if (mempool_initialized(&group->fanotify_data.error_events_pool)) - return 0; - - return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool, - FANOTIFY_DEFAULT_FEE_POOL_SIZE, - sizeof(struct fanotify_error_event)); -} - -static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, - unsigned int fan_flags) -{ - /* - * Non evictable mark cannot be downgraded to evictable mark. - */ - if (fan_flags & FAN_MARK_EVICTABLE && - !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) - return -EEXIST; - - /* - * New ignore mask semantics cannot be downgraded to old semantics. - */ - if (fan_flags & FAN_MARK_IGNORED_MASK && - fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) - return -EEXIST; - - /* - * An ignore mask that survives modify could never be downgraded to not - * survive modify. With new FAN_MARK_IGNORE semantics we make that rule - * explicit and return an error when trying to update the ignore mask - * without the original FAN_MARK_IGNORED_SURV_MODIFY value. 
- */ - if (fan_flags & FAN_MARK_IGNORE && - !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && - fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) - return -EEXIST; - - return 0; -} static int fanotify_add_mark(struct fsnotify_group *group, - fsnotify_connp_t *connp, unsigned int obj_type, - __u32 mask, unsigned int fan_flags, + fsnotify_connp_t *connp, unsigned int type, + __u32 mask, unsigned int flags, __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; - bool recalc; - int ret = 0; + __u32 added; - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, obj_type, - fan_flags, fsid); + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); if (IS_ERR(fsn_mark)) { - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); } } - - /* - * Check if requested mark flags conflict with an existing mark flags. - */ - ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); - if (ret) - goto out; - - /* - * Error events are pre-allocated per group, only if strictly - * needed (i.e. FAN_FS_ERROR was requested). 
- */ - if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) && - (mask & FAN_FS_ERROR)) { - ret = fanotify_group_init_error_pool(group); - if (ret) - goto out; - } - - recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags); - if (recalc) + added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); + if (added & ~fsnotify_conn_mask(fsn_mark->connector)) fsnotify_recalc_mask(fsn_mark->connector); - -out: - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); - return ret; + return 0; } static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, @@ -1288,10 +893,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, /* * If some other task has this inode open for write we should not add - * an ignore mask, unless that ignore mask is supposed to survive + * an ignored mark, unless that ignored mark is supposed to survive * modification changes anyway. */ - if ((flags & FANOTIFY_MARK_IGNORE_BITS) && + if ((flags & FAN_MARK_IGNORED_MASK) && !(flags & FAN_MARK_IGNORED_SURV_MODIFY) && inode_is_open_for_write(inode)) return 0; @@ -1314,49 +919,20 @@ static struct fsnotify_event *fanotify_alloc_overflow_event(void) return &oevent->fse; } -static struct hlist_head *fanotify_alloc_merge_hash(void) -{ - struct hlist_head *hash; - - hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS, - GFP_KERNEL_ACCOUNT); - if (!hash) - return NULL; - - __hash_init(hash, FANOTIFY_HTABLE_SIZE); - - return hash; -} - /* fanotify syscalls */ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) { struct fsnotify_group *group; int f_flags, fd; + struct user_struct *user; unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; - unsigned int internal_flags = 0; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); - if (!capable(CAP_SYS_ADMIN)) { - /* - * An unprivileged user can setup an fanotify group with - * limited 
functionality - an unprivileged group is limited to - * notification events with file handles and it cannot use - * unlimited queue/marks. - */ - if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode) - return -EPERM; - - /* - * Setting the internal flag FANOTIFY_UNPRIV on the group - * prevents setting mount/filesystem marks on this group and - * prevents reporting pid and open fd in events. - */ - internal_flags |= FANOTIFY_UNPRIV; - } + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; #ifdef CONFIG_AUDITSYSCALL if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) @@ -1365,14 +941,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) #endif return -EINVAL; - /* - * A pidfd can only be returned for a thread-group leader; thus - * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually - * exclusive. - */ - if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) - return -EINVAL; - if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) return -EINVAL; @@ -1395,46 +963,30 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) return -EINVAL; - /* - * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID - * and is used as an indication to report both dir and child fid on all - * dirent events. - */ - if ((fid_mode & FAN_REPORT_TARGET_FID) && - (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) - return -EINVAL; + user = get_current_user(); + if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { + free_uid(user); + return -EMFILE; + } - f_flags = O_RDWR | __FMODE_NONOTIFY; + f_flags = O_RDWR | FMODE_NONOTIFY; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) f_flags |= O_NONBLOCK; /* fsnotify_alloc_group takes a ref. 
Dropped in fanotify_release */ - group = fsnotify_alloc_group(&fanotify_fsnotify_ops, - FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS); + group = fsnotify_alloc_group(&fanotify_fsnotify_ops); if (IS_ERR(group)) { + free_uid(user); return PTR_ERR(group); } - /* Enforce groups limits per user in all containing user ns */ - group->fanotify_data.ucounts = inc_ucount(current_user_ns(), - current_euid(), - UCOUNT_FANOTIFY_GROUPS); - if (!group->fanotify_data.ucounts) { - fd = -EMFILE; - goto out_destroy_group; - } - - group->fanotify_data.flags = flags | internal_flags; + group->fanotify_data.user = user; + group->fanotify_data.flags = flags; + atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); - group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); - if (!group->fanotify_data.merge_hash) { - fd = -ENOMEM; - goto out_destroy_group; - } - group->overflow_event = fanotify_alloc_overflow_event(); if (unlikely(!group->overflow_event)) { fd = -ENOMEM; @@ -1467,13 +1019,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; group->max_events = UINT_MAX; } else { - group->max_events = fanotify_max_queued_events; + group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS; } if (flags & FAN_UNLIMITED_MARKS) { fd = -EPERM; if (!capable(CAP_SYS_ADMIN)) goto out_destroy_group; + group->fanotify_data.max_marks = UINT_MAX; + } else { + group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS; } if (flags & FAN_ENABLE_AUDIT) { @@ -1493,15 +1048,16 @@ out_destroy_group: return fd; } -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) +/* Check if filesystem can encode a unique fid */ +static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) { __kernel_fsid_t root_fsid; int err; /* - * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). + * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). 
*/ - err = vfs_get_fsid(dentry, fsid); + err = vfs_get_fsid(path->dentry, fsid); if (err) return err; @@ -1509,10 +1065,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) return -ENODEV; /* - * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) + * Make sure path is not inside a filesystem subvolume (e.g. btrfs) * which uses a different fsid than sb root. */ - err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid); + err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid); if (err) return err; @@ -1520,12 +1076,6 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid) root_fsid.val[1] != fsid->val[1]) return -EXDEV; - return 0; -} - -/* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct dentry *dentry) -{ /* * We need to make sure that the file system supports at least * encoding a file handle so user can use name_to_handle_at() to @@ -1533,22 +1083,17 @@ static int fanotify_test_fid(struct dentry *dentry) * objects. However, name_to_handle_at() requires that the * filesystem also supports decoding file handles. 
*/ - if (!dentry->d_sb->s_export_op || - !dentry->d_sb->s_export_op->fh_to_dentry) + if (!path->dentry->d_sb->s_export_op || + !path->dentry->d_sb->s_export_op->fh_to_dentry) return -EOPNOTSUPP; return 0; } -static int fanotify_events_supported(struct fsnotify_group *group, - const struct path *path, __u64 mask, +static int fanotify_events_supported(struct path *path, __u64 mask, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; - /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ - bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || - (mask & FAN_RENAME) || - (flags & FAN_MARK_IGNORE); /* * Some filesystems such as 'proc' acquire unusual locks when opening @@ -1576,15 +1121,6 @@ static int fanotify_events_supported(struct fsnotify_group *group, path->mnt->mnt_sb->s_flags & SB_NOUSER) return -EINVAL; - /* - * We shouldn't have allowed setting dirent events and the directory - * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, - * but because we always allowed it, error only when using new APIs. - */ - if (strict_dir_events && mark_type == FAN_MARK_INODE && - !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) - return -ENOTDIR; - return 0; } @@ -1599,8 +1135,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, __kernel_fsid_t __fsid, *fsid = NULL; u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; - unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; - unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; + bool ignored = flags & FAN_MARK_IGNORED_MASK; unsigned int obj_type, fid_mode; u32 umask = 0; int ret; @@ -1609,7 +1144,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, __func__, fanotify_fd, flags, dfd, pathname, mask); /* we only use the lower 32 bits as of right now. 
*/ - if (upper_32_bits(mask)) + if (mask & ((__u64)0xffffffff << 32)) return -EINVAL; if (flags & ~FANOTIFY_MARK_FLAGS) @@ -1629,7 +1164,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; } - switch (mark_cmd) { + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { case FAN_MARK_ADD: case FAN_MARK_REMOVE: if (!mask) @@ -1649,19 +1184,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & ~valid_mask) return -EINVAL; - - /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */ - if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK)) - return -EINVAL; - - /* - * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with - * FAN_MARK_IGNORED_MASK. - */ - if (ignore == FAN_MARK_IGNORED_MASK) { + /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */ + if (ignored) mask &= ~FANOTIFY_EVENT_FLAGS; - umask = FANOTIFY_EVENT_FLAGS; - } f = fdget(fanotify_fd); if (unlikely(!f.file)) @@ -1673,17 +1198,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; group = f.file->private_data; - /* - * An unprivileged user is not allowed to setup mount nor filesystem - * marks. This also includes setting up such marks by a group that - * was initialized by an unprivileged user. - */ - ret = -EPERM; - if ((!capable(CAP_SYS_ADMIN) || - FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) && - mark_type != FAN_MARK_INODE) - goto fput_and_out; - /* * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not * allowed to set permissions events. @@ -1693,39 +1207,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, group->priority == FS_PRIO_0) goto fput_and_out; - if (mask & FAN_FS_ERROR && - mark_type != FAN_MARK_FILESYSTEM) - goto fput_and_out; - /* - * Evictable is only relevant for inode marks, because only inode object - * can be evicted on memory pressure. 
- */ - if (flags & FAN_MARK_EVICTABLE && - mark_type != FAN_MARK_INODE) - goto fput_and_out; - - /* - * Events that do not carry enough information to report - * event->fd require a group that supports reporting fid. Those - * events are not supported on a mount mark, because they do not - * carry enough information (i.e. path) to be filtered by mount - * point. + * Events with data type inode do not carry enough information to report + * event->fd, so we do not allow setting a mask for inode events unless + * group supports reporting fid. + * inode events are not supported on a mount mark, because they do not + * carry enough information (i.e. path) to be filtered by mount point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && + if (mask & FANOTIFY_INODE_EVENTS && (!fid_mode || mark_type == FAN_MARK_MOUNT)) goto fput_and_out; - /* - * FAN_RENAME uses special info type records to report the old and - * new parent+name. Reporting only old and new parent id is less - * useful and was not implemented. 
- */ - if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) - goto fput_and_out; - - if (mark_cmd == FAN_MARK_FLUSH) { + if (flags & FAN_MARK_FLUSH) { ret = 0; if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); @@ -1741,18 +1235,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; - if (mark_cmd == FAN_MARK_ADD) { - ret = fanotify_events_supported(group, &path, mask, flags); + if (flags & FAN_MARK_ADD) { + ret = fanotify_events_supported(&path, mask, flags); if (ret) goto path_put_and_out; } if (fid_mode) { - ret = fanotify_test_fsid(path.dentry, &__fsid); - if (ret) - goto path_put_and_out; - - ret = fanotify_test_fid(path.dentry); + ret = fanotify_test_fid(&path, &__fsid); if (ret) goto path_put_and_out; @@ -1765,13 +1255,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; - ret = mnt ? -EINVAL : -EISDIR; - /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ - if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE && - (mnt || S_ISDIR(inode->i_mode)) && - !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) - goto path_put_and_out; - /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; @@ -1781,12 +1264,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * events with parent/name info for non-directory. 
*/ if ((fid_mode & FAN_REPORT_DIR_FID) && - (flags & FAN_MARK_ADD) && !ignore) + (flags & FAN_MARK_ADD) && !ignored) mask |= FAN_EVENT_ON_CHILD; } /* create/update an inode mark */ - switch (mark_cmd) { + switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) { case FAN_MARK_ADD: if (mark_type == FAN_MARK_MOUNT) ret = fanotify_add_vfsmount_mark(group, mnt, mask, @@ -1847,24 +1330,8 @@ SYSCALL32_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - struct sysinfo si; - int max_marks; - - si_meminfo(&si); - /* - * Allow up to 1% of addressable memory to be accounted for per user - * marks limited to the range [8192, 1048576]. mount and sb marks are - * a lot cheaper than inode marks, but there is no reason for a user - * to have many of those, so calculate by the cost of inode marks. - */ - max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / - INODE_MARK_COST; - max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS, - FANOTIFY_DEFAULT_MAX_USER_MARKS); - - BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); @@ -1877,11 +1344,6 @@ static int __init fanotify_user_setup(void) KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } - fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; - init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = - FANOTIFY_DEFAULT_MAX_GROUPS; - init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks; - return 0; } device_initcall(fanotify_user_setup); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 55081ae3a6ec..765b50aeadd2 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -14,7 +14,6 @@ #include #include "inotify/inotify.h" -#include "fanotify/fanotify.h" #include "fdinfo.h" #include 
"fsnotify.h" @@ -29,13 +28,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f, struct fsnotify_group *group = f->private_data; struct fsnotify_mark *mark; - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); list_for_each_entry(mark, &group->marks_list, g_list) { show(m, mark); if (seq_has_overflowed(m)) break; } - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); } #if defined(CONFIG_EXPORTFS) @@ -104,16 +103,19 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f) static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) { - unsigned int mflags = fanotify_mark_user_flags(mark); + unsigned int mflags = 0; struct inode *inode; + if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) + mflags |= FAN_MARK_IGNORED_SURV_MODIFY; + if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = igrab(fsnotify_conn_inode(mark->connector)); if (!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", inode->i_ino, inode->i_sb->s_dev, - mflags, mark->mask, mark->ignore_mask); + mflags, mark->mask, mark->ignored_mask); show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); @@ -121,12 +123,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct mount *mnt = fsnotify_conn_mount(mark->connector); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", - mnt->mnt_id, mflags, mark->mask, mark->ignore_mask); + mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) { struct super_block *sb = fsnotify_conn_sb(mark->connector); seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", - sb->s_dev, mflags, mark->mask, mark->ignore_mask); + sb->s_dev, mflags, mark->mask, mark->ignored_mask); } } @@ -135,8 +137,7 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f) struct fsnotify_group *group = f->private_data; seq_printf(m, "fanotify flags:%x 
event-flags:%x\n", - group->fanotify_data.flags & FANOTIFY_INIT_FLAGS, - group->fanotify_data.f_flags); + group->fanotify_data.flags, group->fanotify_data.f_flags); show_fdinfo(m, f, fanotify_fdinfo); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 7974e91ffe13..30d422b8c0fc 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -70,7 +70,8 @@ static void fsnotify_unmount_inodes(struct super_block *sb) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); - iput(iput_inode); + if (iput_inode) + iput(iput_inode); /* for each watch, send FS_UNMOUNT and then remove it */ fsnotify_inode(inode, FS_UNMOUNT); @@ -84,23 +85,24 @@ static void fsnotify_unmount_inodes(struct super_block *sb) } spin_unlock(&sb->s_inode_list_lock); - iput(iput_inode); + if (iput_inode) + iput(iput_inode); + /* Wait for outstanding inode references from connectors */ + wait_var_event(&sb->s_fsnotify_inode_refs, + !atomic_long_read(&sb->s_fsnotify_inode_refs)); } void fsnotify_sb_delete(struct super_block *sb) { fsnotify_unmount_inodes(sb); fsnotify_clear_marks_by_sb(sb); - /* Wait for outstanding object references from connectors */ - wait_var_event(&sb->s_fsnotify_connectors, - !atomic_long_read(&sb->s_fsnotify_connectors)); } /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the - * parent cares. Thus when an event happens on a child it can quickly tell + * parent cares. Thus when an event happens on a child it can quickly tell if * if there is a need to find a parent and send the event to the parent. 
*/ void __fsnotify_update_child_dentry_flags(struct inode *inode) @@ -250,10 +252,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group, if (WARN_ON_ONCE(!ops->handle_inode_event)) return 0; - if (WARN_ON_ONCE(!inode && !dir)) - return 0; - - if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) && + if ((inode_mark->mask & FS_EXCL_UNLINK) && path && d_unlinked(path->dentry)) return 0; @@ -277,28 +276,23 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; - /* - * For FS_RENAME, 'dir' is old dir and 'data' is new dentry. - * The only ->handle_inode_event() backend that supports FS_RENAME is - * dnotify, where it means file was renamed within same parent. - */ - if (mask & FS_RENAME) { - struct dentry *moved = fsnotify_data_dentry(data, data_type); - - if (dir != moved->d_parent->d_inode) + if (parent_mark) { + /* + * parent_mark indicates that the parent inode is watching + * children and interested in this event, which is an event + * possible on child. But is *this mark* watching children and + * interested in this event? 
+ */ + if (parent_mark->mask & FS_EVENT_ON_CHILD) { + ret = fsnotify_handle_inode_event(group, parent_mark, mask, + data, data_type, dir, name, 0); + if (ret) + return ret; + } + if (!inode_mark) return 0; } - if (parent_mark) { - ret = fsnotify_handle_inode_event(group, parent_mark, mask, - data, data_type, dir, name, 0); - if (ret) - return ret; - } - - if (!inode_mark) - return 0; - if (mask & FS_EVENT_ON_CHILD) { /* * Some events can be sent on both parent dir and child marks @@ -324,36 +318,42 @@ static int send_to_group(__u32 mask, const void *data, int data_type, struct fsnotify_group *group = NULL; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; - __u32 marks_ignore_mask = 0; - bool is_dir = mask & FS_ISDIR; + __u32 marks_ignored_mask = 0; struct fsnotify_mark *mark; int type; - if (!iter_info->report_mask) + if (WARN_ON(!iter_info->report_mask)) return 0; /* clear ignored on inode modification */ if (mask & FS_MODIFY) { - fsnotify_foreach_iter_mark_type(iter_info, mark, type) { - if (!(mark->flags & - FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - mark->ignore_mask = 0; + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + if (mark && + !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) + mark->ignored_mask = 0; } } - /* Are any of the group marks interested in this event? */ - fsnotify_foreach_iter_mark_type(iter_info, mark, type) { - group = mark->group; - marks_mask |= mark->mask; - marks_ignore_mask |= - fsnotify_effective_ignore_mask(mark, is_dir, type); + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + mark = iter_info->marks[type]; + /* does the object mark tell us to do something? 
*/ + if (mark) { + group = mark->group; + marks_mask |= mark->mask; + marks_ignored_mask |= mark->ignored_mask; + } } - pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", - __func__, group, mask, marks_mask, marks_ignore_mask, + pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n", + __func__, group, mask, marks_mask, marks_ignored_mask, data, data_type, dir, cookie); - if (!(test_mask & marks_mask & ~marks_ignore_mask)) + if (!(test_mask & marks_mask & ~marks_ignored_mask)) return 0; if (group->ops->handle_event) { @@ -390,11 +390,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark) /* * iter_info is a multi head priority queue of marks. - * Pick a subset of marks from queue heads, all with the same group - * and set the report_mask to a subset of the selected marks. - * Returns false if there are no more groups to iterate. + * Pick a subset of marks from queue heads, all with the + * same group and set the report_mask for selected subset. + * Returns the report_mask of the selected subset. 
*/ -static bool fsnotify_iter_select_report_types( +static unsigned int fsnotify_iter_select_report_types( struct fsnotify_iter_info *iter_info) { struct fsnotify_group *max_prio_group = NULL; @@ -402,7 +402,7 @@ static bool fsnotify_iter_select_report_types( int type; /* Choose max prio group among groups of all queue heads */ - fsnotify_foreach_iter_type(type) { + fsnotify_foreach_obj_type(type) { mark = iter_info->marks[type]; if (mark && fsnotify_compare_groups(max_prio_group, mark->group) > 0) @@ -410,49 +410,30 @@ static bool fsnotify_iter_select_report_types( } if (!max_prio_group) - return false; + return 0; /* Set the report mask for marks from same group as max prio group */ - iter_info->current_group = max_prio_group; iter_info->report_mask = 0; - fsnotify_foreach_iter_type(type) { + fsnotify_foreach_obj_type(type) { mark = iter_info->marks[type]; - if (mark && mark->group == iter_info->current_group) { - /* - * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode - * is watching children and interested in this event, - * which is an event possible on child. - * But is *this mark* watching children? - */ - if (type == FSNOTIFY_ITER_TYPE_PARENT && - !(mark->mask & FS_EVENT_ON_CHILD) && - !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD)) - continue; - + if (mark && + fsnotify_compare_groups(max_prio_group, mark->group) == 0) fsnotify_iter_set_report_type(iter_info, type); - } } - return true; + return iter_info->report_mask; } /* - * Pop from iter_info multi head queue, the marks that belong to the group of + * Pop from iter_info multi head queue, the marks that were iterated in the * current iteration step. */ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) { - struct fsnotify_mark *mark; int type; - /* - * We cannot use fsnotify_foreach_iter_mark_type() here because we - * may need to advance a mark of type X that belongs to current_group - * but was not selected for reporting. 
- */ - fsnotify_foreach_iter_type(type) { - mark = iter_info->marks[type]; - if (mark && mark->group == iter_info->current_group) + fsnotify_foreach_obj_type(type) { + if (fsnotify_iter_should_report_type(iter_info, type)) iter_info->marks[type] = fsnotify_next_mark(iter_info->marks[type]); } @@ -474,20 +455,18 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) * @file_name is relative to * @file_name: optional file name associated with event * @inode: optional inode associated with event - - * If @dir and @inode are both non-NULL, event may be - * reported to both. + * either @dir or @inode must be non-NULL. + * if both are non-NULL event may be reported to both. * @cookie: inotify rename cookie */ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *file_name, struct inode *inode, u32 cookie) { const struct path *path = fsnotify_data_path(data, data_type); - struct super_block *sb = fsnotify_data_sb(data, data_type); struct fsnotify_iter_info iter_info = {}; + struct super_block *sb; struct mount *mnt = NULL; - struct inode *inode2 = NULL; - struct dentry *moved; - int inode2_type; + struct inode *parent = NULL; int ret = 0; __u32 test_mask, marks_mask; @@ -497,20 +476,14 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if (!inode) { /* Dirent event - report on TYPE_INODE to dir */ inode = dir; - /* For FS_RENAME, inode is old_dir and inode2 is new_dir */ - if (mask & FS_RENAME) { - moved = fsnotify_data_dentry(data, data_type); - inode2 = moved->d_parent->d_inode; - inode2_type = FSNOTIFY_ITER_TYPE_INODE2; - } } else if (mask & FS_EVENT_ON_CHILD) { /* * Event on child - report on TYPE_PARENT to dir if it is * watching children and on TYPE_INODE to child. 
*/ - inode2 = dir; - inode2_type = FSNOTIFY_ITER_TYPE_PARENT; + parent = dir; } + sb = inode->i_sb; /* * Optimization: srcu_read_lock() has a memory barrier which can @@ -522,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if (!sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!inode2 || !inode2->i_fsnotify_marks)) + (!parent || !parent->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; @@ -530,35 +503,33 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; - if (inode2) - marks_mask |= inode2->i_fsnotify_mask; + if (parent) + marks_mask |= parent->i_fsnotify_mask; /* - * If this is a modify event we may need to clear some ignore masks. - * In that case, the object with ignore masks will have the FS_MODIFY - * event in its mask. - * Otherwise, return if none of the marks care about this type of event. + * if this is a modify event we may need to clear the ignored masks + * otherwise return if none of the marks care about this type of event. 
*/ test_mask = (mask & ALL_FSNOTIFY_EVENTS); - if (!(test_mask & marks_mask)) + if (!(mask & FS_MODIFY) && !(test_mask & marks_mask)) return 0; iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - iter_info.marks[FSNOTIFY_ITER_TYPE_SB] = + iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { - iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] = + iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); } if (inode) { - iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] = + iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } - if (inode2) { - iter_info.marks[inode2_type] = - fsnotify_first_mark(&inode2->i_fsnotify_marks); + if (parent) { + iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] = + fsnotify_first_mark(&parent->i_fsnotify_marks); } /* @@ -587,7 +558,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index fde74eb333cc..ff2063ec6b0f 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -27,21 +27,6 @@ static inline struct super_block *fsnotify_conn_sb( return container_of(conn->obj, struct super_block, s_fsnotify_marks); } -static inline struct super_block *fsnotify_connector_sb( - struct fsnotify_mark_connector *conn) -{ - switch (conn->type) { - case FSNOTIFY_OBJ_TYPE_INODE: - return fsnotify_conn_inode(conn)->i_sb; - case FSNOTIFY_OBJ_TYPE_VFSMOUNT: - return fsnotify_conn_mount(conn)->mnt.mnt_sb; - case FSNOTIFY_OBJ_TYPE_SB: - return fsnotify_conn_sb(conn); - default: - return NULL; - } -} - /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -76,6 +61,10 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) */ extern void 
__fsnotify_update_child_dentry_flags(struct inode *inode); +/* allocate and destroy and event holder to attach events to notification/access queues */ +extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void); +extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder); + extern struct kmem_cache *fsnotify_mark_connector_cachep; #endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/fs/notify/group.c b/fs/notify/group.c index 1de6631a3925..a4a4b1c64d32 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -58,7 +58,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) fsnotify_group_stop_queueing(group); /* Clear all marks for this group and queue them for destruction */ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_ANY); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK); /* * Some marks can still be pinned when waiting for response from @@ -88,7 +88,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group) * that deliberately ignores overflow events. */ if (group->overflow_event) - group->ops->free_event(group, group->overflow_event); + group->ops->free_event(group->overflow_event); fsnotify_put_group(group); } @@ -111,19 +111,20 @@ void fsnotify_put_group(struct fsnotify_group *group) } EXPORT_SYMBOL_GPL(fsnotify_put_group); -static struct fsnotify_group *__fsnotify_alloc_group( - const struct fsnotify_ops *ops, - int flags, gfp_t gfp) +/* + * Create a new fsnotify_group and hold a reference for the group returned. 
+ */ +struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) { - static struct lock_class_key nofs_marks_lock; struct fsnotify_group *group; - group = kzalloc(sizeof(struct fsnotify_group), gfp); + group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); /* set to 0 when there a no external references to this group */ refcount_set(&group->refcnt, 1); + atomic_set(&group->num_marks, 0); atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); @@ -135,32 +136,9 @@ static struct fsnotify_group *__fsnotify_alloc_group( INIT_LIST_HEAD(&group->marks_list); group->ops = ops; - group->flags = flags; - /* - * For most backends, eviction of inode with a mark is not expected, - * because marks hold a refcount on the inode against eviction. - * - * Use a different lockdep class for groups that support evictable - * inode marks, because with evictable marks, mark_mutex is NOT - * fs-reclaim safe - the mutex is taken when evicting inodes. - */ - if (flags & FSNOTIFY_GROUP_NOFS) - lockdep_set_class(&group->mark_mutex, &nofs_marks_lock); return group; } - -/* - * Create a new fsnotify_group and hold a reference for the group returned. - */ -struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops, - int flags) -{ - gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT : - GFP_KERNEL; - - return __fsnotify_alloc_group(ops, flags, gfp); -} EXPORT_SYMBOL_GPL(fsnotify_alloc_group); int fsnotify_fasync(int fd, struct file *file, int on) diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7d5df7a21539..8f00151eb731 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,18 +27,11 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is * used only internally to the kernel. 
*/ -#define INOTIFY_USER_MASK (IN_ALL_EVENTS) +#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK) static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark) { - __u32 mask = fsn_mark->mask & INOTIFY_USER_MASK; - - if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) - mask |= IN_EXCL_UNLINK; - if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) - mask |= IN_ONESHOT; - - return mask; + return fsn_mark->mask & INOTIFY_USER_MASK; } extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 993375f0db67..66991c7fef9e 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -46,10 +46,9 @@ static bool event_compare(struct fsnotify_event *old_fsn, return false; } -static int inotify_merge(struct fsnotify_group *group, - struct fsnotify_event *event) +static int inotify_merge(struct list_head *list, + struct fsnotify_event *event) { - struct list_head *list = &group->notification_list; struct fsnotify_event *last_event; last_event = list_entry(list->prev, struct fsnotify_event, list); @@ -115,7 +114,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, mask &= ~IN_ISDIR; fsn_event = &event->fse; - fsnotify_init_event(fsn_event); + fsnotify_init_event(fsn_event, 0); event->mask = mask; event->wd = wd; event->sync_cookie = cookie; @@ -129,7 +128,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, fsnotify_destroy_event(group, fsn_event); } - if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) + if (inode_mark->mask & IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; @@ -184,8 +183,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group) dec_inotify_instances(group->inotify_data.ucounts); } -static void inotify_free_event(struct fsnotify_group *group, - struct fsnotify_event *fsn_event) +static void 
inotify_free_event(struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 67a9f3941f9b..9ea915e9d2a1 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -37,15 +37,6 @@ #include -/* - * An inotify watch requires allocating an inotify_inode_mark structure as - * well as pinning the watched inode. Doubling the size of a VFS inode - * should be more than enough to cover the additional filesystem inode - * size increase. - */ -#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \ - 2 * sizeof(struct inode)) - /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; @@ -89,10 +80,10 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) __u32 mask; /* - * Everything should receive events when the inode is unmounted. - * All directories care about children. + * Everything should accept their own ignored and should receive events + * when the inode is unmounted. All directories care about children. 
*/ - mask = (FS_UNMOUNT); + mask = (FS_IN_IGNORED | FS_UNMOUNT); if (S_ISDIR(inode->i_mode)) mask |= FS_EVENT_ON_CHILD; @@ -102,28 +93,13 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) return mask; } -#define INOTIFY_MARK_FLAGS \ - (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) - -static inline unsigned int inotify_arg_to_flags(u32 arg) -{ - unsigned int flags = 0; - - if (arg & IN_EXCL_UNLINK) - flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; - if (arg & IN_ONESHOT) - flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; - - return flags; -} - static inline u32 inotify_mask_to_arg(__u32 mask) { return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | IN_Q_OVERFLOW); } -/* inotify userspace file descriptor functions */ +/* intofiy userspace file descriptor functions */ static __poll_t inotify_poll(struct file *file, poll_table *wait) { struct fsnotify_group *group = file->private_data; @@ -161,10 +137,11 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t event_size = sizeof(struct inotify_event); struct fsnotify_event *event; - event = fsnotify_peek_first_event(group); - if (!event) + if (fsnotify_notify_queue_is_empty(group)) return NULL; + event = fsnotify_peek_first_event(group); + pr_debug("%s: group=%p event=%p\n", __func__, group, event); event_size += round_event_name_len(event); @@ -366,7 +343,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = path_permission(path, MAY_READ); + error = inode_permission(path->dentry->d_inode, MAY_READ); if (error) { path_put(path); return error; @@ -528,10 +505,13 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; - int replace = !(arg & IN_MASK_ADD); + __u32 mask; + int add = (arg & IN_MASK_ADD); int create = 
(arg & IN_MASK_CREATE); int ret; + mask = inotify_arg_to_mask(inode, arg); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; @@ -544,12 +524,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, spin_lock(&fsn_mark->lock); old_mask = fsn_mark->mask; - if (replace) { - fsn_mark->mask = 0; - fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; - } - fsn_mark->mask |= inotify_arg_to_mask(inode, arg); - fsn_mark->flags |= inotify_arg_to_flags(arg); + if (add) + fsn_mark->mask |= mask; + else + fsn_mark->mask = mask; new_mask = fsn_mark->mask; spin_unlock(&fsn_mark->lock); @@ -580,17 +558,19 @@ static int inotify_new_watch(struct fsnotify_group *group, u32 arg) { struct inotify_inode_mark *tmp_i_mark; + __u32 mask; int ret; struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; + mask = inotify_arg_to_mask(inode, arg); + tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) return -ENOMEM; fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); - tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); - tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); + tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); @@ -627,13 +607,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { int ret = 0; - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); return ret; } @@ -643,18 +623,17 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) struct fsnotify_group *group; struct inotify_event_info *oevent; - group = 
fsnotify_alloc_group(&inotify_fsnotify_ops, - FSNOTIFY_GROUP_USER); + group = fsnotify_alloc_group(&inotify_fsnotify_ops); if (IS_ERR(group)) return group; - oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT); + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); if (unlikely(!oevent)) { fsnotify_destroy_group(group); return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event); + fsnotify_init_event(group->overflow_event, 0); oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; @@ -830,18 +809,6 @@ out: */ static int __init inotify_user_setup(void) { - unsigned long watches_max; - struct sysinfo si; - - si_meminfo(&si); - /* - * Allow up to 1% of addressable memory to be allocated for inotify - * watches (per user) limited to the range [8192, 1048576]. - */ - watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / - INOTIFY_WATCH_COST; - watches_max = clamp(watches_max, 8192UL, 1048576UL); - BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); @@ -857,7 +824,9 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); + BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); + BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); @@ -866,7 +835,7 @@ static int __init inotify_user_setup(void) inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; - init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; + init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192; return 0; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index c74ef947447d..5b44be5f93dd 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -116,64 +116,20 @@ __u32 fsnotify_conn_mask(struct 
fsnotify_mark_connector *conn) return *fsnotify_conn_mask_p(conn); } -static void fsnotify_get_inode_ref(struct inode *inode) -{ - ihold(inode); - atomic_long_inc(&inode->i_sb->s_fsnotify_connectors); -} - -/* - * Grab or drop inode reference for the connector if needed. - * - * When it's time to drop the reference, we only clear the HAS_IREF flag and - * return the inode object. fsnotify_drop_object() will be resonsible for doing - * iput() outside of spinlocks. This happens when last mark that wanted iref is - * detached. - */ -static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn, - bool want_iref) -{ - bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF; - struct inode *inode = NULL; - - if (conn->type != FSNOTIFY_OBJ_TYPE_INODE || - want_iref == has_iref) - return NULL; - - if (want_iref) { - /* Pin inode if any mark wants inode refcount held */ - fsnotify_get_inode_ref(fsnotify_conn_inode(conn)); - conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF; - } else { - /* Unpin inode after detach of last mark that wanted iref */ - inode = fsnotify_conn_inode(conn); - conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF; - } - - return inode; -} - -static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; - bool want_iref = false; struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); /* We can get detached connector here when inode is getting unlinked. 
*/ if (!fsnotify_valid_obj_type(conn->type)) - return NULL; + return; hlist_for_each_entry(mark, &conn->list, obj_list) { - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) - continue; - new_mask |= fsnotify_calc_mask(mark); - if (conn->type == FSNOTIFY_OBJ_TYPE_INODE && - !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) - want_iref = true; + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) + new_mask |= mark->mask; } *fsnotify_conn_mask_p(conn) = new_mask; - - return fsnotify_update_iref(conn, want_iref); } /* @@ -213,31 +169,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static void fsnotify_put_inode_ref(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - iput(inode); - if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - -static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb) - atomic_long_inc(&sb->s_fsnotify_connectors); -} - -static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn) -{ - struct super_block *sb = fsnotify_connector_sb(conn); - - if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors)) - wake_up_var(&sb->s_fsnotify_connectors); -} - static void *fsnotify_detach_connector_from_object( struct fsnotify_mark_connector *conn, unsigned int *type) @@ -251,17 +182,13 @@ static void *fsnotify_detach_connector_from_object( if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; - - /* Unpin inode when detaching from connector */ - if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF)) - inode = NULL; + atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs); } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } - fsnotify_put_sb_connectors(conn); 
rcu_assign_pointer(*(conn->obj), NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; @@ -282,12 +209,19 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) /* Drop object reference originally held by a connector */ static void fsnotify_drop_object(unsigned int type, void *objp) { + struct inode *inode; + struct super_block *sb; + if (!objp) return; /* Currently only inode references are passed to be dropped */ if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE)) return; - fsnotify_put_inode_ref(objp); + inode = objp; + sb = inode->i_sb; + iput(inode); + if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs)) + wake_up_var(&sb->s_fsnotify_inode_refs); } void fsnotify_put_mark(struct fsnotify_mark *mark) @@ -316,8 +250,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) objp = fsnotify_detach_connector_from_object(conn, &type); free_conn = true; } else { - objp = __fsnotify_recalc_mask(conn); - type = conn->type; + __fsnotify_recalc_mask(conn); } WRITE_ONCE(mark->connector, NULL); spin_unlock(&conn->lock); @@ -396,7 +329,7 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) { int type; - fsnotify_foreach_iter_type(type) { + fsnotify_foreach_obj_type(type) { /* This can fail if mark is being removed */ if (!fsnotify_get_mark_safe(iter_info->marks[type])) { __release(&fsnotify_mark_srcu); @@ -425,7 +358,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) int type; iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); - fsnotify_foreach_iter_type(type) + fsnotify_foreach_obj_type(type) fsnotify_put_mark_wake(iter_info->marks[type]); } @@ -441,7 +374,9 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) */ void fsnotify_detach_mark(struct fsnotify_mark *mark) { - fsnotify_group_assert_locked(mark->group); + struct fsnotify_group *group = mark->group; + + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && 
refcount_read(&mark->refcnt) < 1 + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); @@ -456,6 +391,8 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) list_del_init(&mark->g_list); spin_unlock(&mark->lock); + atomic_dec(&group->num_marks); + /* Drop mark reference acquired in fsnotify_add_mark_locked() */ fsnotify_put_mark(mark); } @@ -493,9 +430,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) void fsnotify_destroy_mark(struct fsnotify_mark *mark, struct fsnotify_group *group) { - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); fsnotify_detach_mark(mark); - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); fsnotify_free_mark(mark); } EXPORT_SYMBOL_GPL(fsnotify_destroy_mark); @@ -537,9 +474,10 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) } static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, - unsigned int obj_type, + unsigned int type, __kernel_fsid_t *fsid) { + struct inode *inode = NULL; struct fsnotify_mark_connector *conn; conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); @@ -547,8 +485,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, return -ENOMEM; spin_lock_init(&conn->lock); INIT_HLIST_HEAD(&conn->list); - conn->flags = 0; - conn->type = obj_type; + conn->type = type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ if (fsid) { @@ -558,15 +495,16 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->fsid.val[0] = conn->fsid.val[1] = 0; conn->flags = 0; } - fsnotify_get_sb_connectors(conn); - + if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) + inode = igrab(fsnotify_conn_inode(conn)); /* * cmpxchg() provides the barrier so that readers of *connp can see * only initialized structure */ if (cmpxchg(connp, NULL, conn)) { /* Someone else created list structure for us */ - fsnotify_put_sb_connectors(conn); + if (inode) + iput(inode); 
kmem_cache_free(fsnotify_mark_connector_cachep, conn); } @@ -607,16 +545,15 @@ out: * priority, highest number first, and then by the group's location in memory. */ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, - unsigned int obj_type, - int add_flags, __kernel_fsid_t *fsid) + fsnotify_connp_t *connp, unsigned int type, + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_mark *lmark, *last = NULL; struct fsnotify_mark_connector *conn; int cmp; int err = 0; - if (WARN_ON(!fsnotify_valid_obj_type(obj_type))) + if (WARN_ON(!fsnotify_valid_obj_type(type))) return -EINVAL; /* Backend is expected to check for zero fsid (e.g. tmpfs) */ @@ -628,8 +565,7 @@ restart: conn = fsnotify_grab_connector(connp); if (!conn) { spin_unlock(&mark->lock); - err = fsnotify_attach_connector_to_object(connp, obj_type, - fsid); + err = fsnotify_attach_connector_to_object(connp, type, fsid); if (err) return err; goto restart; @@ -668,7 +604,7 @@ restart: if ((lmark->group == mark->group) && (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && - !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) { + !allow_dups) { err = -EEXIST; goto out_err; } @@ -702,13 +638,13 @@ out_err: * event types should be delivered to which group. */ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags, __kernel_fsid_t *fsid) + fsnotify_connp_t *connp, unsigned int type, + int allow_dups, __kernel_fsid_t *fsid) { struct fsnotify_group *group = mark->group; int ret = 0; - fsnotify_group_assert_locked(group); + BUG_ON(!mutex_is_locked(&group->mark_mutex)); /* * LOCKING ORDER!!!! 
@@ -720,14 +656,16 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; list_add(&mark->g_list, &group->marks_list); + atomic_inc(&group->num_marks); fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid); + ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid); if (ret) goto err; - fsnotify_recalc_mask(mark->connector); + if (mark->mask) + fsnotify_recalc_mask(mark->connector); return ret; err: @@ -736,21 +674,21 @@ err: FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); spin_unlock(&mark->lock); + atomic_dec(&group->num_marks); fsnotify_put_mark(mark); return ret; } int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags, - __kernel_fsid_t *fsid) + unsigned int type, int allow_dups, __kernel_fsid_t *fsid) { int ret; struct fsnotify_group *group = mark->group; - fsnotify_group_lock(group); - ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid); - fsnotify_group_unlock(group); + mutex_lock(&group->mark_mutex); + ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid); + mutex_unlock(&group->mark_mutex); return ret; } EXPORT_SYMBOL_GPL(fsnotify_add_mark); @@ -784,14 +722,14 @@ EXPORT_SYMBOL_GPL(fsnotify_find_mark); /* Clear any marks in a group with given type mask */ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - unsigned int obj_type) + unsigned int type_mask) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); struct list_head *head = &to_free; /* Skip selection step if we want to clear all marks. 
*/ - if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) { + if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) { head = &group->marks_list; goto clear; } @@ -804,24 +742,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group, * move marks to free to to_free list in one go and then free marks in * to_free list one by one. */ - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->connector->type == obj_type) + if ((1U << mark->connector->type) & type_mask) list_move(&mark->g_list, &to_free); } - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); clear: while (1) { - fsnotify_group_lock(group); + mutex_lock(&group->mark_mutex); if (list_empty(head)) { - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); break; } mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); - fsnotify_group_unlock(group); + mutex_unlock(&group->mark_mutex); fsnotify_free_mark(mark); fsnotify_put_mark(mark); } diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 9022ae650cf8..75d79d6d3ef0 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -47,6 +47,13 @@ u32 fsnotify_get_cookie(void) } EXPORT_SYMBOL_GPL(fsnotify_get_cookie); +/* return true if the notify queue is empty, false otherwise */ +bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) +{ + assert_spin_locked(&group->notification_lock); + return list_empty(&group->notification_list) ? true : false; +} + void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event) { @@ -64,26 +71,20 @@ void fsnotify_destroy_event(struct fsnotify_group *group, WARN_ON(!list_empty(&event->list)); spin_unlock(&group->notification_lock); } - group->ops->free_event(group, event); + group->ops->free_event(event); } /* - * Try to add an event to the notification queue. 
- * The group can later pull this event off the queue to deal with. - * The group can use the @merge hook to merge the event with a queued event. - * The group can use the @insert hook to insert the event into hash table. - * The function returns: - * 0 if the event was added to a queue - * 1 if the event was merged with some other queued event + * Add an event to the group notification queue. The group can later pull this + * event off the queue to deal with. The function returns 0 if the event was + * added to the queue, 1 if the event was merged with some other queued event, * 2 if the event was not queued - either the queue of events has overflown - * or the group is shutting down. + * or the group is shutting down. */ -int fsnotify_insert_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *), - void (*insert)(struct fsnotify_group *, - struct fsnotify_event *)) +int fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct list_head *, + struct fsnotify_event *)) { int ret = 0; struct list_head *list = &group->notification_list; @@ -110,7 +111,7 @@ int fsnotify_insert_event(struct fsnotify_group *group, } if (!list_empty(list) && merge) { - ret = merge(group, event); + ret = merge(list, event); if (ret) { spin_unlock(&group->notification_lock); return ret; @@ -120,8 +121,6 @@ int fsnotify_insert_event(struct fsnotify_group *group, queue: group->q_len++; list_add_tail(&event->list, list); - if (insert) - insert(group, event); spin_unlock(&group->notification_lock); wake_up(&group->notification_waitq); @@ -141,39 +140,36 @@ void fsnotify_remove_queued_event(struct fsnotify_group *group, group->q_len--; } -/* - * Return the first event on the notification list without removing it. - * Returns NULL if the list is empty. 
- */ -struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group) -{ - assert_spin_locked(&group->notification_lock); - - if (fsnotify_notify_queue_is_empty(group)) - return NULL; - - return list_first_entry(&group->notification_list, - struct fsnotify_event, list); -} - /* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event */ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group) { - struct fsnotify_event *event = fsnotify_peek_first_event(group); + struct fsnotify_event *event; - if (!event) - return NULL; + assert_spin_locked(&group->notification_lock); - pr_debug("%s: group=%p event=%p\n", __func__, group, event); + pr_debug("%s: group=%p\n", __func__, group); + event = list_first_entry(&group->notification_list, + struct fsnotify_event, list); fsnotify_remove_queued_event(group, event); - return event; } +/* + * This will not remove the event, that must be done with + * fsnotify_remove_first_event() + */ +struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group) +{ + assert_spin_locked(&group->notification_lock); + + return list_first_entry(&group->notification_list, + struct fsnotify_event, list); +} + /* * Called when a group is being torn down to clean up any outstanding * event notifications. 
diff --git a/fs/open.c b/fs/open.c index 3ad0c6c8f5e7..965230a0710c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -493,7 +493,7 @@ retry: if (error) goto out; - error = path_permission(&path, MAY_EXEC | MAY_CHDIR); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -522,7 +522,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!d_can_lookup(f.file->f_path.dentry)) goto out_putf; - error = file_permission(f.file, MAY_EXEC | MAY_CHDIR); + error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR); if (!error) set_fs_pwd(current->fs, &f.file->f_path); out_putf: @@ -541,7 +541,7 @@ retry: if (error) goto out; - error = path_permission(&path, MAY_EXEC | MAY_CHDIR); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); if (error) goto dput_and_out; @@ -965,47 +965,6 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); -/** - * dentry_create - Create and open a file - * @path: path to create - * @flags: O_ flags - * @mode: mode bits for new file - * @cred: credentials to use - * - * Caller must hold the parent directory's lock, and have prepared - * a negative dentry, placed in @path->dentry, for the new file. - * - * Caller sets @path->mnt to the vfsmount of the filesystem where - * the new file is to be created. The parent directory and the - * negative dentry must reside on the same filesystem instance. - * - * On success, returns a "struct file *". Otherwise a ERR_PTR - * is returned. 
- */ -struct file *dentry_create(const struct path *path, int flags, umode_t mode, - const struct cred *cred) -{ - struct file *f; - int error; - - validate_creds(cred); - f = alloc_empty_file(flags, cred); - if (IS_ERR(f)) - return f; - - error = vfs_create(d_inode(path->dentry->d_parent), - path->dentry, mode, true); - if (!error) - error = vfs_open(path, f); - - if (unlikely(error)) { - fput(f); - return ERR_PTR(error); - } - return f; -} -EXPORT_SYMBOL(dentry_create); - struct file *open_with_fake_path(const struct path *path, int flags, struct inode *inode, const struct cred *cred) { @@ -1382,7 +1341,7 @@ EXPORT_SYMBOL(filp_close); */ SYSCALL_DEFINE1(close, unsigned int, fd) { - int retval = close_fd(fd); + int retval = __close_fd(current->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 520a6bdaf429..a9f9923a725d 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -214,16 +214,9 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, unsigned int flags) { int err; - struct renamedata rd = { - .old_dir = olddir, - .old_dentry = olddentry, - .new_dir = newdir, - .new_dentry = newdentry, - .flags = flags, - }; pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags); - err = vfs_rename(&rd); + err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); if (err) { pr_debug("...rename(%pd2, %pd2, ...) 
= %i\n", olddentry, newdentry, err); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 35b92009c1cc..6b634c0a9b6e 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -29,13 +29,14 @@ static int seq_show(struct seq_file *m, void *v) if (!task) return -ENOENT; - task_lock(task); - files = task->files; + files = get_files_struct(task); + put_task_struct(task); + if (files) { unsigned int fd = proc_fd(m->private); spin_lock(&files->file_lock); - file = files_lookup_fd_locked(files, fd); + file = fcheck_files(files, fd); if (file) { struct fdtable *fdt = files_fdtable(files); @@ -47,9 +48,8 @@ static int seq_show(struct seq_file *m, void *v) ret = 0; } spin_unlock(&files->file_lock); + put_files_struct(files); } - task_unlock(task); - put_task_struct(task); if (ret) return ret; @@ -59,7 +59,6 @@ static int seq_show(struct seq_file *m, void *v) real_mount(file->f_path.mnt)->mnt_id, file_inode(file)->i_ino); - /* show_fd_locks() never deferences files so a stale value is safe */ show_fd_locks(m, file, files); if (seq_has_overflowed(m)) goto out; @@ -108,13 +107,18 @@ static const struct file_operations proc_fdinfo_file_operations = { static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode) { + struct files_struct *files = get_files_struct(task); struct file *file; + if (!files) + return false; + rcu_read_lock(); - file = task_lookup_fd_rcu(task, fd); + file = fcheck_files(files, fd); if (file) *mode = file->f_mode; rcu_read_unlock(); + put_files_struct(files); return !!file; } @@ -166,22 +170,29 @@ static const struct dentry_operations tid_fd_dentry_operations = { static int proc_fd_link(struct dentry *dentry, struct path *path) { + struct files_struct *files = NULL; struct task_struct *task; int ret = -ENOENT; task = get_proc_task(d_inode(dentry)); if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + + if (files) { unsigned int fd = proc_fd(d_inode(dentry)); struct file *fd_file; - fd_file = fget_task(task, fd); + 
spin_lock(&files->file_lock); + fd_file = fcheck_files(files, fd); if (fd_file) { *path = fd_file->f_path; path_get(&fd_file->f_path); ret = 0; - fput(fd_file); } - put_task_struct(task); + spin_unlock(&files->file_lock); + put_files_struct(files); } return ret; @@ -242,6 +253,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, instantiate_t instantiate) { struct task_struct *p = get_proc_task(file_inode(file)); + struct files_struct *files; unsigned int fd; if (!p) @@ -249,18 +261,22 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!dir_emit_dots(file, ctx)) goto out; + files = get_files_struct(p); + if (!files) + goto out; rcu_read_lock(); - for (fd = ctx->pos - 2;; fd++) { + for (fd = ctx->pos - 2; + fd < files_fdtable(files)->max_fds; + fd++, ctx->pos++) { struct file *f; struct fd_data data; char name[10 + 1]; unsigned int len; - f = task_lookup_next_fd_rcu(p, &fd); - ctx->pos = fd + 2LL; + f = fcheck_files(files, fd); if (!f) - break; + continue; data.mode = f->f_mode; rcu_read_unlock(); data.fd = fd; @@ -269,11 +285,13 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!proc_fill_cache(file, ctx, name, len, instantiate, p, &data)) - goto out; + goto out_fd_loop; cond_resched(); rcu_read_lock(); } rcu_read_unlock(); +out_fd_loop: + put_files_struct(files); out: put_task_struct(p); return 0; diff --git a/fs/udf/file.c b/fs/udf/file.c index 25f7c915f22b..e283a62701b8 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -181,7 +181,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long old_block, new_block; int result; - if (file_permission(filp, MAY_READ) != 0) { + if (inode_permission(inode, MAY_READ) != 0) { udf_debug("no permission to access inode %lu\n", inode->i_ino); return -EPERM; } diff --git a/fs/verity/enable.c b/fs/verity/enable.c index dfe8acc32df6..dbabea77efc0 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -369,7 +369,7 @@ 
int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) * has verity enabled, and to stabilize the data being hashed. */ - err = file_permission(filp, MAY_WRITE); + err = inode_permission(inode, MAY_WRITE); if (err) return err; diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index b87c3b85a166..0aad774beaec 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -26,7 +26,7 @@ struct dnotify_struct { FS_MODIFY | FS_MODIFY_CHILD |\ FS_ACCESS | FS_ACCESS_CHILD |\ FS_ATTRIB | FS_ATTRIB_CHILD |\ - FS_CREATE | FS_RENAME |\ + FS_CREATE | FS_DN_RENAME |\ FS_MOVED_FROM | FS_MOVED_TO) extern int dir_notify_enable; diff --git a/include/linux/errno.h b/include/linux/errno.h index 8b0c754bab02..d73f597a2484 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -31,6 +31,5 @@ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ #define EIOCBQUEUED 529 /* iocb queued, will get completion event */ #define ERECALLCONFLICT 530 /* conflict with recalled state */ -#define ENOGRACE 531 /* NFS file lock reclaim refused */ #endif diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 218fc5c54e90..3ceb72b67a7a 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,27 +213,12 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); - u64 (*fetch_iversion)(struct inode *); -#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ -#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ -#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -#define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */ -#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply - atomic attribute updates - */ -#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ - unsigned long flags; }; extern int 
exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent); extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int connectable); -extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, - struct fid *fid, int fh_len, - int fileid_type, - int (*acceptable)(void *, struct dentry *), - void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 558844c8d259..3e9c56ee651f 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -2,11 +2,8 @@ #ifndef _LINUX_FANOTIFY_H #define _LINUX_FANOTIFY_H -#include #include -extern struct ctl_table fanotify_table[]; /* for sysctl */ - #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) @@ -18,62 +15,27 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ * these constant, the programs may break if re-compiled with new uapi headers * and then run on an old kernel. */ - -/* Group classes where permission events are allowed */ -#define FANOTIFY_PERM_CLASSES (FAN_CLASS_CONTENT | \ +#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) -#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES) +#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) -#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) - -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) - -/* - * fanotify_init() flags that require CAP_SYS_ADMIN. - * We do not allow unprivileged groups to request permission events. - * We do not allow unprivileged groups to get other process pid in events. - * We do not allow unprivileged groups to use unlimited resources. 
- */ -#define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ - FAN_REPORT_TID | \ - FAN_REPORT_PIDFD | \ - FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS) - -/* - * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. - * FAN_CLASS_NOTIF is the only class we allow for unprivileged group. - * We do not allow unprivileged groups to get file descriptors in events, - * so one of the flags for reporting file handles is required. - */ -#define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ - FANOTIFY_FID_BITS | \ - FAN_CLOEXEC | FAN_NONBLOCK) - -#define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ - FANOTIFY_USER_INIT_FLAGS) - -/* Internal group flags */ -#define FANOTIFY_UNPRIV 0x80000000 -#define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) +#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | FANOTIFY_FID_BITS | \ + FAN_REPORT_TID | \ + FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ FAN_MARK_FILESYSTEM) -#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ - FAN_MARK_FLUSH) - -#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \ - FAN_MARK_IGNORE) - #define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ - FANOTIFY_MARK_CMD_BITS | \ - FANOTIFY_MARK_IGNORE_BITS | \ + FAN_MARK_ADD | \ + FAN_MARK_REMOVE | \ FAN_MARK_DONT_FOLLOW | \ FAN_MARK_ONLYDIR | \ + FAN_MARK_IGNORED_MASK | \ FAN_MARK_IGNORED_SURV_MODIFY | \ - FAN_MARK_EVICTABLE) + FAN_MARK_FLUSH) /* * Events that can be reported with data type FSNOTIFY_EVENT_PATH. @@ -87,23 +49,15 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. 
*/ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ - FAN_RENAME) - -/* Events that can be reported with event->fd */ -#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF) -/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ -#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) - /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ - FANOTIFY_INODE_EVENTS | \ - FANOTIFY_ERROR_EVENTS) + FANOTIFY_INODE_EVENTS) /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ @@ -117,10 +71,6 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW | FAN_ONDIR) -/* Events and flags relevant only for directories */ -#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \ - FAN_EVENT_ON_CHILD | FAN_ONDIR) - #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 4ed3589f9294..f1a99d3e5570 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -80,7 +80,7 @@ struct dentry; /* * The caller must ensure that fd table isn't shared or hold rcu or file lock */ -static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd) +static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = rcu_dereference_raw(files->fdt); @@ -91,40 +91,37 @@ static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsig return NULL; } -static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd) +static 
inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) { - RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock), + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + !lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); - return files_lookup_fd_raw(files, fd); + return __fcheck_files(files, fd); } -static inline struct file *files_lookup_fd_rcu(struct files_struct *files, unsigned int fd) -{ - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), - "suspicious rcu_dereference_check() usage"); - return files_lookup_fd_raw(files, fd); -} - -static inline struct file *lookup_fd_rcu(unsigned int fd) -{ - return files_lookup_fd_rcu(current->files, fd); -} - -struct file *task_lookup_fd_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *fd); +/* + * Check whether the specified fd has an open file. + */ +#define fcheck(fd) fcheck_files(current->files, fd) struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); -int unshare_files(void); +void reset_files_struct(struct files_struct *); +int unshare_files(struct files_struct **); struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); -extern int close_fd(unsigned int fd); +extern int __alloc_fd(struct files_struct *files, + unsigned start, unsigned end, unsigned flags); +extern void __fd_install(struct files_struct *files, + unsigned int fd, struct file *file); +extern int __close_fd(struct files_struct *files, + unsigned int fd); extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern int close_fd_get_file(unsigned int fd, struct file **res); extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, diff --git 
a/include/linux/fs.h b/include/linux/fs.h index ec6de06ead4c..c61553833fb9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1020,7 +1020,6 @@ static inline struct file *get_file(struct file *f) #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ -#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) @@ -1044,7 +1043,6 @@ struct file_lock_operations { }; struct lock_manager_operations { - void *lm_mod_owner; fl_owner_t (*lm_get_owner)(fl_owner_t); void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ @@ -1053,8 +1051,6 @@ struct lock_manager_operations { int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); bool (*lm_breaker_owns_lease)(struct file_lock *); - bool (*lm_lock_expirable)(struct file_lock *cfl); - void (*lm_expire_lock)(void); ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); @@ -1200,15 +1196,6 @@ extern void lease_unregister_notifier(struct notifier_block *); struct files_struct; extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); -extern bool locks_owner_has_blockers(struct file_lock_context *flctx, - fl_owner_t owner); - -static inline struct file_lock_context * -locks_inode_context(const struct inode *inode) -{ - return smp_load_acquire(&inode->i_flctx); -} - #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1349,18 +1336,6 @@ static inline int lease_modify(struct file_lock *fl, int arg, struct files_struct; static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} -static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, - fl_owner_t owner) -{ - return false; -} - -static inline struct 
file_lock_context * -locks_inode_context(const struct inode *inode) -{ - return NULL; -} - #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) @@ -1580,11 +1555,8 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; - /* - * Number of inode/mount/sb objects that are being watched, note that - * inodes objects are currently double-accounted. - */ - atomic_long_t s_fsnotify_connectors; + /* Pending fsnotify inode refs */ + atomic_long_t s_fsnotify_inode_refs; /* Being remounted read-only */ int s_readonly_remount; @@ -1856,17 +1828,7 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); - -struct renamedata { - struct inode *old_dir; - struct dentry *old_dentry; - struct inode *new_dir; - struct dentry *new_dentry; - struct inode **delegated_inode; - unsigned int flags; -} __randomize_layout; - -int vfs_rename(struct renamedata *); +extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry) { @@ -2700,8 +2662,6 @@ extern struct file *filp_open_block(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int, umode_t); extern struct file * dentry_open(const struct path *, int, const struct cred *); -extern struct file *dentry_create(const struct path *path, int flags, - umode_t mode, const struct cred *cred); extern struct file * open_with_fake_path(const struct path *, int, struct inode*, const struct cred *); static inline struct file *file_clone_open(struct file *file) @@ -2932,14 +2892,6 @@ static inline int bmap(struct inode *inode, 
sector_t *block) extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); -static inline int file_permission(struct file *file, int mask) -{ - return inode_permission(file_inode(file), mask); -} -static inline int path_permission(const struct path *path, int mask) -{ - return inode_permission(d_inode(path->dentry), mask); -} extern int __check_sticky(struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index bb8467cd11ae..79add91eaa04 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -26,27 +26,21 @@ * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent. */ -static inline int fsnotify_name(__u32 mask, const void *data, int data_type, - struct inode *dir, const struct qstr *name, - u32 cookie) +static inline void fsnotify_name(struct inode *dir, __u32 mask, + struct inode *child, + const struct qstr *name, u32 cookie) { - if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) - return 0; - - return fsnotify(mask, data, data_type, dir, name, NULL, cookie); + fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie); } static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, __u32 mask) { - fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0); + fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } static inline void fsnotify_inode(struct inode *inode, __u32 mask) { - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) - return; - if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; @@ -59,9 +53,6 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) - return 
0; - if (S_ISDIR(inode->i_mode)) { mask |= FS_ISDIR; @@ -86,7 +77,7 @@ notify_child: */ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) { - fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); + fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE); } static inline int fsnotify_file(struct file *file, __u32 mask) @@ -144,23 +135,18 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = FS_MOVED_FROM; __u32 new_dir_mask = FS_MOVED_TO; - __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; + if (old_dir == new_dir) + old_dir_mask |= FS_DN_RENAME; + if (isdir) { old_dir_mask |= FS_ISDIR; new_dir_mask |= FS_ISDIR; - rename_mask |= FS_ISDIR; } - /* Event with information about both old and new parent+name */ - fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, - old_dir, old_name, 0); - - fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, - old_dir, old_name, fs_cookie); - fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, - new_dir, new_name, fs_cookie); + fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); + fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie); if (target) fsnotify_link_count(target); @@ -195,22 +181,16 @@ static inline void fsnotify_inoderemove(struct inode *inode) /* * fsnotify_create - 'name' was linked in - * - * Caller must make sure that dentry->d_name is stable. - * Note: some filesystems (e.g. 
kernfs) leave @dentry negative and instantiate - * ->d_inode later */ -static inline void fsnotify_create(struct inode *dir, struct dentry *dentry) +static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { - audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); + audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_dirent(dir, dentry, FS_CREATE); + fsnotify_dirent(inode, dentry, FS_CREATE); } /* * fsnotify_link - new hardlink in 'inode' directory - * - * Caller must make sure that new_dentry->d_name is stable. * Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ @@ -220,8 +200,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, - dir, &new_dentry->d_name, 0); + fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); } /* @@ -240,8 +219,7 @@ static inline void fsnotify_delete(struct inode *dir, struct inode *inode, if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; - fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, - 0); + fsnotify_name(dir, mask, inode, &dentry->d_name, 0); } /** @@ -276,16 +254,12 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) /* * fsnotify_mkdir - directory 'name' was created - * - * Caller must make sure that dentry->d_name is stable. - * Note: some filesystems (e.g. 
kernfs) leave @dentry negative and instantiate - * ->d_inode later */ -static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) +static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); + audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); + fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } /* @@ -379,17 +353,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) fsnotify_dentry(dentry, mask); } -static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, - int error) -{ - struct fs_error_report report = { - .error = error, - .inode = inode, - .sb = sb, - }; - - return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, - NULL, NULL, NULL, 0); -} - #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d7d96c806bff..a2e42d3cd87c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -19,8 +19,6 @@ #include #include #include -#include -#include /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -44,18 +42,13 @@ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -#define FS_ERROR 0x00008000 /* Filesystem Error (fanotify) */ - -/* - * FS_IN_IGNORED overloads FS_ERROR. It is only used internally by inotify - * which does not support FS_ERROR. 
- */ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. @@ -63,9 +56,10 @@ */ #define FS_EVENT_ON_CHILD 0x08000000 -#define FS_RENAME 0x10000000 /* File was renamed */ +#define FS_DN_RENAME 0x10000000 /* file renamed */ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ #define FS_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) @@ -75,7 +69,7 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. 
*/ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) @@ -100,12 +94,12 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ - FS_DELETE_SELF | FS_MOVE_SELF | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_ERROR) + FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) /* Extra flags that may be reported with event or control handling of events */ -#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT) +#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ + FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) @@ -142,7 +136,6 @@ struct mem_cgroup; * @dir: optional directory associated with event - * if @file_name is not NULL, this is the directory that * @file_name is relative to. - * Either @inode or @dir must be non-NULL. 
* @file_name: optional file name associated with event * @cookie: inotify rename cookie * @@ -162,7 +155,7 @@ struct fsnotify_ops { const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); - void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event); + void (*free_event)(struct fsnotify_event *event); /* called on final put+free to free memory */ void (*free_mark)(struct fsnotify_mark *mark); }; @@ -174,6 +167,7 @@ struct fsnotify_ops { */ struct fsnotify_event { struct list_head list; + unsigned long objectid; /* identifier for queue merges */ }; /* @@ -211,14 +205,11 @@ struct fsnotify_group { unsigned int priority; bool shutdown; /* group is being shut down, don't queue more events */ -#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ -#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ -#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */ - int flags; - unsigned int owner_flags; /* stored flags of mark_mutex owner */ - /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ + atomic_t num_marks; /* 1 for each mark and 1 for not being + * past the point of no return when freeing + * a group */ atomic_t user_waits; /* Number of tasks waiting for user * response */ struct list_head marks_list; /* all inode marks for this group */ @@ -243,58 +234,23 @@ struct fsnotify_group { #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { - /* Hash table of events for merge */ - struct hlist_head *merge_hash; /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; int flags; /* flags from fanotify_init() */ int f_flags; /* event_f_flags from fanotify_init() */ - struct ucounts *ucounts; - mempool_t error_events_pool; + 
unsigned int max_marks; + struct user_struct *user; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; }; -/* - * These helpers are used to prevent deadlock when reclaiming inodes with - * evictable marks of the same group that is allocating a new mark. - */ -static inline void fsnotify_group_lock(struct fsnotify_group *group) -{ - mutex_lock(&group->mark_mutex); - if (group->flags & FSNOTIFY_GROUP_NOFS) - group->owner_flags = memalloc_nofs_save(); -} - -static inline void fsnotify_group_unlock(struct fsnotify_group *group) -{ - if (group->flags & FSNOTIFY_GROUP_NOFS) - memalloc_nofs_restore(group->owner_flags); - mutex_unlock(&group->mark_mutex); -} - -static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) -{ - WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); - if (group->flags & FSNOTIFY_GROUP_NOFS) - WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); -} - /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, - FSNOTIFY_EVENT_DENTRY, - FSNOTIFY_EVENT_ERROR, -}; - -struct fs_error_report { - int error; - struct inode *inode; - struct super_block *sb; }; static inline struct inode *fsnotify_data_inode(const void *data, int data_type) @@ -302,25 +258,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) switch (data_type) { case FSNOTIFY_EVENT_INODE: return (struct inode *)data; - case FSNOTIFY_EVENT_DENTRY: - return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); - case FSNOTIFY_EVENT_ERROR: - return ((struct fs_error_report *)data)->inode; - default: - return NULL; - } -} - -static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type) -{ - switch (data_type) { - case FSNOTIFY_EVENT_DENTRY: - /* Non const is needed for dget() */ - return (struct dentry *)data; - case FSNOTIFY_EVENT_PATH: - return ((const struct path *)data)->dentry; default: 
return NULL; } @@ -337,110 +276,58 @@ static inline const struct path *fsnotify_data_path(const void *data, } } -static inline struct super_block *fsnotify_data_sb(const void *data, - int data_type) -{ - switch (data_type) { - case FSNOTIFY_EVENT_INODE: - return ((struct inode *)data)->i_sb; - case FSNOTIFY_EVENT_DENTRY: - return ((struct dentry *)data)->d_sb; - case FSNOTIFY_EVENT_PATH: - return ((const struct path *)data)->dentry->d_sb; - case FSNOTIFY_EVENT_ERROR: - return ((struct fs_error_report *) data)->sb; - default: - return NULL; - } -} - -static inline struct fs_error_report *fsnotify_data_error_report( - const void *data, - int data_type) -{ - switch (data_type) { - case FSNOTIFY_EVENT_ERROR: - return (struct fs_error_report *) data; - default: - return NULL; - } -} - -/* - * Index to merged marks iterator array that correlates to a type of watch. - * The type of watched object can be deduced from the iterator type, but not - * the other way around, because an event can match different watched objects - * of the same object type. - * For example, both parent and child are watching an object of type inode. 
- */ -enum fsnotify_iter_type { - FSNOTIFY_ITER_TYPE_INODE, - FSNOTIFY_ITER_TYPE_VFSMOUNT, - FSNOTIFY_ITER_TYPE_SB, - FSNOTIFY_ITER_TYPE_PARENT, - FSNOTIFY_ITER_TYPE_INODE2, - FSNOTIFY_ITER_TYPE_COUNT -}; - -/* The type of object that a mark is attached to */ enum fsnotify_obj_type { - FSNOTIFY_OBJ_TYPE_ANY = -1, FSNOTIFY_OBJ_TYPE_INODE, + FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; -static inline bool fsnotify_valid_obj_type(unsigned int obj_type) +#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) +#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) +#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) +#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) +#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) + +static inline bool fsnotify_valid_obj_type(unsigned int type) { - return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT); + return (type < FSNOTIFY_OBJ_TYPE_COUNT); } struct fsnotify_iter_info { - struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT]; - struct fsnotify_group *current_group; + struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT]; unsigned int report_mask; int srcu_idx; }; static inline bool fsnotify_iter_should_report_type( - struct fsnotify_iter_info *iter_info, int iter_type) + struct fsnotify_iter_info *iter_info, int type) { - return (iter_info->report_mask & (1U << iter_type)); + return (iter_info->report_mask & (1U << type)); } static inline void fsnotify_iter_set_report_type( - struct fsnotify_iter_info *iter_info, int iter_type) + struct fsnotify_iter_info *iter_info, int type) { - iter_info->report_mask |= (1U << iter_type); + iter_info->report_mask |= (1U << type); } -static inline struct fsnotify_mark *fsnotify_iter_mark( - struct fsnotify_iter_info *iter_info, int iter_type) +static inline void fsnotify_iter_set_report_type_mark( + struct 
fsnotify_iter_info *iter_info, int type, + struct fsnotify_mark *mark) { - if (fsnotify_iter_should_report_type(iter_info, iter_type)) - return iter_info->marks[iter_type]; - return NULL; -} - -static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type, - struct fsnotify_mark **markp) -{ - while (type < FSNOTIFY_ITER_TYPE_COUNT) { - *markp = fsnotify_iter_mark(iter, type); - if (*markp) - break; - type++; - } - return type; + iter_info->marks[type] = mark; + iter_info->report_mask |= (1U << type); } #define FSNOTIFY_ITER_FUNCS(name, NAME) \ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ struct fsnotify_iter_info *iter_info) \ { \ - return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \ + return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \ + iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \ } FSNOTIFY_ITER_FUNCS(inode, INODE) @@ -448,13 +335,8 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) -#define fsnotify_foreach_iter_type(type) \ - for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++) -#define fsnotify_foreach_iter_mark_type(iter, mark, type) \ - for (type = 0; \ - type = fsnotify_iter_step(iter, type, &mark), \ - type < FSNOTIFY_ITER_TYPE_COUNT; \ - type++) +#define fsnotify_foreach_obj_type(type) \ + for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) /* * fsnotify_connp_t is what we embed in objects which connector can be attached @@ -473,7 +355,6 @@ struct fsnotify_mark_connector { spinlock_t lock; unsigned short type; /* Type of object [lock] */ #define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01 -#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ __kernel_fsid_t fsid; /* fsid of filesystem containing object */ union { @@ -518,18 +399,11 @@ struct fsnotify_mark { struct hlist_node obj_list; /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; - /* Events types 
and flags to ignore [mark->lock, group->mark_mutex] */ - __u32 ignore_mask; - /* General fsnotify mark flags */ -#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001 -#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002 - /* inotify mark flags */ -#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010 -#define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020 - /* fanotify mark flags */ -#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100 -#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200 -#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400 + /* Events types to ignore [mark->lock, group->mark_mutex] */ + __u32 ignored_mask; +#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 +#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 +#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 unsigned int flags; /* flags [mark->lock] */ }; @@ -595,9 +469,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry) /* called from fsnotify listeners, such as fanotify or dnotify */ /* create a new group */ -extern struct fsnotify_group *fsnotify_alloc_group( - const struct fsnotify_ops *ops, - int flags); +extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops); /* get reference to a group */ extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ @@ -612,39 +484,17 @@ extern int fsnotify_fasync(int fd, struct file *file, int on); extern void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event); /* attach the event to the group notification queue */ -extern int fsnotify_insert_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *), - void (*insert)(struct fsnotify_group *, - struct fsnotify_event *)); - -static inline int fsnotify_add_event(struct fsnotify_group *group, - struct fsnotify_event *event, - int (*merge)(struct fsnotify_group *, - struct fsnotify_event *)) -{ - return fsnotify_insert_event(group, event, merge, NULL); -} - +extern int 
fsnotify_add_event(struct fsnotify_group *group, + struct fsnotify_event *event, + int (*merge)(struct list_head *, + struct fsnotify_event *)); /* Queue overflow event to a notification group */ static inline void fsnotify_queue_overflow(struct fsnotify_group *group) { fsnotify_add_event(group, group->overflow_event, NULL); } -static inline bool fsnotify_is_overflow_event(u32 mask) -{ - return mask & FS_Q_OVERFLOW; -} - -static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group) -{ - assert_spin_locked(&group->notification_lock); - - return list_empty(&group->notification_list); -} - +/* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group); @@ -656,101 +506,6 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group, /* functions used to manipulate the marks attached to inodes */ -/* - * Canonical "ignore mask" including event flags. - * - * Note the subtle semantic difference from the legacy ->ignored_mask. - * ->ignored_mask traditionally only meant which events should be ignored, - * while ->ignore_mask also includes flags regarding the type of objects on - * which events should be ignored. 
- */ -static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark) -{ - __u32 ignore_mask = mark->ignore_mask; - - /* The event flags in ignore mask take effect */ - if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) - return ignore_mask; - - /* - * Legacy behavior: - * - Always ignore events on dir - * - Ignore events on child if parent is watching children - */ - ignore_mask |= FS_ISDIR; - ignore_mask &= ~FS_EVENT_ON_CHILD; - ignore_mask |= mark->mask & FS_EVENT_ON_CHILD; - - return ignore_mask; -} - -/* Legacy ignored_mask - only event types to ignore */ -static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark) -{ - return mark->ignore_mask & ALL_FSNOTIFY_EVENTS; -} - -/* - * Check if mask (or ignore mask) should be applied depending if victim is a - * directory and whether it is reported to a watching parent. - */ -static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir, - int iter_type) -{ - /* Should mask be applied to a directory? */ - if (is_dir && !(mask & FS_ISDIR)) - return false; - - /* Should mask be applied to a child? */ - if (iter_type == FSNOTIFY_ITER_TYPE_PARENT && - !(mask & FS_EVENT_ON_CHILD)) - return false; - - return true; -} - -/* - * Effective ignore mask taking into account if event victim is a - * directory and whether it is reported to a watching parent. 
- */ -static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark, - bool is_dir, int iter_type) -{ - __u32 ignore_mask = fsnotify_ignored_events(mark); - - if (!ignore_mask) - return 0; - - /* For non-dir and non-child, no need to consult the event flags */ - if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT) - return ignore_mask; - - ignore_mask = fsnotify_ignore_mask(mark); - if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type)) - return 0; - - return ignore_mask & ALL_FSNOTIFY_EVENTS; -} - -/* Get mask for calculating object interest taking ignore mask into account */ -static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark) -{ - __u32 mask = mark->mask; - - if (!fsnotify_ignored_events(mark)) - return mask; - - /* Interest in FS_MODIFY may be needed for clearing ignore mask */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) - mask |= FS_MODIFY; - - /* - * If mark is interested in ignoring events on children, the object must - * show interest in those events for fsnotify_parent() to notice it. 
- */ - return mask | mark->ignore_mask; -} - /* Get mask of events for a list of marks */ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn); /* Calculate mask of events for a list of marks */ @@ -765,27 +520,27 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn, __kernel_fsid_t *fsid); /* attach the mark to the object */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags, __kernel_fsid_t *fsid); + fsnotify_connp_t *connp, unsigned int type, + int allow_dups, __kernel_fsid_t *fsid); extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags, + unsigned int type, int allow_dups, __kernel_fsid_t *fsid); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, - int add_flags) + int allow_dups) { return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL); + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, - int add_flags) + int allow_dups) { return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags, + FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL); } @@ -798,32 +553,33 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); -/* Clear all of the marks of a group attached to a given object type */ -extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, - unsigned int obj_type); +/* run all the marks in a group, and clear all of the marks attached to given object type */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, 
unsigned int type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); } /* run all the marks in a group, and clear all of the sn marks */ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -static inline void fsnotify_init_event(struct fsnotify_event *event) +static inline void fsnotify_init_event(struct fsnotify_event *event, + unsigned long objectid) { INIT_LIST_HEAD(&event->list); + event->objectid = objectid; } #else diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 3bfebde5a1a6..2917ef990d43 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -328,19 +328,6 @@ inode_query_iversion(struct inode *inode) return cur >> I_VERSION_QUERIED_SHIFT; } -/* - * For filesystems without any sort of change attribute, the best we can - * do is fake one up from the ctime: - */ -static inline u64 time_to_chattr(struct timespec64 *t) -{ - u64 chattr = t->tv_sec; - - chattr <<= 32; - chattr += t->tv_nsec; - return chattr; -} - /** * inode_eq_iversion_raw - check whether the raw 
i_version counter has changed * @inode: inode to check diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 465060acc981..481273f0c72d 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -71,14 +71,15 @@ static inline void *dereference_symbol_descriptor(void *ptr) return ptr; } -int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, - unsigned long), - void *data); - #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); +/* Call a function on each kallsyms symbol in the core kernel */ +int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), + void *data); + extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); @@ -107,6 +108,14 @@ static inline unsigned long kallsyms_lookup_name(const char *name) return 0; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, + unsigned long), + void *data) +{ + return 0; +} + static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 9dae77a97a03..2484ed97e72f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -68,7 +68,6 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_exit(long result) __noreturn; int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 3bc9f7410e21..0520c0cd73f4 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -27,8 +27,7 @@ struct rpc_task; struct nlmsvc_binding { __be32 (*fopen)(struct svc_rqst *, struct nfs_fh *, - struct file **, - int mode); + 
struct file **); void (*fclose)(struct file *); }; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 70ce419e2709..666f5f310a04 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -10,8 +10,6 @@ #ifndef LINUX_LOCKD_LOCKD_H #define LINUX_LOCKD_LOCKD_H -/* XXX: a lot of this should really be under fs/lockd. */ - #include #include #include @@ -156,8 +154,7 @@ struct nlm_rqst { struct nlm_file { struct hlist_node f_list; /* linked list */ struct nfs_fh f_handle; /* NFS file handle */ - struct file * f_file[2]; /* VFS file pointers, - indexed by O_ flags */ + struct file * f_file; /* VFS file pointer */ struct nlm_share * f_shares; /* DOS shares */ struct list_head f_blocks; /* blocked locks */ unsigned int f_locks; /* guesstimate # of locks */ @@ -270,7 +267,6 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref); /* * Server-side lock handling */ -int lock_to_openmode(struct file_lock *); __be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, int, struct nlm_cookie *, int); @@ -290,9 +286,8 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); * File handling for the server personality */ __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, - struct nlm_lock *); + struct nfs_fh *); void nlm_release_file(struct nlm_file *); -void nlmsvc_put_lockowner(struct nlm_lockowner *); void nlmsvc_release_lockowner(struct nlm_lock *); void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); @@ -304,15 +299,9 @@ void nlmsvc_invalidate_all(void); int nlmsvc_unlock_all_by_sb(struct super_block *sb); int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); -static inline struct file *nlmsvc_file_file(struct nlm_file *file) -{ - return file->f_file[O_RDONLY] ? 
- file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; -} - static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { - return locks_inode(nlmsvc_file_file(file)); + return locks_inode(file->f_file); } static inline int __nlm_privileged_request4(const struct sockaddr *sap) diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 67e4a2c5500b..7ab9f264313f 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -41,8 +41,6 @@ struct nlm_lock { struct nfs_fh fh; struct xdr_netobj oh; u32 svid; - u64 lock_start; - u64 lock_len; struct file_lock fl; }; @@ -98,19 +96,24 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); +int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); +int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); +int 
nlmsvc_encode_res(struct svc_rqst *, __be32 *); +int nlmsvc_decode_res(struct svc_rqst *, __be32 *); +int nlmsvc_encode_void(struct svc_rqst *, __be32 *); +int nlmsvc_decode_void(struct svc_rqst *, __be32 *); +int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); +int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); +int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); +int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); +/* +int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); +int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); +int nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *); +int nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *); + */ #endif /* LOCKD_XDR_H */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 72831e35dca3..e709fe5924f2 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,22 +22,27 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) -void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); -bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool 
nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); +int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); +int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); +int nlm4svc_encode_res(struct svc_rqst *, __be32 *); +int nlm4svc_decode_res(struct svc_rqst *, __be32 *); +int nlm4svc_encode_void(struct svc_rqst *, __be32 *); +int nlm4svc_decode_void(struct svc_rqst *, __be32 *); +int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); +int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); +int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); +int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); +/* +int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); +int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); +int nlmclt_encode_cancargs(struct rpc_rqst *, u32 *, struct nlm_args *); +int nlmclt_encode_unlockargs(struct rpc_rqst *, u32 *, struct nlm_args *); + */ extern const struct rpc_version nlm_version4; #endif /* LOCKD_XDR4_H */ diff --git a/include/linux/module.h b/include/linux/module.h index de07fbf3a125..4cd6d889d5ba 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -599,7 +599,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod) return within_module_init(addr, mod) || within_module_core(addr, mod); } -/* Search for module by name: must be in a RCU-sched critical section. */ +/* Search for module by name: must hold module_mutex. */ struct module *find_module(const char *name); struct symsearch { @@ -621,9 +621,13 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, /* Look for this name: can be of form module:name. 
*/ unsigned long module_kallsyms_lookup_name(const char *name); -extern void __noreturn __module_put_and_kthread_exit(struct module *mod, +int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data); + +extern void __noreturn __module_put_and_exit(struct module *mod, long code); -#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code) +#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD int module_refcount(struct module *mod); @@ -804,6 +808,14 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name) return 0; } +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, + unsigned long), + void *data) +{ + return 0; +} + static inline int register_module_notifier(struct notifier_block *nb) { /* no events will happen anyway, so this can always succeed */ @@ -815,7 +827,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb) return 0; } -#define module_put_and_kthread_exit(code) kthread_exit(code) +#define module_put_and_exit(code) do_exit(code) static inline void print_modules(void) { @@ -892,8 +904,4 @@ static inline bool module_sig_ok(struct module *module) } #endif /* CONFIG_MODULE_SIG */ -int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, - struct module *, unsigned long), - void *data); - #endif /* _LINUX_MODULE_H */ diff --git a/include/linux/nfs.h b/include/linux/nfs.h index b06375e88e58..0dc7ad38a0da 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -36,6 +36,14 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc memcpy(target->data, source->data, source->size); } + +/* + * This is really a general kernel constant, but since nothing like + * this is defined in the kernel headers, I have to do it here. 
+ */ +#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1)) + + enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ea88d0f462c9..9dc7eeac924f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -385,6 +385,13 @@ enum lock_type4 { NFS4_WRITEW_LT = 4 }; +enum change_attr_type4 { + NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1, + NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2, + NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3, + NFS4_CHANGE_TYPE_IS_UNDEFINED = 4 +}; /* Mandatory Attributes */ #define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0) @@ -452,6 +459,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) #define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) +#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) #define FATTR4_WORD2_MODE_UMASK (1UL << 17) #define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18) @@ -717,17 +725,4 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; - -enum { - RCA4_TYPE_MASK_RDATA_DLG = 0, - RCA4_TYPE_MASK_WDATA_DLG = 1, - RCA4_TYPE_MASK_DIR_DLG = 2, - RCA4_TYPE_MASK_FILE_LAYOUT = 3, - RCA4_TYPE_MASK_BLK_LAYOUT = 4, - RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, - RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, - RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, - RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, -}; - #endif diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 22265b1ff080..f5ba0fbff72f 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -8,7 +8,6 @@ */ #include -#include extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl; @@ -55,19 +54,6 @@ static inline void nfs42_ssc_close(struct file *filep) } #endif -struct nfsd4_ssc_umount_item { - struct list_head nsui_list; - bool nsui_busy; - /* - * nsui_refcnt inited to 2, 1 on list and 1 for consumer. 
Entry - * is removed when refcnt drops to 1 and nsui_expire expires. - */ - refcount_t nsui_refcnt; - unsigned long nsui_expire; - struct vfsmount *nsui_vfsmount; - char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; -}; - /* * NFS_FS */ diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h index 8e76a79cdc6a..103d44695323 100644 --- a/include/linux/nfsacl.h +++ b/include/linux/nfsacl.h @@ -38,11 +38,5 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, extern int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, struct posix_acl **pacl); -extern bool -nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, - struct posix_acl **pacl); -extern bool -nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, - struct posix_acl *acl, int encode_entries, int typeflag); #endif /* __LINUX_NFSACL_H */ diff --git a/include/linux/pid.h b/include/linux/pid.h index af308e15f174..fa10acb8d6a4 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -78,7 +78,6 @@ struct file; extern struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); -int pidfd_create(struct pid *pid, unsigned int flags); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 52eabe6797ee..6d63a5260130 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -15,6 +15,9 @@ struct user_struct { refcount_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? 
*/ +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif #ifdef CONFIG_EPOLL atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ #endif diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 938c2bf29db8..43f854487539 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -10,6 +10,9 @@ #define RPC_VERSION 2 +/* size of an XDR encoding unit in bytes, i.e. 32bit */ +#define XDR_UNIT (4) + /* spec defines authentication flavor as an unsigned 32 bit integer */ typedef u32 rpc_authflavor_t; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1cf7a7799cc0..386628b36bc7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -19,7 +19,6 @@ #include #include #include -#include /* statistics for svc_pool structures */ struct svc_pool_stats { @@ -52,6 +51,25 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; +struct svc_serv; + +struct svc_serv_ops { + /* Callback to use when last thread exits. */ + void (*svo_shutdown)(struct svc_serv *, struct net *); + + /* function for service threads to run */ + int (*svo_function)(void *); + + /* queue up a transport for servicing */ + void (*svo_enqueue_xprt)(struct svc_xprt *); + + /* set up thread (or whatever) execution context */ + int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); + + /* optional module to count when adding threads (pooled svcs only) */ + struct module *svo_module; +}; + /* * RPC service. 
* @@ -66,7 +84,6 @@ struct svc_serv { struct svc_program * sv_program; /* RPC program */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; - struct kref sv_refcnt; unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -84,8 +101,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - int (*sv_threadfn)(void *data); - + const struct svc_serv_ops *sv_ops; /* server operations */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -97,30 +113,15 @@ struct svc_serv { #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/** - * svc_get() - increment reference count on a SUNRPC serv - * @serv: the svc_serv to have count incremented - * - * Returns: the svc_serv that was passed in. +/* + * We use sv_nrthreads as a reference count. svc_destroy() drops + * this refcount, so we need to bump it up around operations that + * change the number of threads. Horrible, but there it is. + * Should be called with the "service mutex" held. */ -static inline struct svc_serv *svc_get(struct svc_serv *serv) +static inline void svc_get(struct svc_serv *serv) { - kref_get(&serv->sv_refcnt); - return serv; -} - -void svc_destroy(struct kref *); - -/** - * svc_put - decrement reference count on a SUNRPC serv - * @serv: the svc_serv to have count decremented - * - * When the reference count reaches zero, svc_destroy() - * is called to clean up and free the serv. 
- */ -static inline void svc_put(struct svc_serv *serv) -{ - kref_put(&serv->sv_refcnt, svc_destroy); + serv->sv_nrthreads++; } /* @@ -246,16 +247,12 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; - struct xdr_stream rq_arg_stream; - struct xdr_stream rq_res_stream; - struct page *rq_scratch_page; struct xdr_buf rq_res; struct page *rq_pages[RPCSVC_MAXPAGES + 1]; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ - struct pagevec rq_pvec; struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; @@ -275,13 +272,13 @@ struct svc_rqst { #define RQ_VICTIM (5) /* about to be shut down */ #define RQ_BUSY (6) /* request is busy */ #define RQ_DATA (7) /* request has data */ +#define RQ_AUTHERR (8) /* Request status is auth error */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ void * rq_auth_data; /* flavor-specific data */ - __be32 rq_auth_stat; /* authentication status */ int rq_auth_slack; /* extra space xdr code * should leave in head * for krb5i, krb5p. 
@@ -455,21 +452,40 @@ struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ - bool (*pc_decode)(struct svc_rqst *rqstp, - struct xdr_stream *xdr); + int (*pc_decode)(struct svc_rqst *, __be32 *data); /* XDR encode result: */ - bool (*pc_encode)(struct svc_rqst *rqstp, - struct xdr_stream *xdr); + int (*pc_encode)(struct svc_rqst *, __be32 *data); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ - unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ - const char * pc_name; /* for display */ }; +/* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +extern struct svc_pool_map svc_pool_map; + /* * Function prototypes. 
*/ @@ -477,17 +493,22 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - int (*threadfn)(void *data)); + const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); -void svc_rqst_replace_page(struct svc_rqst *rqstp, - struct page *page); +struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, + struct svc_pool *pool, int node); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); +unsigned int svc_pool_map_get(void); +void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - int (*threadfn)(void *data)); + const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); +void svc_destroy(struct svc_serv *); +void svc_shutdown_net(struct svc_serv *, struct net *); int svc_process(struct svc_rqst *); int bc_svc_process(struct svc_serv *, struct rpc_rqst *, struct svc_rqst *); @@ -498,14 +519,16 @@ void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); char * svc_print_addr(struct svc_rqst *, char *, size_t); -int svc_encode_result_payload(struct svc_rqst *rqstp, - unsigned int offset, - unsigned int length); +int svc_encode_read_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload); + struct page **pages, + struct kvec *first, size_t total); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); +__be32 
svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err); __be32 svc_generic_init_request(struct svc_rqst *rqstp, const struct svc_program *progp, struct svc_process_info *procinfo); @@ -534,42 +557,4 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) svc_reserve(rqstp, space + rqstp->rq_auth_slack); } -/** - * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding - * @rqstp: controlling server RPC transaction context - * - */ -static inline void svcxdr_init_decode(struct svc_rqst *rqstp) -{ - struct xdr_stream *xdr = &rqstp->rq_arg_stream; - struct kvec *argv = rqstp->rq_arg.head; - - xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); - xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); -} - -/** - * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding - * @rqstp: controlling server RPC transaction context - * - */ -static inline void svcxdr_init_encode(struct svc_rqst *rqstp) -{ - struct xdr_stream *xdr = &rqstp->rq_res_stream; - struct xdr_buf *buf = &rqstp->rq_res; - struct kvec *resv = buf->head; - - xdr_reset_scratch_buffer(xdr); - - xdr->buf = buf; - xdr->iov = resv; - xdr->p = resv->iov_base + resv->iov_len; - xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; - buf->len = resv->iov_len; - xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); - buf->buflen -= rqstp->rq_auth_slack; - xdr->rqst = NULL; -} - #endif /* SUNRPC_SVC_H */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 2b870a3f391b..9dc3a3b88391 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -207,8 +207,8 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status); extern int svc_rdma_sendto(struct svc_rqst *); -extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length); +extern int svc_rdma_read_payload(struct 
svc_rqst *rqstp, unsigned int offset, + unsigned int length); /* svc_rdma_transport.c */ extern struct svc_xprt_class svc_rdma_class; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index dbffb92511ef..aca35ab5cff2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -21,8 +21,8 @@ struct svc_xprt_ops { int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); - int (*xpo_result_payload)(struct svc_rqst *, unsigned int, - unsigned int); + int (*xpo_read_payload)(struct svc_rqst *, unsigned int, + unsigned int); void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); @@ -127,16 +127,14 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, - struct net *net, const int family, - const unsigned short port, int flags, - const struct cred *cred); -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net); -void svc_xprt_received(struct svc_xprt *xprt); +int svc_create_xprt(struct svc_serv *, const char *, struct net *, + const int, const unsigned short, int, + const struct cred *); +void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_xprt_close(struct svc_xprt *xprt); +void svc_close_xprt(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 6d9cc9080aca..b0003866a249 100644 --- 
a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -127,7 +127,7 @@ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq); + int (*accept)(struct svc_rqst *rq, __be32 *authp); int (*release)(struct svc_rqst *rq); void (*domain_release)(struct auth_domain *); int (*set_client)(struct svc_rqst *rq); @@ -149,7 +149,7 @@ struct auth_ops { struct svc_xprt; -extern int svc_authenticate(struct svc_rqst *rqstp); +extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); extern int svc_authorise(struct svc_rqst *rqstp); extern int svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a366d3eb0531..b7ac7fe68306 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -57,9 +57,10 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); -int svc_addsock(struct svc_serv *serv, struct net *net, - const int fd, char *name_return, const size_t len, - const struct cred *cred); +bool svc_alien_sock(struct net *net, int fd); +int svc_addsock(struct svc_serv *serv, const int fd, + char *name_return, const size_t len, + const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index c1c50eaae472..6d9d1520612b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -19,13 +19,6 @@ struct bio_vec; struct rpc_rqst; -/* - * Size of an XDR encoding unit in bytes, i.e. 32 bits, - * as defined in Section 3 of RFC 4506. All encoded - * XDR data items are aligned on a boundary of 32 bits. 
- */ -#define XDR_UNIT sizeof(__be32) - /* * Buffer adjustment */ @@ -239,12 +232,10 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); -extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, - struct page **pages, struct rpc_rqst *rqst); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); -extern void __xdr_commit_encode(struct xdr_stream *xdr); +extern void xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, @@ -255,71 +246,13 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); +extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t); extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t); -extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, - unsigned int len); - -/** - * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. 
- * @xdr: pointer to xdr_stream struct - * @buf: pointer to an empty buffer - * @buflen: size of 'buf' - * - * The scratch buffer is used when decoding from an array of pages. - * If an xdr_inline_decode() call spans across page boundaries, then - * we copy the data into the scratch buffer in order to allow linear - * access. - */ -static inline void -xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) -{ - xdr->scratch.iov_base = buf; - xdr->scratch.iov_len = buflen; -} - -/** - * xdr_set_scratch_page - Attach a scratch buffer for decoding data - * @xdr: pointer to xdr_stream struct - * @page: an anonymous page - * - * See xdr_set_scratch_buffer(). - */ -static inline void -xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page) -{ - xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE); -} - -/** - * xdr_reset_scratch_buffer - Clear scratch buffer information - * @xdr: pointer to xdr_stream struct - * - * See xdr_set_scratch_buffer(). - */ -static inline void -xdr_reset_scratch_buffer(struct xdr_stream *xdr) -{ - xdr_set_scratch_buffer(xdr, NULL, 0); -} - -/** - * xdr_commit_encode - Ensure all data is written to xdr->buf - * @xdr: pointer to xdr_stream - * - * Handle encoding across page boundaries by giving the caller a - * temporary location to write to, then later copying the data into - * place. __xdr_commit_encode() does that copying. 
- */ -static inline void xdr_commit_encode(struct xdr_stream *xdr) -{ - if (unlikely(xdr->scratch.iov_len)) - __xdr_commit_encode(xdr); -} /** * xdr_stream_remaining - Return the number of bytes remaining in the stream @@ -352,7 +285,7 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, static inline size_t xdr_align_size(size_t n) { - const size_t mask = XDR_UNIT - 1; + const size_t mask = sizeof(__u32) - 1; return (n + mask) & ~mask; } @@ -382,7 +315,7 @@ static inline size_t xdr_pad_size(size_t n) */ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) { - const size_t len = XDR_UNIT; + const size_t len = sizeof(__be32); __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) @@ -401,7 +334,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) */ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) { - const size_t len = XDR_UNIT; + const size_t len = sizeof(__be32); __be32 *p = xdr_reserve_space(xdr, len); if (unlikely(!p)) @@ -410,40 +343,6 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) return len; } -/** - * xdr_encode_bool - Encode a boolean item - * @p: address in a buffer into which to encode - * @n: boolean value to encode - * - * Return value: - * Address of item following the encoded boolean - */ -static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) -{ - *p++ = n ? 
xdr_one : xdr_zero; - return p; -} - -/** - * xdr_stream_encode_bool - Encode a boolean item - * @xdr: pointer to xdr_stream - * @n: boolean value to encode - * - * Return values: - * On success, returns length in bytes of XDR buffer consumed - * %-EMSGSIZE on XDR buffer overflow - */ -static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) -{ - const size_t len = XDR_UNIT; - __be32 *p = xdr_reserve_space(xdr, len); - - if (unlikely(!p)) - return -EMSGSIZE; - xdr_encode_bool(p, n); - return len; -} - /** * xdr_stream_encode_u32 - Encode a 32-bit integer * @xdr: pointer to xdr_stream @@ -605,27 +504,6 @@ static inline bool xdr_item_is_present(const __be32 *p) return *p != xdr_zero; } -/** - * xdr_stream_decode_bool - Decode a boolean - * @xdr: pointer to xdr_stream - * @ptr: pointer to a u32 in which to store the result - * - * Return values: - * %0 on success - * %-EBADMSG on XDR buffer overflow - */ -static inline ssize_t -xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) -{ - const size_t count = sizeof(*ptr); - __be32 *p = xdr_inline_decode(xdr, count); - - if (unlikely(!p)) - return -EBADMSG; - *ptr = (*p != xdr_zero); - return 0; -} - /** * xdr_stream_decode_u32 - Decode a 32-bit integer * @xdr: pointer to xdr_stream @@ -647,27 +525,6 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) return 0; } -/** - * xdr_stream_decode_u64 - Decode a 64-bit integer - * @xdr: pointer to xdr_stream - * @ptr: location to store 64-bit integer - * - * Return values: - * %0 on success - * %-EBADMSG on XDR buffer overflow - */ -static inline ssize_t -xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) -{ - const size_t count = sizeof(*ptr); - __be32 *p = xdr_inline_decode(xdr, count); - - if (unlikely(!p)) - return -EBADMSG; - xdr_decode_hyper(p, ptr); - return 0; -} - /** * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data * @xdr: pointer to xdr_stream diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h 
index ec4cea5dd222..61d0315a42ab 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1321,6 +1321,18 @@ static inline long ksys_ftruncate(unsigned int fd, loff_t length) return do_sys_ftruncate(fd, length, 1); } +extern int __close_fd(struct files_struct *files, unsigned int fd); + +/* + * In contrast to sys_close(), this stub does not check whether the syscall + * should or should not be restarted, but returns the raw error codes from + * __close_fd(). + */ +static inline int ksys_close(unsigned int fd) +{ + return __close_fd(current->files, fd); +} + extern long do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index da861f2e34ce..6ee587d0aeaa 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -56,8 +56,6 @@ typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dobool(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 29f55fadc362..71cc05ddaa21 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -50,10 +50,6 @@ enum ucount_type { #ifdef CONFIG_INOTIFY_USER UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, -#endif -#ifdef CONFIG_FANOTIFY - UCOUNT_FANOTIFY_GROUPS, - UCOUNT_FANOTIFY_MARKS, #endif UCOUNT_COUNTS, }; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 56e4a57d2538..8220369ee610 100644 --- a/include/trace/events/sunrpc.h +++ 
b/include/trace/events/sunrpc.h @@ -394,7 +394,6 @@ DEFINE_RPC_RUNNING_EVENT(complete); DEFINE_RPC_RUNNING_EVENT(timeout); DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); -DEFINE_RPC_RUNNING_EVENT(call_done); DECLARE_EVENT_CLASS(rpc_task_queued, @@ -1481,7 +1480,8 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(SPLICE_OK) \ svc_rqst_flag(VICTIM) \ svc_rqst_flag(BUSY) \ - svc_rqst_flag_end(DATA) + svc_rqst_flag(DATA) \ + svc_rqst_flag_end(AUTHERR) #undef svc_rqst_flag #undef svc_rqst_flag_end @@ -1547,9 +1547,9 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE); { SVC_COMPLETE, "SVC_COMPLETE" }) TRACE_EVENT(svc_authenticate, - TP_PROTO(const struct svc_rqst *rqst, int auth_res), + TP_PROTO(const struct svc_rqst *rqst, int auth_res, __be32 auth_stat), - TP_ARGS(rqst, auth_res), + TP_ARGS(rqst, auth_res, auth_stat), TP_STRUCT__entry( __field(u32, xid) @@ -1560,7 +1560,7 @@ TRACE_EVENT(svc_authenticate, TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->svc_status = auth_res; - __entry->auth_stat = be32_to_cpu(rqst->rq_auth_stat); + __entry->auth_stat = be32_to_cpu(auth_stat); ), TP_printk("xid=0x%08x auth_res=%s auth_stat=%s", @@ -1578,7 +1578,6 @@ TRACE_EVENT(svc_process, __field(u32, vers) __field(u32, proc) __string(service, name) - __string(procedure, rqst->rq_procinfo->pc_name) __string(addr, rqst->rq_xprt ? rqst->rq_xprt->xpt_remotebuf : "(null)") ), @@ -1588,16 +1587,13 @@ TRACE_EVENT(svc_process, __entry->vers = rqst->rq_vers; __entry->proc = rqst->rq_proc; __assign_str(service, name); - __assign_str(procedure, rqst->rq_procinfo->pc_name); __assign_str(addr, rqst->rq_xprt ? 
rqst->rq_xprt->xpt_remotebuf : "(null)"); ), - TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%s", + TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u", __get_str(addr), __entry->xid, - __get_str(service), __entry->vers, - __get_str(procedure) - ) + __get_str(service), __entry->vers, __entry->proc) ); DECLARE_EVENT_CLASS(svc_rqst_event, @@ -1756,7 +1752,6 @@ DECLARE_EVENT_CLASS(svc_xprt_event, ), \ TP_ARGS(xprt)) -DEFINE_SVC_XPRT_EVENT(received); DEFINE_SVC_XPRT_EVENT(no_write_space); DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); @@ -1854,7 +1849,6 @@ TRACE_EVENT(svc_stats_latency, TP_STRUCT__entry( __field(u32, xid) __field(unsigned long, execute) - __string(procedure, rqst->rq_procinfo->pc_name) __string(addr, rqst->rq_xprt->xpt_remotebuf) ), @@ -1862,13 +1856,11 @@ TRACE_EVENT(svc_stats_latency, __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->execute = ktime_to_us(ktime_sub(ktime_get(), rqst->rq_stime)); - __assign_str(procedure, rqst->rq_procinfo->pc_name); __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x proc=%s execute-us=%lu", - __get_str(addr), __entry->xid, __get_str(procedure), - __entry->execute) + TP_printk("addr=%s xid=0x%08x execute-us=%lu", + __get_str(addr), __entry->xid, __entry->execute) ); DECLARE_EVENT_CLASS(svc_deferred_event, diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d8536d77fea1..fbf9c5c7dd59 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -20,7 +20,6 @@ #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ -#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ @@ -28,8 +27,6 @@ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -#define FAN_RENAME 0x10000000 /* File was renamed */ 
- #define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ @@ -54,18 +51,13 @@ #define FAN_ENABLE_AUDIT 0x00000040 /* Flags to determine fanotify event format */ -#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ -#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) -/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ -#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ - FAN_REPORT_FID | FAN_REPORT_TARGET_FID) /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ @@ -82,21 +74,12 @@ #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 /* FAN_MARK_FILESYSTEM is 0x00000100 */ -#define FAN_MARK_EVICTABLE 0x00000200 -/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ -#define FAN_MARK_IGNORE 0x00000400 /* These are NOT bitwise flags. Both bits can be used togther. */ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 -/* - * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY - * for non-inode mark types. - */ -#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) - /* Deprecated - do not use this in programs and do not add new flags here! 
*/ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ @@ -140,14 +123,6 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_FID 1 #define FAN_EVENT_INFO_TYPE_DFID_NAME 2 #define FAN_EVENT_INFO_TYPE_DFID 3 -#define FAN_EVENT_INFO_TYPE_PIDFD 4 -#define FAN_EVENT_INFO_TYPE_ERROR 5 - -/* Special info types for FAN_RENAME */ -#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 -/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ -#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 -/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -173,21 +148,6 @@ struct fanotify_event_info_fid { unsigned char handle[0]; }; -/* - * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. - * It holds a pidfd for the pid that was responsible for generating an event. - */ -struct fanotify_event_info_pidfd { - struct fanotify_event_info_header hdr; - __s32 pidfd; -}; - -struct fanotify_event_info_error { - struct fanotify_event_info_header hdr; - __s32 error; - __u32 error_count; -}; - struct fanotify_response { __s32 fd; __u32 response; @@ -200,8 +160,6 @@ struct fanotify_response { /* No fd set in event */ #define FAN_NOFD -1 -#define FAN_NOPIDFD FAN_NOFD -#define FAN_EPIDFD -2 /* Helper functions to deal with fanotify_event_metadata buffers */ #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) diff --git a/include/uapi/linux/nfs3.h b/include/uapi/linux/nfs3.h index c22ab77713bd..37e4b34e6b43 100644 --- a/include/uapi/linux/nfs3.h +++ b/include/uapi/linux/nfs3.h @@ -63,12 +63,6 @@ enum nfs3_ftype { NF3BAD = 8 }; -enum nfs3_time_how { - DONT_CHANGE = 0, - SET_TO_SERVER_TIME = 1, - SET_TO_CLIENT_TIME = 2, -}; - struct nfs3_fh { unsigned short size; unsigned char data[NFS3_FHSIZE]; diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h new file mode 100644 index 000000000000..ff0ca88b1c8f --- /dev/null +++ 
b/include/uapi/linux/nfsd/nfsfh.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file describes the layout of the file handles as passed + * over the wire. + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch + */ + +#ifndef _UAPI_LINUX_NFSD_FH_H +#define _UAPI_LINUX_NFSD_FH_H + +#include +#include +#include +#include +#include + +/* + * This is the old "dentry style" Linux NFSv2 file handle. + * + * The xino and xdev fields are currently used to transport the + * ino/dev of the exported inode. + */ +struct nfs_fhbase_old { + __u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */ + __u32 fb_ino; /* our inode number */ + __u32 fb_dirino; /* dir inode number, 0 for directories */ + __u32 fb_dev; /* our device */ + __u32 fb_xdev; + __u32 fb_xino; + __u32 fb_generation; +}; + +/* + * This is the new flexible, extensible style NFSv2/v3/v4 file handle. + * by Neil Brown - March 2000 + * + * The file handle starts with a sequence of four-byte words. + * The first word contains a version number (1) and three descriptor bytes + * that tell how the remaining 3 variable length fields should be handled. + * These three bytes are auth_type, fsid_type and fileid_type. + * + * All four-byte values are in host-byte-order. + * + * The auth_type field is deprecated and must be set to 0. + * + * The fsid_type identifies how the filesystem (or export point) is + * encoded. + * Current values: + * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number + * NOTE: we cannot use the kdev_t device id value, because kdev_t.h + * says we mustn't. We must break it up and reassemble. 
+ * 1 - 4 byte user specified identifier + * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED + * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid + * + * The fileid_type identifies how the file within the filesystem is encoded. + * The values for this field are filesystem specific, except that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. + */ +struct nfs_fhbase_new { + __u8 fb_version; /* == 1, even => nfs_fhbase_old */ + __u8 fb_auth_type; + __u8 fb_fsid_type; + __u8 fb_fileid_type; + __u32 fb_auth[1]; +/* __u32 fb_fsid[0]; floating */ +/* __u32 fb_fileid[0]; floating */ +}; + +struct knfsd_fh { + unsigned int fh_size; /* significant for NFSv3. + * Points to the current size while building + * a new file handle + */ + union { + struct nfs_fhbase_old fh_old; + __u32 fh_pad[NFS4_FHSIZE/4]; + struct nfs_fhbase_new fh_new; + } fh_base; +}; + +#define ofh_dcookie fh_base.fh_old.fb_dcookie +#define ofh_ino fh_base.fh_old.fb_ino +#define ofh_dirino fh_base.fh_old.fb_dirino +#define ofh_dev fh_base.fh_old.fb_dev +#define ofh_xdev fh_base.fh_old.fb_xdev +#define ofh_xino fh_base.fh_old.fb_xino +#define ofh_generation fh_base.fh_old.fb_generation + +#define fh_version fh_base.fh_new.fb_version +#define fh_fsid_type fh_base.fh_new.fb_fsid_type +#define fh_auth_type fh_base.fh_new.fb_auth_type +#define fh_fileid_type fh_base.fh_new.fb_fileid_type +#define fh_fsid fh_base.fh_new.fb_auth + +/* Do not use, provided for userspace compatibility.
*/ +#define fh_auth fh_base.fh_new.fb_auth + +#endif /* _UAPI_LINUX_NFSD_FH_H */ diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index 691f90dd09d2..b2ebacd2f309 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa audit_update_mark(audit_mark, dentry->d_inode); audit_mark->rule = krule; - ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0); + ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true); if (ret < 0) { audit_mark->path = NULL; fsnotify_put_mark(&audit_mark->mark); @@ -161,7 +161,8 @@ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask, audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark); - if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group)) + if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group) || + WARN_ON_ONCE(!inode)) return 0; if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { @@ -182,8 +183,7 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = { static int __init audit_fsnotify_init(void) { - audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops, - FSNOTIFY_GROUP_DUPS); + audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops); if (IS_ERR(audit_fsnotify_group)) { audit_fsnotify_group = NULL; audit_panic("cannot create audit fsnotify group"); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 0c35879bbf7c..39241207ec04 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1077,7 +1077,7 @@ static int __init audit_tree_init(void) audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC); - audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0); + audit_tree_group = fsnotify_alloc_group(&audit_tree_ops); if (IS_ERR(audit_tree_group)) audit_panic("cannot initialize fsnotify group for rectree watches"); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 
5cf22fe30149..edbeffee64b8 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -472,7 +472,8 @@ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask, parent = container_of(inode_mark, struct audit_parent, mark); - if (WARN_ON_ONCE(inode_mark->group != audit_watch_group)) + if (WARN_ON_ONCE(inode_mark->group != audit_watch_group) || + WARN_ON_ONCE(!inode)) return 0; if (mask & (FS_CREATE|FS_MOVED_TO) && inode) @@ -492,7 +493,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = { static int __init audit_watch_init(void) { - audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0); + audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops); if (IS_ERR(audit_watch_group)) { audit_watch_group = NULL; audit_panic("cannot create audit fsnotify group"); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5966013bc788..6b14b4c4068c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -507,7 +507,7 @@ static void *bpf_obj_do_get(const char __user *pathname, return ERR_PTR(ret); inode = d_backing_inode(path.dentry); - ret = path_permission(&path, ACC_MODE(flags)); + ret = inode_permission(inode, ACC_MODE(flags)); if (ret) goto out; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f3b7856eadb6..2b3b1a687d36 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3909,6 +3909,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, pid_t pid = attr->task_fd_query.pid; u32 fd = attr->task_fd_query.fd; const struct perf_event *event; + struct files_struct *files; struct task_struct *task; struct file *file; int err; @@ -3928,11 +3929,23 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (!task) return -ENOENT; - err = 0; - file = fget_task(task, fd); + files = get_files_struct(task); put_task_struct(task); + if (!files) + return -ENOENT; + + err = 0; + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); if (!file) - return -EBADF; + err = 
-EBADF; + else + get_file(file); + spin_unlock(&files->file_lock); + put_files_struct(files); + + if (err) + goto out; if (file->f_op == &bpf_link_fops) { struct bpf_link *link = file->private_data; @@ -3972,6 +3985,7 @@ out_not_supp: err = -ENOTSUPP; put_file: fput(file); +out: return err; } diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 762b4d7c3779..f3d3a562a802 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -185,7 +185,7 @@ again: for (; curr_fd < max_fds; curr_fd++) { struct file *f; - f = files_lookup_fd_rcu(curr_files, curr_fd); + f = fcheck_files(curr_files, curr_fd); if (!f) continue; if (!get_file_rcu(f)) diff --git a/kernel/fork.c b/kernel/fork.c index 9b2428865267..c47ad81c627c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3138,21 +3138,21 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) * the exec layer of the kernel. */ -int unshare_files(void) +int unshare_files(struct files_struct **displaced) { struct task_struct *task = current; - struct files_struct *old, *copy = NULL; + struct files_struct *copy = NULL; int error; error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©); - if (error || !copy) + if (error || !copy) { + *displaced = NULL; return error; - - old = task->files; + } + *displaced = task->files; task_lock(task); task->files = copy; task_unlock(task); - put_files_struct(old); return 0; } diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 17d3a704bafa..96505113b907 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -200,11 +200,6 @@ unsigned long kallsyms_lookup_name(const char *name) return module_kallsyms_lookup_name(name); } -#ifdef CONFIG_LIVEPATCH -/* - * Iterate over all symbols in vmlinux. For symbols from modules use - * module_kallsyms_on_each_symbol instead. 
- */ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) @@ -220,9 +215,8 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, if (ret != 0) return ret; } - return 0; + return module_kallsyms_on_each_symbol(fn, data); } -#endif /* CONFIG_LIVEPATCH */ static unsigned long get_symbol_pos(unsigned long addr, unsigned long *symbolsize, diff --git a/kernel/kcmp.c b/kernel/kcmp.c index 5353edfad8e1..c0d2ad9b4705 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -61,11 +61,16 @@ static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type) static struct file * get_file_raw_ptr(struct task_struct *task, unsigned int idx) { - struct file *file; + struct file *file = NULL; + task_lock(task); rcu_read_lock(); - file = task_lookup_fd_rcu(task, idx); + + if (task->files) + file = fcheck_files(task->files, idx); + rcu_read_unlock(); + task_unlock(task); return file; } @@ -102,6 +107,7 @@ static int kcmp_epoll_target(struct task_struct *task1, { struct file *filp, *filp_epoll, *filp_tgt; struct kcmp_epoll_slot slot; + struct files_struct *files; if (copy_from_user(&slot, uslot, sizeof(slot))) return -EFAULT; @@ -110,12 +116,23 @@ static int kcmp_epoll_target(struct task_struct *task1, if (!filp) return -EBADF; - filp_epoll = fget_task(task2, slot.efd); - if (!filp_epoll) + files = get_files_struct(task2); + if (!files) return -EBADF; - filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); - fput(filp_epoll); + spin_lock(&files->file_lock); + filp_epoll = fcheck_files(files, slot.efd); + if (filp_epoll) + get_file(filp_epoll); + else + filp_tgt = ERR_PTR(-EBADF); + spin_unlock(&files->file_lock); + put_files_struct(files); + + if (filp_epoll) { + filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); + fput(filp_epoll); + } if (IS_ERR(filp_tgt)) return PTR_ERR(filp_tgt); diff --git a/kernel/kthread.c b/kernel/kthread.c index ec9f61995004..9d736f57b84f 100644 --- 
a/kernel/kthread.c +++ b/kernel/kthread.c @@ -262,21 +262,6 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -/** - * kthread_exit - Cause the current kthread return @result to kthread_stop(). - * @result: The integer value to return to kthread_stop(). - * - * While kthread_exit can be called directly, it exists so that - * functions which do some additional work in non-modular code such as - * module_put_and_kthread_exit can be implemented. - * - * Does not return. - */ -void __noreturn kthread_exit(long result) -{ - do_exit(result); -} - static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -294,13 +279,13 @@ static int kthread(void *_create) done = xchg(&create->done, NULL); if (!done) { kfree(create); - kthread_exit(-EINTR); + do_exit(-EINTR); } if (!self) { create->result = ERR_PTR(-ENOMEM); complete(done); - kthread_exit(-ENOMEM); + do_exit(-ENOMEM); } self->threadfn = threadfn; @@ -327,7 +312,7 @@ static int kthread(void *_create) __kthread_parkme(self); ret = threadfn(data); } - kthread_exit(ret); + do_exit(ret); } /* called from do_fork() to get node information for about to be created task */ @@ -637,7 +622,7 @@ EXPORT_SYMBOL_GPL(kthread_park); * instead of calling wake_up_process(): the thread will exit without * calling threadfn(). * - * If threadfn() may call kthread_exit() itself, the caller must ensure + * If threadfn() may call do_exit() itself, the caller must ensure * task_struct can't go away. 
* * Returns the result of threadfn(), or %-EINTR if wake_up_process() diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 147ed154ebc7..f5faf935c2d8 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include "core.h" #include "patch.h" @@ -58,7 +57,7 @@ static void klp_find_object_module(struct klp_object *obj) if (!klp_is_module(obj)) return; - rcu_read_lock_sched(); + mutex_lock(&module_mutex); /* * We do not want to block removal of patched modules and therefore * we do not take a reference here. The patches are removed by @@ -75,7 +74,7 @@ static void klp_find_object_module(struct klp_object *obj) if (mod && mod->klp_alive) obj->mod = mod; - rcu_read_unlock_sched(); + mutex_unlock(&module_mutex); } static bool klp_initialized(void) @@ -164,10 +163,12 @@ static int klp_find_object_symbol(const char *objname, const char *name, .pos = sympos, }; + mutex_lock(&module_mutex); if (objname) module_kallsyms_on_each_symbol(klp_find_callback, &args); else kallsyms_on_each_symbol(klp_find_callback, &args); + mutex_unlock(&module_mutex); /* * Ensure an address was found. If sympos is 0, ensure symbol is unique; diff --git a/kernel/module.c b/kernel/module.c index 1ea89ae7c2cc..93fade94f108 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -259,6 +259,11 @@ static void mod_update_bounds(struct module *mod) struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ #endif /* CONFIG_KGDB_KDB */ +static void module_assert_mutex(void) +{ + lockdep_assert_held(&module_mutex); +} + static void module_assert_mutex_or_preempt(void) { #ifdef CONFIG_LOCKDEP @@ -338,14 +343,14 @@ static inline void add_taint_module(struct module *mod, unsigned flag, /* * A thread that wants to hold a reference to a module only while it - * is running can call this to safely exit. + * is running can call this to safely exit. nfsd and lockd use this. 
*/ -void __noreturn __module_put_and_kthread_exit(struct module *mod, long code) +void __noreturn __module_put_and_exit(struct module *mod, long code) { module_put(mod); - kthread_exit(code); + do_exit(code); } -EXPORT_SYMBOL(__module_put_and_kthread_exit); +EXPORT_SYMBOL(__module_put_and_exit); /* Find a module section: 0 means not found. */ static unsigned int find_sec(const struct load_info *info, const char *name) @@ -640,6 +645,7 @@ static struct module *find_module_all(const char *name, size_t len, struct module *find_module(const char *name) { + module_assert_mutex(); return find_module_all(name, strlen(name), false); } @@ -4489,7 +4495,6 @@ unsigned long module_kallsyms_lookup_name(const char *name) return ret; } -#ifdef CONFIG_LIVEPATCH int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) @@ -4498,7 +4503,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned int i; int ret; - mutex_lock(&module_mutex); + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { /* We hold module_mutex: no need for rcu_dereference_sched */ struct mod_kallsyms *kallsyms = mod->kallsyms; @@ -4514,13 +4520,11 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, ret = fn(data, kallsyms_symbol_name(kallsyms, i), mod, kallsyms_symbol_value(sym)); if (ret != 0) - break; + return ret; } } - mutex_unlock(&module_mutex); - return ret; + return 0; } -#endif /* CONFIG_LIVEPATCH */ #endif /* CONFIG_KALLSYMS */ static void cfi_init(struct module *mod) diff --git a/kernel/pid.c b/kernel/pid.c index 15bbb9ddb2bf..48babb1dd3e1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -551,21 +551,13 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) * Note, that this function can only be called after the fd table has * been unshared to avoid leaking the pidfd to the new process. * - * This symbol should not be explicitly exported to loadable modules. 
- * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ -int pidfd_create(struct pid *pid, unsigned int flags) +static int pidfd_create(struct pid *pid, unsigned int flags) { int fd; - if (!pid || !pid_has_task(pid, PIDTYPE_TGID)) - return -EINVAL; - - if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC)) - return -EINVAL; - fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), flags | O_RDWR | O_CLOEXEC); if (fd < 0) @@ -605,7 +597,10 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) if (!p) return -ESRCH; - fd = pidfd_create(p, flags); + if (pid_has_task(p, PIDTYPE_TGID)) + fd = pidfd_create(p, flags); + else + fd = -EINVAL; put_pid(p); return fd; diff --git a/kernel/sys.c b/kernel/sys.c index f268f24a87ec..1de01fab5788 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1883,7 +1883,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path)) goto exit; - err = file_permission(exe.file, MAY_EXEC); + err = inode_permission(inode, MAY_EXEC); if (err) goto exit; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index abd37b81e9d8..4deacde2e3ee 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -145,9 +145,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #ifdef CONFIG_INOTIFY_USER #include #endif -#ifdef CONFIG_FANOTIFY -#include -#endif #ifdef CONFIG_PROC_SYSCTL @@ -549,21 +546,6 @@ static void proc_put_char(void **buf, size_t *size, char c) } } -static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp, - int *valp, - int write, void *data) -{ - if (write) { - *(bool *)valp = *lvalp; - } else { - int val = *(bool *)valp; - - *lvalp = (unsigned long)val; - *negp = false; - } - return 0; -} - static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) @@ -826,26 +808,6 @@ static int do_proc_douintvec(struct ctl_table *table, int write, buffer, lenp, ppos, conv, 
data); } -/** - * proc_dobool - read/write a bool - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @buffer: the user buffer - * @lenp: the size of the user buffer - * @ppos: file position - * - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * - * Returns 0 on success. - */ -int proc_dobool(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - return do_proc_dointvec(table, write, buffer, lenp, ppos, - do_proc_dobool_conv, NULL); -} - /** * proc_dointvec - read a vector of integers * @table: the sysctl table @@ -1682,12 +1644,6 @@ int proc_dostring(struct ctl_table *table, int write, return -ENOSYS; } -int proc_dobool(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - int proc_dointvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -3394,14 +3350,7 @@ static struct ctl_table fs_table[] = { .mode = 0555, .child = inotify_table, }, -#endif -#ifdef CONFIG_FANOTIFY - { - .procname = "fanotify", - .mode = 0555, - .child = fanotify_table, - }, -#endif +#endif #ifdef CONFIG_EPOLL { .procname = "epoll", @@ -3564,7 +3513,6 @@ int __init sysctl_init(void) * No sense putting this after each symbol definition, twice, * exception granted :-) */ -EXPORT_SYMBOL(proc_dobool); EXPORT_SYMBOL(proc_dointvec); EXPORT_SYMBOL(proc_douintvec); EXPORT_SYMBOL(proc_dointvec_jiffies); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5453af26ff76..718357289899 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -124,9 +124,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk) if (!p) return true; *p = '\0'; - rcu_read_lock_sched(); + mutex_lock(&module_mutex); ret = !!find_module(tk->symbol); - rcu_read_unlock_sched(); + mutex_unlock(&module_mutex); *p = ':'; return ret; 
diff --git a/kernel/ucount.c b/kernel/ucount.c index 8d8874f1c35e..11b1596e2542 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -73,10 +73,6 @@ static struct ctl_table user_table[] = { #ifdef CONFIG_INOTIFY_USER UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), -#endif -#ifdef CONFIG_FANOTIFY - UCOUNT_ENTRY("max_fanotify_groups"), - UCOUNT_ENTRY("max_fanotify_marks"), #endif { } }; diff --git a/mm/madvise.c b/mm/madvise.c index ddecb4434ccf..410f366b5df4 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -568,7 +568,7 @@ static inline bool can_do_file_pageout(struct vm_area_struct *vma) * opens a side channel. */ return inode_owner_or_capable(file_inode(vma->vm_file)) || - file_permission(vma->vm_file, MAY_WRITE) == 0; + inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; } static long madvise_pageout(struct vm_area_struct *vma, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 67b098ec9453..c4077f277ffc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4954,7 +4954,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, /* the process need read permission on control file */ /* AV: shouldn't we check that it's been opened for read instead? */ - ret = file_permission(cfile.file, MAY_READ); + ret = inode_permission(file_inode(cfile.file), MAY_READ); if (ret < 0) goto out_put_cfile; diff --git a/mm/mincore.c b/mm/mincore.c index 7bdb4673f776..02db1a834021 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -167,7 +167,7 @@ static inline bool can_do_mincore(struct vm_area_struct *vma) * mappings, which opens a side channel. 
*/ return inode_owner_or_capable(file_inode(vma->vm_file)) || - file_permission(vma->vm_file, MAY_WRITE) == 0; + inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; } static const struct mm_walk_ops mincore_walk_ops = { diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 09b6d825124e..43c284158f63 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -535,7 +535,7 @@ static int bnep_session(void *arg) up_write(&bnep_session_sem); free_netdev(dev); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 90d130588a3e..83eb84e8e688 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -323,7 +323,7 @@ static int cmtp_session(void *arg) up_write(&cmtp_session_sem); kfree(session); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 3ff870599eb7..b946a6379433 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg) l2cap_unregister_user(session->conn, &session->user); hidp_session_put(session); - module_put_and_kthread_exit(0); + module_put_and_exit(0); return 0; } diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index a857fc99431c..e265b8d38aa1 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -800,7 +800,7 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, scratch = alloc_page(GFP_KERNEL); if (!scratch) return -ENOMEM; - xdr_set_scratch_page(xdr, scratch); + xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE); /* res->status */ err = gssx_dec_status(xdr, &res->status); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 329eac782cc5..784c8b24f164 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ 
b/net/sunrpc/auth_gss/svcauth_gss.c @@ -707,11 +707,11 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) /* * Verify the checksum on the header and return SVC_OK on success. * Otherwise, return SVC_DROP (in the case of a bad sequence number) - * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat. + * or return SVC_DENIED and indicate error in authp. */ static int gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, - __be32 *rpcstart, struct rpc_gss_wire_cred *gc) + __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp) { struct gss_ctx *ctx_id = rsci->mechctx; struct xdr_buf rpchdr; @@ -725,7 +725,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart; xdr_buf_from_iov(&iov, &rpchdr); - rqstp->rq_auth_stat = rpc_autherr_badverf; + *authp = rpc_autherr_badverf; if (argv->iov_len < 4) return SVC_DENIED; flavor = svc_getnl(argv); @@ -737,13 +737,13 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, if (rqstp->rq_deferred) /* skip verification of revisited request */ return SVC_OK; if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { - rqstp->rq_auth_stat = rpcsec_gsserr_credproblem; + *authp = rpcsec_gsserr_credproblem; return SVC_DENIED; } if (gc->gc_seq > MAXSEQ) { trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq); - rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem; + *authp = rpcsec_gsserr_ctxproblem; return SVC_DENIED; } if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq)) @@ -1038,8 +1038,6 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &svcdata->clcred; int stat; - rqstp->rq_auth_stat = rpc_autherr_badcred; - /* * A gss export can be specified either by: * export *(sec=krb5,rw) @@ -1055,8 +1053,6 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) stat = svcauth_unix_set_client(rqstp); if (stat == SVC_DROP || stat == SVC_CLOSE) return stat; - - rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } @@ -1140,7 
+1136,7 @@ static void gss_free_in_token_pages(struct gssp_in_token *in_token) } static int gss_read_proxy_verf(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc, + struct rpc_gss_wire_cred *gc, __be32 *authp, struct xdr_netobj *in_handle, struct gssp_in_token *in_token) { @@ -1149,7 +1145,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, int pages, i, res, pgto, pgfrom; size_t inlen, to_offs, from_offs; - res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle); + res = gss_read_common_verf(gc, argv, authp, in_handle); if (res) return res; @@ -1230,7 +1226,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit, * Otherwise, drop the request pending an answer to the upcall. */ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc) + struct rpc_gss_wire_cred *gc, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -1239,7 +1235,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); memset(&rsikey, 0, sizeof(rsikey)); - ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat, + ret = gss_read_verf(gc, argv, authp, &rsikey.in_handle, &rsikey.in_token); if (ret) return ret; @@ -1342,7 +1338,7 @@ out: } static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc) + struct rpc_gss_wire_cred *gc, __be32 *authp) { struct kvec *resv = &rqstp->rq_res.head[0]; struct xdr_netobj cli_handle; @@ -1354,7 +1350,8 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); memset(&ud, 0, sizeof(ud)); - ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token); + ret = gss_read_proxy_verf(rqstp, gc, authp, + &ud.in_handle, &ud.in_token); if (ret) return ret; @@ -1527,7 +1524,7 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net) {} * response here and return SVC_COMPLETE. 
*/ static int -svcauth_gss_accept(struct svc_rqst *rqstp) +svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -1540,7 +1537,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp) int ret; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); - rqstp->rq_auth_stat = rpc_autherr_badcred; + *authp = rpc_autherr_badcred; if (!svcdata) svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); if (!svcdata) @@ -1577,22 +1574,22 @@ svcauth_gss_accept(struct svc_rqst *rqstp) if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) goto auth_err; - rqstp->rq_auth_stat = rpc_autherr_badverf; + *authp = rpc_autherr_badverf; switch (gc->gc_proc) { case RPC_GSS_PROC_INIT: case RPC_GSS_PROC_CONTINUE_INIT: if (use_gss_proxy(SVC_NET(rqstp))) - return svcauth_gss_proxy_init(rqstp, gc); + return svcauth_gss_proxy_init(rqstp, gc, authp); else - return svcauth_gss_legacy_init(rqstp, gc); + return svcauth_gss_legacy_init(rqstp, gc, authp); case RPC_GSS_PROC_DATA: case RPC_GSS_PROC_DESTROY: /* Look up the context, and check the verifier: */ - rqstp->rq_auth_stat = rpcsec_gsserr_credproblem; + *authp = rpcsec_gsserr_credproblem; rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx); if (!rsci) goto auth_err; - switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) { + switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { case SVC_OK: break; case SVC_DENIED: @@ -1602,7 +1599,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp) } break; default: - rqstp->rq_auth_stat = rpc_autherr_rejectedcred; + *authp = rpc_autherr_rejectedcred; goto auth_err; } @@ -1618,13 +1615,13 @@ svcauth_gss_accept(struct svc_rqst *rqstp) svc_putnl(resv, RPC_SUCCESS); goto complete; case RPC_GSS_PROC_DATA: - rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem; + *authp = rpcsec_gsserr_ctxproblem; svcdata->verf_start = resv->iov_base + resv->iov_len; if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto 
auth_err; rqstp->rq_cred = rsci->cred; get_group_info(rsci->cred.cr_group_info); - rqstp->rq_auth_stat = rpc_autherr_badcred; + *authp = rpc_autherr_badcred; switch (gc->gc_svc) { case RPC_GSS_SVC_NONE: break; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index a4c9d410eb8d..a00890962e11 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -821,7 +821,6 @@ void rpc_exit_task(struct rpc_task *task) else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); if (task->tk_ops->rpc_call_done != NULL) { - trace_rpc_task_call_done(task, task->tk_ops->rpc_call_done); task->tk_ops->rpc_call_done(task, task->tk_calldata); if (task->tk_action != NULL) { /* Always release the RPC slot and buffer memory */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 26d972c54a59..cfe8b911ca01 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -35,37 +35,18 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL +#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; +#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. 
*/ - -struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -}; - -static struct svc_pool_map svc_pool_map = { +struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; +EXPORT_SYMBOL_GPL(svc_pool_map); static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ @@ -236,12 +217,10 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) /* * Add a reference to the global map of cpus to pools (and - * vice versa) if pools are in use. - * Initialise the map if we're the first user. - * Returns the number of pools. If this is '1', no reference - * was taken. + * vice versa). Initialise the map if we're the first user. + * Returns the number of pools. */ -static unsigned int +unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -251,7 +230,6 @@ svc_pool_map_get(void) if (m->count++) { mutex_unlock(&svc_pool_map_mutex); - WARN_ON_ONCE(m->npools <= 1); return m->npools; } @@ -267,36 +245,30 @@ svc_pool_map_get(void) break; } - if (npools <= 0) { + if (npools < 0) { /* default, or memory allocation failure */ npools = 1; m->mode = SVC_POOL_GLOBAL; } m->npools = npools; - if (npools == 1) - /* service is unpooled, so doesn't hold a reference */ - m->count--; - mutex_unlock(&svc_pool_map_mutex); - return npools; + return m->npools; } +EXPORT_SYMBOL_GPL(svc_pool_map_get); /* - * Drop a reference to the global map of cpus to pools, if - * pools were in use, i.e. if npools > 1. + * Drop a reference to the global map of cpus to pools. * When the last reference is dropped, the map data is * freed; this allows the sysadmin to change the pool * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. 
*/ -static void -svc_pool_map_put(int npools) +void +svc_pool_map_put(void) { struct svc_pool_map *m = &svc_pool_map; - if (npools <= 1) - return; mutex_lock(&svc_pool_map_mutex); if (!--m->count) { @@ -309,6 +281,7 @@ svc_pool_map_put(int npools) mutex_unlock(&svc_pool_map_mutex); } +EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { @@ -365,18 +338,21 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) struct svc_pool_map *m = &svc_pool_map; unsigned int pidx = 0; - if (serv->sv_nrpools <= 1) - return serv->sv_pools; - - switch (m->mode) { - case SVC_POOL_PERCPU: - pidx = m->to_pool[cpu]; - break; - case SVC_POOL_PERNODE: - pidx = m->to_pool[cpu_to_node(cpu)]; - break; + /* + * An uninitialised map happens in a pure client when + * lockd is brought up, so silently treat it the + * same as SVC_POOL_GLOBAL. + */ + if (svc_serv_is_pooled(serv)) { + switch (m->mode) { + case SVC_POOL_PERCPU: + pidx = m->to_pool[cpu]; + break; + case SVC_POOL_PERNODE: + pidx = m->to_pool[cpu_to_node(cpu)]; + break; + } } - return &serv->sv_pools[pidx % serv->sv_nrpools]; } @@ -446,7 +422,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - int (*threadfn)(void *data)) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -457,13 +433,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return NULL; serv->sv_name = prog->pg_name; serv->sv_program = prog; - kref_init(&serv->sv_refcnt); + serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; if (bufsize > RPCSVC_MAXPAYLOAD) bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? 
bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_threadfn = threadfn; + serv->sv_ops = ops; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -509,56 +485,59 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, return serv; } -/** - * svc_create - Create an RPC service - * @prog: the RPC program the new service will handle - * @bufsize: maximum message size for @prog - * @threadfn: a function to service RPC requests for @prog - * - * Returns an instantiated struct svc_serv object or NULL. - */ -struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, - int (*threadfn)(void *data)) +struct svc_serv * +svc_create(struct svc_program *prog, unsigned int bufsize, + const struct svc_serv_ops *ops) { - return __svc_create(prog, bufsize, 1, threadfn); + return __svc_create(prog, bufsize, /*npools*/1, ops); } EXPORT_SYMBOL_GPL(svc_create); -/** - * svc_create_pooled - Create an RPC service with pooled threads - * @prog: the RPC program the new service will handle - * @bufsize: maximum message size for @prog - * @threadfn: a function to service RPC requests for @prog - * - * Returns an instantiated struct svc_serv object or NULL. 
- */ -struct svc_serv *svc_create_pooled(struct svc_program *prog, - unsigned int bufsize, - int (*threadfn)(void *data)) +struct svc_serv * +svc_create_pooled(struct svc_program *prog, unsigned int bufsize, + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, threadfn); + serv = __svc_create(prog, bufsize, npools, ops); if (!serv) goto out_err; return serv; out_err: - svc_pool_map_put(npools); + svc_pool_map_put(); return NULL; } EXPORT_SYMBOL_GPL(svc_create_pooled); +void svc_shutdown_net(struct svc_serv *serv, struct net *net) +{ + svc_close_net(serv, net); + + if (serv->sv_ops->svo_shutdown) + serv->sv_ops->svo_shutdown(serv, net); +} +EXPORT_SYMBOL_GPL(svc_shutdown_net); + /* * Destroy an RPC service. Should be called with appropriate locking to - * protect sv_permsocks and sv_tempsocks. + * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. */ void -svc_destroy(struct kref *ref) +svc_destroy(struct svc_serv *serv) { - struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt); + dprintk("svc: svc_destroy(%s, %d)\n", + serv->sv_program->pg_name, + serv->sv_nrthreads); + + if (serv->sv_nrthreads) { + if (--(serv->sv_nrthreads) != 0) { + svc_sock_update_bufs(serv); + return; + } + } else + printk("svc_destroy: no threads for serv=%p!\n", serv); - dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name); del_timer_sync(&serv->sv_temptimer); /* @@ -570,7 +549,8 @@ svc_destroy(struct kref *ref) cache_clean_deferred(serv); - svc_pool_map_put(serv->sv_nrpools); + if (svc_serv_is_pooled(serv)) + svc_pool_map_put(); kfree(serv->sv_pools); kfree(serv); @@ -634,10 +614,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) rqstp->rq_server = serv; rqstp->rq_pool = pool; - rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0); - if (!rqstp->rq_scratch_page) - goto out_enomem; - rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, 
GFP_KERNEL, node); if (!rqstp->rq_argp) goto out_enomem; @@ -656,7 +632,7 @@ out_enomem: } EXPORT_SYMBOL_GPL(svc_rqst_alloc); -static struct svc_rqst * +struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; @@ -665,17 +641,14 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) return ERR_PTR(-ENOMEM); - svc_get(serv); - spin_lock_bh(&serv->sv_lock); - serv->sv_nrthreads += 1; - spin_unlock_bh(&serv->sv_lock); - + serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); spin_unlock_bh(&pool->sp_lock); return rqstp; } +EXPORT_SYMBOL_GPL(svc_prepare_thread); /* * Choose a pool in which to create a new thread, for svc_set_num_threads @@ -749,9 +722,11 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - task = kthread_create_on_node(serv->sv_threadfn, rqstp, + __module_get(serv->sv_ops->svo_module); + task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { + module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); return PTR_ERR(task); } @@ -767,13 +742,59 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } + +/* destroy old threads */ +static int +svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + + /* destroy old threads */ + do { + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; + send_sig(SIGINT, task, 1); + nrservs++; + } while (nrservs < 0); + + return 0; +} + /* * Create or destroy enough new threads to make the number * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. 
Caller must ensure that mutual exclusion between this and * server startup or shutdown. + * + * Destroying threads relies on the service threads filling in + * rqstp->rq_task, which only the nfs ones do. Assumes the serv + * has been created using svc_create_pooled(). + * + * Based on code that used to be in nfsd_svc() but tweaked + * to be pool-aware. */ +int +svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +{ + if (pool == NULL) { + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); + } else { + spin_lock_bh(&pool->sp_lock); + nrservs -= pool->sp_nrthreads; + spin_unlock_bh(&pool->sp_lock); + } + + if (nrservs > 0) + return svc_start_kthreads(serv, pool, nrservs); + if (nrservs < 0) + return svc_signal_kthreads(serv, pool, nrservs); + return 0; +} +EXPORT_SYMBOL_GPL(svc_set_num_threads); /* destroy old threads */ static int @@ -798,10 +819,11 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } int -svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) +svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { if (pool == NULL) { - nrservs -= serv->sv_nrthreads; + /* The -1 assumes caller has done a svc_get() */ + nrservs -= (serv->sv_nrthreads-1); } else { spin_lock_bh(&pool->sp_lock); nrservs -= pool->sp_nrthreads; @@ -814,28 +836,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return svc_stop_kthreads(serv, pool, nrservs); return 0; } -EXPORT_SYMBOL_GPL(svc_set_num_threads); - -/** - * svc_rqst_replace_page - Replace one page in rq_pages[] - * @rqstp: svc_rqst with pages to replace - * @page: replacement page - * - * When replacing a page in rq_pages, batch the release of the - * replaced pages to avoid hammering the page allocator. 
- */ -void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) -{ - if (*rqstp->rq_next_page) { - if (!pagevec_space(&rqstp->rq_pvec)) - __pagevec_release(&rqstp->rq_pvec); - pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page); - } - - get_page(page); - *(rqstp->rq_next_page++) = page; -} -EXPORT_SYMBOL_GPL(svc_rqst_replace_page); +EXPORT_SYMBOL_GPL(svc_set_num_threads_sync); /* * Called from a server thread as it's exiting. Caller must hold the "service @@ -845,7 +846,6 @@ void svc_rqst_free(struct svc_rqst *rqstp) { svc_release_buffer(rqstp); - put_page(rqstp->rq_scratch_page); kfree(rqstp->rq_resp); kfree(rqstp->rq_argp); kfree(rqstp->rq_auth_data); @@ -865,14 +865,11 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); - spin_lock_bh(&serv->sv_lock); - serv->sv_nrthreads -= 1; - spin_unlock_bh(&serv->sv_lock); - svc_sock_update_bufs(serv); - svc_rqst_free(rqstp); - svc_put(serv); + /* Release the server */ + if (serv) + svc_destroy(serv); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1164,6 +1161,22 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) 
{} #endif +__be32 +svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err) +{ + set_bit(RQ_AUTHERR, &rqstp->rq_flags); + return auth_err; +} +EXPORT_SYMBOL_GPL(svc_return_autherr); + +static __be32 +svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp) +{ + if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags)) + return *statp; + return rpc_auth_ok; +} + static int svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) { @@ -1187,7 +1200,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) test_bit(RQ_DROPME, &rqstp->rq_flags)) return 0; - if (rqstp->rq_auth_stat != rpc_auth_ok) + if (test_bit(RQ_AUTHERR, &rqstp->rq_flags)) return 1; if (*statp != rpc_success) @@ -1237,7 +1250,7 @@ svc_generic_init_request(struct svc_rqst *rqstp, rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; /* Initialize storage for argp and resp */ - memset(rqstp->rq_argp, 0, procp->pc_argzero); + memset(rqstp->rq_argp, 0, procp->pc_argsize); memset(rqstp->rq_resp, 0, procp->pc_ressize); /* Bump per-procedure stats counter */ @@ -1266,7 +1279,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) struct svc_process_info process; __be32 *statp; u32 prog, vers; - __be32 rpc_stat; + __be32 auth_stat, rpc_stat; int auth_res; __be32 *reply_statp; @@ -1309,12 +1322,14 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) * We do this before anything else in order to get a decent * auth verifier. 
*/ - auth_res = svc_authenticate(rqstp); + auth_res = svc_authenticate(rqstp, &auth_stat); /* Also give the program a chance to reject this call: */ - if (auth_res == SVC_OK && progp) + if (auth_res == SVC_OK && progp) { + auth_stat = rpc_autherr_badcred; auth_res = progp->pg_authenticate(rqstp); + } if (auth_res != SVC_OK) - trace_svc_authenticate(rqstp, auth_res); + trace_svc_authenticate(rqstp, auth_res, auth_stat); switch (auth_res) { case SVC_OK: break; @@ -1373,15 +1388,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto release_dropit; if (*statp == rpc_garbage_args) goto err_garbage; + auth_stat = svc_get_autherr(rqstp, statp); + if (auth_stat != rpc_auth_ok) + goto err_release_bad_auth; } else { dprintk("svc: calling dispatcher\n"); if (!process.dispatch(rqstp, statp)) goto release_dropit; /* Release reply info */ } - if (rqstp->rq_auth_stat != rpc_auth_ok) - goto err_release_bad_auth; - /* Check RPC status result */ if (*statp != rpc_success) resv->iov_len = ((void*)statp) - resv->iov_base + 4; @@ -1410,7 +1425,7 @@ release_dropit: svc_authorise(rqstp); close_xprt: if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) - svc_xprt_close(rqstp->rq_xprt); + svc_close_xprt(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); return 0; @@ -1431,14 +1446,13 @@ err_release_bad_auth: if (procp->pc_release) procp->pc_release(rqstp); err_bad_auth: - dprintk("svc: authentication failed (%d)\n", - be32_to_cpu(rqstp->rq_auth_stat)); + dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); serv->sv_stats->rpcbadauth++; /* Restore write pointer to location of accept status: */ xdr_ressize_check(rqstp, reply_statp); svc_putnl(resv, 1); /* REJECT */ svc_putnl(resv, 1); /* AUTH_ERROR */ - svc_putu32(resv, rqstp->rq_auth_stat); /* status */ + svc_putnl(resv, ntohl(auth_stat)); /* status */ goto sendit; err_bad_prog: @@ -1612,7 +1626,7 @@ u32 svc_max_payload(const struct svc_rqst *rqstp) 
EXPORT_SYMBOL_GPL(svc_max_payload); /** - * svc_encode_result_payload - mark a range of bytes as a result payload + * svc_encode_read_payload - mark a range of bytes as a READ payload * @rqstp: svc_rqst to operate on * @offset: payload's byte offset in rqstp->rq_res * @length: size of payload, in bytes @@ -1620,28 +1634,26 @@ EXPORT_SYMBOL_GPL(svc_max_payload); * Returns zero on success, or a negative errno if a permanent * error occurred. */ -int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length) +int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) { - return rqstp->rq_xprt->xpt_ops->xpo_result_payload(rqstp, offset, - length); + return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length); } -EXPORT_SYMBOL_GPL(svc_encode_result_payload); +EXPORT_SYMBOL_GPL(svc_encode_read_payload); /** * svc_fill_write_vector - Construct data argument for VFS write call * @rqstp: svc_rqst to operate on - * @payload: xdr_buf containing only the write data payload + * @pages: list of pages containing data payload + * @first: buffer containing first section of write payload + * @total: total number of bytes of write payload * * Fills in rqstp::rq_vec, and returns the number of elements. 
*/ -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload) +unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages, + struct kvec *first, size_t total) { - struct page **pages = payload->pages; - struct kvec *first = payload->head; struct kvec *vec = rqstp->rq_vec; - size_t total = payload->len; unsigned int i; /* Some types of transport can present the write payload diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d1eacf3358b8..06e503466c32 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -233,35 +233,30 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xprt; } -/** - * svc_xprt_received - start next receiver thread - * @xprt: controlling transport - * - * The caller must hold the XPT_BUSY bit and must +/* + * svc_xprt_received conditionally queues the transport for processing + * by another thread. The caller must hold the XPT_BUSY bit and must * not thereafter touch transport data. * * Note: XPT_DATA only gets cleared when a read-attempt finds no (or * insufficient) data. 
*/ -void svc_xprt_received(struct svc_xprt *xprt) +static void svc_xprt_received(struct svc_xprt *xprt) { if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) { WARN_ONCE(1, "xprt=0x%p already busy!", xprt); return; } - trace_svc_xprt_received(xprt); - /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_enqueue with: + * 'put', so we need a reference to call svc_enqueue_xprt with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } -EXPORT_SYMBOL_GPL(svc_xprt_received); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) { @@ -272,7 +267,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, +static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) @@ -308,35 +303,21 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name, return -EPROTONOSUPPORT; } -/** - * svc_xprt_create - Add a new listener to @serv - * @serv: target RPC service - * @xprt_name: transport class name - * @net: network namespace - * @family: network address family - * @port: listener port - * @flags: SVC_SOCK flags - * @cred: credential to bind to this transport - * - * Return values: - * %0: New listener added successfully - * %-EPROTONOSUPPORT: Requested transport type not supported - */ -int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, +int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, const struct cred *cred) { int err; - err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); + err = _svc_create_xprt(serv, xprt_name, 
net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); - err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred); + err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); } return err; } -EXPORT_SYMBOL_GPL(svc_xprt_create); +EXPORT_SYMBOL_GPL(svc_create_xprt); /* * Copy the local and remote xprt addresses to the rqstp structure @@ -412,8 +393,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); - if (xpt_flags & BIT(XPT_BUSY)) - return false; if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { @@ -426,12 +405,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) return false; } -/** - * svc_xprt_enqueue - Queue a transport on an idle nfsd thread - * @xprt: transport with data pending - * - */ -void svc_xprt_enqueue(struct svc_xprt *xprt) +void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -475,6 +449,19 @@ out_unlock: put_cpu(); trace_svc_xprt_do_enqueue(xprt, rqstp); } +EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); + +/* + * Queue up a transport with data pending. If there are idle nfsd + * processes, wake 'em up. + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) +{ + if (test_bit(XPT_BUSY, &xprt->xpt_flags)) + return; + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); +} EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* @@ -533,7 +520,6 @@ static void svc_xprt_release(struct svc_rqst *rqstp) kfree(rqstp->rq_deferred); rqstp->rq_deferred = NULL; - pagevec_release(&rqstp->rq_pvec); svc_free_res_pages(rqstp); rqstp->rq_res.page_len = 0; rqstp->rq_res.page_base = 0; @@ -660,8 +646,6 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) int pages; int i; - pagevec_init(&rqstp->rq_pvec); - /* now allocate needed pages. 
If we get a failure, sleep briefly */ pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT; if (pages > RPCSVC_MAXPAGES) { @@ -674,13 +658,13 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) while (rqstp->rq_pages[i] == NULL) { struct page *p = alloc_page(GFP_KERNEL); if (!p) { - set_current_state(TASK_IDLE); - if (kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (signalled() || kthread_should_stop()) { set_current_state(TASK_RUNNING); return -EINTR; } + schedule_timeout(msecs_to_jiffies(500)); } - freezable_schedule_timeout(msecs_to_jiffies(500)); rqstp->rq_pages[i] = p; } rqstp->rq_page_end = &rqstp->rq_pages[i]; @@ -713,7 +697,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (kthread_should_stop()) + if (signalled() || kthread_should_stop()) return false; /* are we freezing? */ @@ -735,14 +719,18 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - set_current_state(TASK_IDLE); + /* + * We have to be able to interrupt this wait + * to bring down the daemons ... 
+ */ + set_current_state(TASK_INTERRUPTIBLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) - time_left = freezable_schedule_timeout(timeout); + time_left = schedule_timeout(timeout); else __set_current_state(TASK_RUNNING); @@ -757,7 +745,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); - if (kthread_should_stop()) + if (signalled() || kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); out_found: @@ -856,7 +844,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); err = -EINTR; - if (kthread_should_stop()) + if (signalled() || kthread_should_stop()) goto out; xprt = svc_get_next_xprt(rqstp, timeout); @@ -1052,12 +1040,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) svc_xprt_put(xprt); } -/** - * svc_xprt_close - Close a client connection - * @xprt: transport to disconnect - * - */ -void svc_xprt_close(struct svc_xprt *xprt) +void svc_close_xprt(struct svc_xprt *xprt) { trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); @@ -1072,7 +1055,7 @@ void svc_xprt_close(struct svc_xprt *xprt) */ svc_delete_xprt(xprt); } -EXPORT_SYMBOL_GPL(svc_xprt_close); +EXPORT_SYMBOL_GPL(svc_close_xprt); static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { @@ -1124,11 +1107,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) } } -/** - * svc_xprt_destroy_all - Destroy transports associated with @serv - * @serv: RPC service to be shut down - * @net: target network namespace - * +/* * Server threads may still be running (especially in the case where the * service is still running in other network namespaces). 
* @@ -1140,7 +1119,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) * threads, we may need to wait a little while and then check again to * see if they're done. */ -void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) +void svc_close_net(struct svc_serv *serv, struct net *net) { int delay = 0; @@ -1151,7 +1130,6 @@ void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net) msleep(delay++); } } -EXPORT_SYMBOL_GPL(svc_xprt_destroy_all); /* * Handle defer and revisit of requests diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 5a8b8e03fdd4..998b196b6176 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -59,12 +59,12 @@ svc_put_auth_ops(struct auth_ops *aops) } int -svc_authenticate(struct svc_rqst *rqstp) +svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) { rpc_authflavor_t flavor; struct auth_ops *aops; - rqstp->rq_auth_stat = rpc_auth_ok; + *authp = rpc_auth_ok; flavor = svc_getnl(&rqstp->rq_arg.head[0]); @@ -72,7 +72,7 @@ svc_authenticate(struct svc_rqst *rqstp) aops = svc_get_auth_ops(flavor); if (aops == NULL) { - rqstp->rq_auth_stat = rpc_autherr_badcred; + *authp = rpc_autherr_badcred; return SVC_DENIED; } @@ -80,7 +80,7 @@ svc_authenticate(struct svc_rqst *rqstp) init_svc_cred(&rqstp->rq_cred); rqstp->rq_authop = aops; - return aops->accept(rqstp); + return aops->accept(rqstp, authp); } EXPORT_SYMBOL_GPL(svc_authenticate); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 1868596259af..60754a292589 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -699,9 +699,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) - goto out; + return SVC_OK; - rqstp->rq_auth_stat = rpc_autherr_badcred; ipm = ip_map_cached_get(xprt); if (ipm == NULL) ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class, @@ -738,16 +737,13 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) 
put_group_info(cred->cr_group_info); cred->cr_group_info = gi; } - -out: - rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } EXPORT_SYMBOL_GPL(svcauth_unix_set_client); static int -svcauth_null_accept(struct svc_rqst *rqstp) +svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -758,12 +754,12 @@ svcauth_null_accept(struct svc_rqst *rqstp) if (svc_getu32(argv) != 0) { dprintk("svc: bad null cred\n"); - rqstp->rq_auth_stat = rpc_autherr_badcred; + *authp = rpc_autherr_badcred; return SVC_DENIED; } if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { dprintk("svc: bad null verf\n"); - rqstp->rq_auth_stat = rpc_autherr_badverf; + *authp = rpc_autherr_badverf; return SVC_DENIED; } @@ -807,7 +803,7 @@ struct auth_ops svcauth_null = { static int -svcauth_unix_accept(struct svc_rqst *rqstp) +svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -849,7 +845,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp) } groups_sort(cred->cr_group_info); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { - rqstp->rq_auth_stat = rpc_autherr_badverf; + *authp = rpc_autherr_badverf; return SVC_DENIED; } @@ -861,7 +857,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp) return SVC_OK; badcred: - rqstp->rq_auth_stat = rpc_autherr_badcred; + *authp = rpc_autherr_badcred; return SVC_DENIED; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cb0cfcd8a814..3d5ee042c501 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -181,8 +181,8 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) } } -static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length) +static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) { return 0; } @@ 
-635,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, - .xpo_result_payload = svc_sock_result_payload, + .xpo_read_payload = svc_sock_read_payload, .xpo_release_rqst = svc_udp_release_rqst, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, @@ -1209,7 +1209,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, - .xpo_result_payload = svc_sock_result_payload, + .xpo_read_payload = svc_sock_read_payload, .xpo_release_rqst = svc_tcp_release_rqst, .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, @@ -1342,10 +1342,25 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } +bool svc_alien_sock(struct net *net, int fd) +{ + int err; + struct socket *sock = sockfd_lookup(fd, &err); + bool ret = false; + + if (!sock) + goto out; + if (sock_net(sock->sk) != net) + ret = true; + sockfd_put(sock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(svc_alien_sock); + /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener - * @net: caller's network namespace * @fd: file descriptor of the new listener * @name_return: pointer to buffer to fill in with name of listener * @len: size of the buffer @@ -1355,8 +1370,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, * Name is terminated with '\n'. On error, returns a negative errno * value. 
*/ -int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, - char *name_return, const size_t len, const struct cred *cred) +int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len, const struct cred *cred) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1367,9 +1382,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, if (!so) return err; - err = -EINVAL; - if (sock_net(so->sk) != net) - goto out; err = -EAFNOSUPPORT; if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) goto out; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index e2bd0cd39114..d84bb5037bb5 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -669,7 +669,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct kvec *iov = buf->head; int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; - xdr_reset_scratch_buffer(xdr); + xdr_set_scratch_buffer(xdr, NULL, 0); BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; @@ -691,29 +691,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, EXPORT_SYMBOL_GPL(xdr_init_encode); /** - * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages - * @xdr: pointer to xdr_stream struct - * @buf: pointer to XDR buffer into which to encode data - * @pages: list of pages to decode into - * @rqst: pointer to controlling rpc_rqst, for debugging - * - */ -void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, - struct page **pages, struct rpc_rqst *rqst) -{ - xdr_reset_scratch_buffer(xdr); - - xdr->buf = buf; - xdr->page_ptr = pages; - xdr->iov = NULL; - xdr->p = page_address(*pages); - xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); - xdr->rqst = rqst; -} -EXPORT_SYMBOL_GPL(xdr_init_encode_pages); - -/** - * __xdr_commit_encode - Ensure all data is written to buffer + * xdr_commit_encode - Ensure all data is written to buffer * @xdr: 
pointer to xdr_stream * * We handle encoding across page boundaries by giving the caller a @@ -725,25 +703,22 @@ EXPORT_SYMBOL_GPL(xdr_init_encode_pages); * required at the end of encoding, or any other time when the xdr_buf * data might be read. */ -void __xdr_commit_encode(struct xdr_stream *xdr) +inline void xdr_commit_encode(struct xdr_stream *xdr) { int shift = xdr->scratch.iov_len; void *page; + if (shift == 0) + return; page = page_address(*xdr->page_ptr); memcpy(xdr->scratch.iov_base, page, shift); memmove(page, page + shift, (void *)xdr->p - page); - xdr_reset_scratch_buffer(xdr); + xdr->scratch.iov_len = 0; } -EXPORT_SYMBOL_GPL(__xdr_commit_encode); +EXPORT_SYMBOL_GPL(xdr_commit_encode); -/* - * The buffer space to be reserved crosses the boundary between - * xdr->buf->head and xdr->buf->pages, or between two pages - * in xdr->buf->pages. - */ -static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, - size_t nbytes) +static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, + size_t nbytes) { __be32 *p; int space_left; @@ -768,7 +743,8 @@ static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, * the "scratch" iov to track any temporarily unused fragment of * space at the end of the previous buffer: */ - xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes); + xdr->scratch.iov_base = xdr->p; + xdr->scratch.iov_len = frag1bytes; p = page_address(*xdr->page_ptr); /* * Note this is where the next encode will start after we've @@ -1080,7 +1056,8 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst) { xdr->buf = buf; - xdr_reset_scratch_buffer(xdr); + xdr->scratch.iov_base = NULL; + xdr->scratch.iov_len = 0; xdr->nwords = XDR_QUADLEN(buf->len); if (buf->head[0].iov_len != 0) xdr_set_iov(xdr, buf->head, buf->len); @@ -1128,6 +1105,24 @@ static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) return p; } +/** + * xdr_set_scratch_buffer - Attach a scratch 
buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} +EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); + static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; @@ -1437,51 +1432,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); -/** - * xdr_stream_subsegment - set @subbuf to a portion of @xdr - * @xdr: an xdr_stream set up for decoding - * @subbuf: the result buffer - * @nbytes: length of @xdr to extract, in bytes - * - * Sets up @subbuf to represent a portion of @xdr. The portion - * starts at the current offset in @xdr, and extends for a length - * of @nbytes. If this is successful, @xdr is advanced to the next - * position following that portion. - * - * Return values: - * %true: @subbuf has been initialized, and @xdr has been advanced. 
- * %false: a bounds error has occurred - */ -bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, - unsigned int nbytes) -{ - unsigned int remaining, offset, len; - - if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes)) - return false; - - if (subbuf->head[0].iov_len) - if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len)) - return false; - - remaining = subbuf->page_len; - offset = subbuf->page_base; - while (remaining) { - len = min_t(unsigned int, remaining, PAGE_SIZE) - offset; - - if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr)) - return false; - if (!__xdr_inline_decode(xdr, len)) - return false; - - remaining -= len; - offset = 0; - } - - return true; -} -EXPORT_SYMBOL_GPL(xdr_stream_subsegment); - /** * xdr_buf_trim - lop at most "len" bytes off the end of "buf" * @buf: buf to be trimmed diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index feac8c26fb87..c5154bc38e12 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -186,7 +186,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) ret = rpcrdma_bc_send_request(rdma, rqst); if (ret == -ENOTCONN) - svc_xprt_close(sxprt); + svc_close_xprt(sxprt); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d6436c13d5c4..c3d588b149aa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -448,6 +448,7 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src, * svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list * @rctxt: Reply context with information about the RPC Call * @sctxt: Send context for the RPC Reply + * @length: size in bytes of the payload in the first Write chunk * * The client provides a Write chunk list in the Call message. 
Fill * in the segments in the first Write chunk in the Reply's transport @@ -464,12 +465,12 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src, */ static ssize_t svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt, - struct svc_rdma_send_ctxt *sctxt) + struct svc_rdma_send_ctxt *sctxt, + unsigned int length) { ssize_t len, ret; - ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, - rctxt->rc_read_payload_length); + ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length); if (ret < 0) return ret; len = ret; @@ -922,12 +923,21 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err0; if (wr_lst) { /* XXX: Presume the client sent only one Write chunk */ - ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, - rctxt->rc_read_payload_offset, - rctxt->rc_read_payload_length); + unsigned long offset; + unsigned int length; + + if (rctxt->rc_read_payload_length) { + offset = rctxt->rc_read_payload_offset; + length = rctxt->rc_read_payload_length; + } else { + offset = xdr->head[0].iov_len; + length = xdr->page_len; + } + ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset, + length); if (ret < 0) goto err2; - if (svc_rdma_encode_write_list(rctxt, sctxt) < 0) + if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0) goto err0; } else { if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0) @@ -969,19 +979,19 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) } /** - * svc_rdma_result_payload - special processing for a result payload + * svc_rdma_read_payload - special processing for a READ payload * @rqstp: svc_rqst to operate on * @offset: payload's byte offset in @xdr * @length: size of payload, in bytes * * Returns zero on success. * - * For the moment, just record the xdr_buf location of the result + * For the moment, just record the xdr_buf location of the READ * payload. svc_rdma_sendto will use that location later when * we actually send the payload. 
*/ -int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, - unsigned int length) +int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length) { struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c895f80df659..5f7e3d12523f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, - .xpo_result_payload = svc_rdma_result_payload, + .xpo_read_payload = svc_rdma_read_payload, .xpo_release_rqst = svc_rdma_release_rqst, .xpo_detach = svc_rdma_detach, .xpo_free = svc_rdma_free, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 61ae7acea796..dd57a411adf6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -959,7 +959,7 @@ static struct sock *unix_find_other(struct net *net, if (err) goto fail; inode = d_backing_inode(path.dentry); - err = path_permission(&path, MAY_WRITE); + err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 78a48bd0f2b9..08249f7a09d6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -168,9 +168,8 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", - "kthread_exit", "make_task_dead", - "__module_put_and_kthread_exit", + "__module_put_and_exit", "complete_and_exit", "__reiserfs_panic", "lbug_with_loc",