Merge branch 'for-3.19' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "A comparatively quieter cycle for nfsd this time, but still with two
  larger changes:

   - RPC server scalability improvements from Jeff Layton (using RCU
     instead of a spinlock to find idle threads).

   - server-side NFSv4.2 ALLOCATE/DEALLOCATE support from Anna
     Schumaker, enabling fallocate on new clients"

* 'for-3.19' of git://linux-nfs.org/~bfields/linux: (32 commits)
  nfsd4: fix xdr4 count of server in fs_location4
  nfsd4: fix xdr4 inclusion of escaped char
  sunrpc/cache: convert to use string_escape_str()
  sunrpc: only call test_bit once in svc_xprt_received
  fs: nfsd: Fix signedness bug in compare_blob
  sunrpc: add some tracepoints around enqueue and dequeue of svc_xprt
  sunrpc: convert to lockless lookup of queued server threads
  sunrpc: fix potential races in pool_stats collection
  sunrpc: add a rcu_head to svc_rqst and use kfree_rcu to free it
  sunrpc: require svc_create callers to pass in meaningful shutdown routine
  sunrpc: have svc_wake_up only deal with pool 0
  sunrpc: convert sp_task_pending flag to use atomic bitops
  sunrpc: move rq_cachetype field to better optimize space
  sunrpc: move rq_splice_ok flag into rq_flags
  sunrpc: move rq_dropme flag into rq_flags
  sunrpc: move rq_usedeferral flag to rq_flags
  sunrpc: move rq_local field to rq_flags
  sunrpc: add a generic rq_flags field to svc_rqst and move rq_secure to it
  nfsd: minor off by one checks in __write_versions()
  sunrpc: release svc_pool_map reference when serv allocation fails
  ...
This commit is contained in:
Linus Torvalds
2014-12-16 15:25:31 -08:00
27 changed files with 530 additions and 274 deletions

View File

@@ -33,6 +33,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/file.h>
#include <linux/falloc.h>
#include <linux/slab.h>
#include "idmap.h"
@@ -772,7 +773,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
rqstp->rq_splice_ok = false;
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
@@ -1013,6 +1014,44 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status = nfserr_notsupp;
struct file *file;
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
&fallocate->falloc_stateid,
WR_STATE, &file);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
fallocate->falloc_offset,
fallocate->falloc_length,
flags);
fput(file);
return status;
}
static __be32
nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate)
{
return nfsd4_fallocate(rqstp, cstate, fallocate, 0);
}
static __be32
nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate)
{
return nfsd4_fallocate(rqstp, cstate, fallocate,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
}
static __be32
nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_seek *seek)
@@ -1331,7 +1370,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
rqstp->rq_usedeferral = false;
clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -1447,7 +1486,7 @@ encode_op:
BUG_ON(cstate->replay_owner);
out:
/* Reset deferral mechanism for RPC deferrals */
rqstp->rq_usedeferral = true;
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
@@ -1929,6 +1968,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
/* NFSv4.2 operations */
[OP_ALLOCATE] = {
.op_func = (nfsd4op_func)nfsd4_allocate,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_name = "OP_ALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
},
[OP_DEALLOCATE] = {
.op_func = (nfsd4op_func)nfsd4_deallocate,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
},
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",

View File

@@ -275,9 +275,11 @@ opaque_hashval(const void *ptr, int nbytes)
return x;
}
static void nfsd4_free_file(struct nfs4_file *f)
static void nfsd4_free_file_rcu(struct rcu_head *rcu)
{
kmem_cache_free(file_slab, f);
struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
kmem_cache_free(file_slab, fp);
}
static inline void
@@ -286,9 +288,10 @@ put_nfs4_file(struct nfs4_file *fi)
might_lock(&state_lock);
if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del(&fi->fi_hash);
hlist_del_rcu(&fi->fi_hash);
spin_unlock(&state_lock);
nfsd4_free_file(fi);
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
}
}
@@ -1440,7 +1443,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
if (cses->flags & SESSION4_BACK_CHAN) {
{
struct sockaddr *sa = svc_addr(rqstp);
/*
* This is a little silly; with sessions there's no real
@@ -1711,15 +1714,14 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
return 0;
}
static long long
static int
compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
{
long long res;
res = o1->len - o2->len;
if (res)
return res;
return (long long)memcmp(o1->data, o2->data, o1->len);
if (o1->len < o2->len)
return -1;
if (o1->len > o2->len)
return 1;
return memcmp(o1->data, o2->data, o1->len);
}
static int same_name(const char *n1, const char *n2)
@@ -1907,7 +1909,7 @@ add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
static struct nfs4_client *
find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
{
long long cmp;
int cmp;
struct rb_node *node = root->rb_node;
struct nfs4_client *clp;
@@ -3057,10 +3059,9 @@ static struct nfs4_file *nfsd4_alloc_file(void)
}
/* OPEN Share state helper functions */
static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
struct nfs4_file *fp)
{
unsigned int hashval = file_hashval(fh);
lockdep_assert_held(&state_lock);
atomic_set(&fp->fi_ref, 1);
@@ -3073,7 +3074,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
}
void
@@ -3294,17 +3295,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
/* search file_hashtbl[] for file */
static struct nfs4_file *
find_file_locked(struct knfsd_fh *fh)
find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
{
unsigned int hashval = file_hashval(fh);
struct nfs4_file *fp;
lockdep_assert_held(&state_lock);
hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
get_nfs4_file(fp);
return fp;
if (atomic_inc_not_zero(&fp->fi_ref))
return fp;
}
}
return NULL;
@@ -3314,10 +3312,11 @@ static struct nfs4_file *
find_file(struct knfsd_fh *fh)
{
struct nfs4_file *fp;
unsigned int hashval = file_hashval(fh);
spin_lock(&state_lock);
fp = find_file_locked(fh);
spin_unlock(&state_lock);
rcu_read_lock();
fp = find_file_locked(fh, hashval);
rcu_read_unlock();
return fp;
}
@@ -3325,11 +3324,18 @@ static struct nfs4_file *
find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
{
struct nfs4_file *fp;
unsigned int hashval = file_hashval(fh);
rcu_read_lock();
fp = find_file_locked(fh, hashval);
rcu_read_unlock();
if (fp)
return fp;
spin_lock(&state_lock);
fp = find_file_locked(fh);
if (fp == NULL) {
nfsd4_init_file(new, fh);
fp = find_file_locked(fh, hashval);
if (likely(fp == NULL)) {
nfsd4_init_file(fh, hashval, new);
fp = new;
}
spin_unlock(&state_lock);
@@ -4127,7 +4133,7 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
nfs4_put_stateowner(so);
}
if (open->op_file)
nfsd4_free_file(open->op_file);
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
nfs4_put_stid(&open->op_stp->st_stid);
}

View File

@@ -1513,6 +1513,23 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
DECODE_TAIL;
}
static __be32
nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
struct nfsd4_fallocate *fallocate)
{
DECODE_HEAD;
status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid);
if (status)
return status;
READ_BUF(16);
p = xdr_decode_hyper(p, &fallocate->falloc_offset);
xdr_decode_hyper(p, &fallocate->falloc_length);
DECODE_TAIL;
}
static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
@@ -1604,10 +1621,10 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -1714,7 +1731,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
argp->rqstp->rq_splice_ok = false;
clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
DECODE_TAIL;
}
@@ -1795,9 +1812,12 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
}
else
end++;
if (found_esc)
end = next;
str = end;
}
pathlen = htonl(xdr->buf->len - pathlen_offset);
pathlen = htonl(count);
write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4);
return 0;
}
@@ -3236,10 +3256,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(resp->rqstp->rq_splice_ok);
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
return nfserr_resource;
}
if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) {
if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
WARN_ON_ONCE(1);
return nfserr_resource;
}
@@ -3256,7 +3276,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
goto err_truncate;
}
if (file->f_op->splice_read && resp->rqstp->rq_splice_ok)
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
err = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
err = nfsd4_encode_readv(resp, read, file, maxcount);

View File

@@ -490,7 +490,7 @@ found_entry:
/* From the hall of fame of impractical attacks:
* Is this a user who tries to snoop on the cache? */
rtn = RC_DOIT;
if (!rqstp->rq_secure && rp->c_secure)
if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
goto out;
/* Compose RPC reply header */
@@ -579,7 +579,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
spin_lock(&b->cache_lock);
drc_mem_usage += bufsize;
lru_put_end(b, rp);
rp->c_secure = rqstp->rq_secure;
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
rp->c_state = RC_DONE;
spin_unlock(&b->cache_lock);

View File

@@ -608,7 +608,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
num);
sep = " ";
if (len > remaining)
if (len >= remaining)
break;
remaining -= len;
buf += len;
@@ -623,7 +623,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
'+' : '-',
minor);
if (len > remaining)
if (len >= remaining)
break;
remaining -= len;
buf += len;
@@ -631,7 +631,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
}
len = snprintf(buf, remaining, "\n");
if (len > remaining)
if (len >= remaining)
return -EINVAL;
return tlen + len;
}

View File

@@ -86,7 +86,7 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
int flags = nfsexp_flags(rqstp, exp);
/* Check if the request originated from a secure port. */
if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));

View File

@@ -692,7 +692,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
/* Now call the procedure handler, and encode NFS status. */
nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
nfserr = map_new_errors(rqstp->rq_vers, nfserr);
if (nfserr == nfserr_dropit || rqstp->rq_dropme) {
if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
dprintk("nfsd: Dropping request; may be revisited later\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
return 0;

View File

@@ -463,17 +463,24 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
/*
* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
*
* These objects are global. nfsd only keeps one instance of a nfs4_file per
* inode (though it may keep multiple file descriptors open per inode). These
* are tracked in the file_hashtbl which is protected by the state_lock
* spinlock.
* These objects are global. nfsd keeps one instance of a nfs4_file per
* filehandle (though it may keep multiple file descriptors for each). Each
* inode can have multiple filehandles associated with it, so there is
* (potentially) a many to one relationship between this struct and struct
* inode.
*
* These are hashed by filehandle in the file_hashtbl, which is protected by
* the global state_lock spinlock.
*/
struct nfs4_file {
atomic_t fi_ref;
spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash by "struct inode *" */
struct hlist_node fi_hash; /* hash on fi_fhandle */
struct list_head fi_stateids;
struct list_head fi_delegations;
union {
struct list_head fi_delegations;
struct rcu_head fi_rcu;
};
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
struct file * fi_fds[3];
/*

View File

@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/splice.h>
#include <linux/falloc.h>
#include <linux/fcntl.h>
#include <linux/namei.h>
#include <linux/delay.h>
@@ -533,6 +534,26 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
{
__be32 err;
int error;
if (!S_ISREG(file_inode(file)->i_mode))
return nfserr_inval;
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
if (err)
return err;
error = vfs_fallocate(file, flags, offset, len);
if (!error)
error = commit_metadata(fhp);
return nfserrno(error);
}
#endif /* defined(CONFIG_NFSD_V4) */
#ifdef CONFIG_NFSD_V3
@@ -881,7 +902,7 @@ static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
if (file->f_op->splice_read && rqstp->rq_splice_ok)
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
return nfsd_splice_read(rqstp, file, offset, count);
else
return nfsd_readv(file, offset, vec, vlen, count);
@@ -937,9 +958,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int stable = *stablep;
int use_wgather;
loff_t pos = offset;
loff_t end = LLONG_MAX;
unsigned int pflags = current->flags;
if (rqstp->rq_local)
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
/*
* We want less throttling in balance_dirty_pages()
* and shrink_inactive_list() so that nfs to
@@ -967,10 +989,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
fsnotify_modify(file);
if (stable) {
if (use_wgather)
if (use_wgather) {
host_err = wait_for_concurrent_writes(file);
else
host_err = vfs_fsync_range(file, offset, offset+*cnt, 0);
} else {
if (*cnt)
end = offset + *cnt - 1;
host_err = vfs_fsync_range(file, offset, end, 0);
}
}
out_nfserr:
@@ -979,7 +1004,7 @@ out_nfserr:
err = 0;
else
err = nfserrno(host_err);
if (rqstp->rq_local)
if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);
return err;
}

View File

@@ -54,6 +54,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,

View File

@@ -428,6 +428,13 @@ struct nfsd4_reclaim_complete {
u32 rca_one_fs;
};
struct nfsd4_fallocate {
/* request */
stateid_t falloc_stateid;
loff_t falloc_offset;
u64 falloc_length;
};
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -486,6 +493,8 @@ struct nfsd4_op {
struct nfsd4_free_stateid free_stateid;
/* NFSv4.2 */
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;