Merge tag 'nfsd-4.19-1' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Chuck Lever fixed a problem with NFSv4.0 callbacks over GSS from
  multi-homed servers.

  The only new feature is a minor bit of protocol (change_attr_type)
  which the client doesn't even use yet.

  Other than that, various bugfixes and cleanup"

* tag 'nfsd-4.19-1' of git://linux-nfs.org/~bfields/linux: (27 commits)
  sunrpc: Add comment defining gssd upcall API keywords
  nfsd: Remove callback_cred
  nfsd: Use correct credential for NFSv4.0 callback with GSS
  sunrpc: Extract target name into svc_cred
  sunrpc: Enable the kernel to specify the hostname part of service principals
  sunrpc: Don't use stack buffer with scatterlist
  rpc: remove unneeded variable 'ret' in rdma_listen_handler
  nfsd: use true and false for boolean values
  nfsd: constify write_op[]
  fs/nfsd: Delete invalid assignment statements in nfsd4_decode_exchange_id
  NFSD: Handle full-length symlinks
  NFSD: Refactor the generic write vector fill helper
  svcrdma: Clean up Read chunk path
  svcrdma: Avoid releasing a page in svc_xprt_release()
  nfsd: Mark expected switch fall-through
  sunrpc: remove redundant variables 'checksumlen','blocksize' and 'data'
  nfsd: fix leaked file lock with nfs exported overlayfs
  nfsd: don't advertise a SCSI layout for an unsupported request_queue
  nfsd: fix corrupted reply to badly ordered compound
  nfsd: clarify check_op_ordering
  ...
This commit is contained in:
Linus Torvalds
2018-08-23 16:00:10 -07:00
bovenliggende 6f7948f566 108b833cde
commit 9157141c95
30 gewijzigde bestanden met toevoegingen van 300 en 218 verwijderingen

Bestand weergeven

@@ -102,6 +102,7 @@ struct nfsd_net {
time_t nfsd4_lease;
time_t nfsd4_grace;
bool somebody_reclaimed;
bool nfsd_net_up;
bool lockd_up;

Bestand weergeven

@@ -202,7 +202,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
&argp->first, cnt);
if (!nvecs)
RETURN_STATUS(nfserr_io);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
@@ -289,6 +290,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
RETURN_STATUS(nfserr_nametoolong);
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
page_address(rqstp->rq_arg.pages[0]),
argp->tlen);
if (IS_ERR(argp->tname))
RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
@@ -302,6 +304,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
fh_init(&resp->fh, NFS3_FHSIZE);
nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
argp->tname, &resp->fh);
kfree(argp->tname);
RETURN_STATUS(nfserr);
}

Bestand weergeven

@@ -746,30 +746,17 @@ static int max_cb_time(struct net *net)
return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
}
static struct rpc_cred *callback_cred;
int set_callback_cred(void)
{
if (callback_cred)
return 0;
callback_cred = rpc_lookup_machine_cred("nfs");
if (!callback_cred)
return -ENOMEM;
return 0;
}
void cleanup_callback_cred(void)
{
if (callback_cred) {
put_rpccred(callback_cred);
callback_cred = NULL;
}
}
static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
return get_rpccred(callback_cred);
char *principal = clp->cl_cred.cr_targ_princ ?
clp->cl_cred.cr_targ_princ : "nfs";
struct rpc_cred *cred;
cred = rpc_lookup_machine_cred(principal);
if (!IS_ERR(cred))
get_rpccred(cred);
return cred;
} else {
struct rpc_auth *auth = client->cl_auth;
struct auth_cred acred = {};
@@ -980,6 +967,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
/* Fall through */
case 1:
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);

Bestand weergeven

@@ -133,27 +133,20 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
if (!(exp->ex_flags & NFSEXP_PNFS))
return;
/*
* If flex file is configured, use it by default. Otherwise
* check if the file system supports exporting a block-like layout.
* If the block device supports reservations prefer the SCSI layout,
* otherwise advertise the block layout.
*/
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES;
#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT
/* overwrite flex file layout selection if needed */
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME;
#endif
#ifdef CONFIG_NFSD_SCSILAYOUT
/* overwrite block layout selection if needed */
if (sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks &&
sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops &&
blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue))
exp->ex_layout_types |= 1 << LAYOUT_SCSI;
#endif
}

Bestand weergeven

@@ -354,6 +354,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct svc_fh *resfh = NULL;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
bool reclaim = false;
dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
(int)open->op_fname.len, open->op_fname.data,
@@ -424,6 +425,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
reclaim = true;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
@@ -452,6 +454,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
WARN(status && open->op_created,
"nfsd4_process_open2 failed to open newly-created file! status=%u\n",
be32_to_cpu(status));
if (reclaim && !status)
nn->somebody_reclaimed = true;
out:
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
@@ -982,24 +986,6 @@ out:
return status;
}
static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
{
int i = 1;
int buflen = write->wr_buflen;
vec[0].iov_base = write->wr_head.iov_base;
vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
buflen -= vec[0].iov_len;
while (buflen) {
vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
buflen -= vec[i].iov_len;
i++;
}
return i;
}
static __be32
nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -1027,7 +1013,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
write->wr_how_written = write->wr_stable_how;
gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
nvecs = fill_in_write_vector(rqstp->rq_vec, write);
nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
&write->wr_head, write->wr_buflen);
if (!nvecs)
return nfserr_io;
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
@@ -1599,7 +1588,7 @@ static const char *nfsd4_op_name(unsigned opnum);
*/
static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
{
struct nfsd4_op *op = &args->ops[0];
struct nfsd4_op *first_op = &args->ops[0];
/* These ordering requirements don't apply to NFSv4.0: */
if (args->minorversion == 0)
@@ -1607,12 +1596,17 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
/* This is weird, but OK, not our problem: */
if (args->opcnt == 0)
return nfs_ok;
if (op->status == nfserr_op_illegal)
if (first_op->status == nfserr_op_illegal)
return nfs_ok;
if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
if (!(nfsd4_ops[first_op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
return nfserr_op_not_in_session;
if (op->opnum == OP_SEQUENCE)
if (first_op->opnum == OP_SEQUENCE)
return nfs_ok;
/*
* So first_op is something allowed outside a session, like
* EXCHANGE_ID; but then it has to be the only op in the
* compound:
*/
if (args->opcnt != 1)
return nfserr_not_only_op;
return nfs_ok;
@@ -1726,6 +1720,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (status) {
op = &args->ops[0];
op->status = status;
resp->opcnt = 1;
goto encode_op;
}

Bestand weergeven

@@ -1979,8 +1979,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
target->cr_raw_principal = kstrdup(source->cr_raw_principal,
GFP_KERNEL);
if ((source->cr_principal && ! target->cr_principal) ||
(source->cr_raw_principal && ! target->cr_raw_principal))
target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL);
if ((source->cr_principal && !target->cr_principal) ||
(source->cr_raw_principal && !target->cr_raw_principal) ||
(source->cr_targ_princ && !target->cr_targ_princ))
return -ENOMEM;
target->cr_flavor = source->cr_flavor;
@@ -2057,6 +2059,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
return false;
/* XXX: check that cr_targ_princ fields match ? */
if (cr1->cr_principal == cr2->cr_principal)
return true;
if (!cr1->cr_principal || !cr2->cr_principal)
@@ -2956,18 +2959,18 @@ out_no_session:
return status;
}
static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
static bool nfsd4_compound_in_session(struct nfsd4_compound_state *cstate, struct nfs4_sessionid *sid)
{
if (!session)
if (!cstate->session)
return false;
return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
return !memcmp(sid, &cstate->session->se_sessionid, sizeof(*sid));
}
__be32
nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_destroy_session *sessionid = &u->destroy_session;
struct nfs4_sessionid *sessionid = &u->destroy_session.sessionid;
struct nfsd4_session *ses;
__be32 status;
int ref_held_by_me = 0;
@@ -2975,14 +2978,14 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
status = nfserr_not_only_op;
if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
if (nfsd4_compound_in_session(cstate, sessionid)) {
if (!nfsd4_last_compound_op(r))
goto out;
ref_held_by_me++;
}
dump_sessionid(__func__, &sessionid->sessionid);
dump_sessionid(__func__, sessionid);
spin_lock(&nn->client_lock);
ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
ses = find_in_sessionid_hashtbl(sessionid, net, &status);
if (!ses)
goto out_client_lock;
status = nfserr_wrong_cred;
@@ -3945,9 +3948,9 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel
* lock) we know the server hasn't removed the lease yet, we know
* it's safe to take a reference.
* callback (and since the lease code is serialized by the
* i_lock) we know the server hasn't removed the lease yet, and
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&dp->dl_recall);
@@ -4693,6 +4696,28 @@ nfsd4_end_grace(struct nfsd_net *nn)
*/
}
/*
* If we've waited a lease period but there are still clients trying to
* reclaim, wait a little longer to give them a chance to finish.
*/
static bool clients_still_reclaiming(struct nfsd_net *nn)
{
unsigned long now = get_seconds();
unsigned long double_grace_period_end = nn->boot_time +
2 * nn->nfsd4_lease;
if (!nn->somebody_reclaimed)
return false;
nn->somebody_reclaimed = false;
/*
* If we've given them *two* lease times to reclaim, and they're
* still not done, give up:
*/
if (time_after(now, double_grace_period_end))
return false;
return true;
}
static time_t
nfs4_laundromat(struct nfsd_net *nn)
{
@@ -4706,6 +4731,11 @@ nfs4_laundromat(struct nfsd_net *nn)
time_t t, new_timeo = nn->nfsd4_lease;
dprintk("NFSD: laundromat service - starting\n");
if (clients_still_reclaiming(nn)) {
new_timeo = 0;
goto out;
}
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
spin_lock(&nn->client_lock);
@@ -4803,7 +4833,7 @@ nfs4_laundromat(struct nfsd_net *nn)
posix_unblock_lock(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
}
@@ -6053,6 +6083,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0;
if (lock->lk_reclaim)
nn->somebody_reclaimed = true;
break;
case FILE_LOCK_DEFERRED:
nbl = NULL;
@@ -6293,7 +6325,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
return status;
}
inode = file_inode(filp);
inode = locks_inode(filp);
flctx = inode->i_flctx;
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -7199,14 +7231,10 @@ nfs4_state_start(void)
{
int ret;
ret = set_callback_cred();
if (ret)
return ret;
laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
if (laundry_wq == NULL) {
ret = -ENOMEM;
goto out_cleanup_cred;
goto out;
}
ret = nfsd4_create_callback_queue();
if (ret)
@@ -7217,8 +7245,7 @@ nfs4_state_start(void)
out_free_laundry:
destroy_workqueue(laundry_wq);
out_cleanup_cred:
cleanup_callback_cred();
out:
return ret;
}
@@ -7255,7 +7282,6 @@ nfs4_state_shutdown(void)
{
destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
cleanup_callback_cred();
}
static void

Bestand weergeven

@@ -1390,10 +1390,8 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
p += XDR_QUADLEN(dummy);
}
/* ssp_window and ssp_num_gss_handles */
/* ignore ssp_window and ssp_num_gss_handles: */
READ_BUF(8);
dummy = be32_to_cpup(p++);
dummy = be32_to_cpup(p++);
break;
default:
goto xdr_error;
@@ -2006,6 +2004,31 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
return p;
}
/*
* ctime (in NFSv4, time_metadata) is not writeable, and the client
* doesn't really care what resolution could theoretically be stored by
* the filesystem.
*
* The client cares how close together changes can be while still
* guaranteeing ctime changes. For most filesystems (which have
* timestamps with nanosecond fields) that is limited by the resolution
* of the time returned from current_time() (which I'm assuming to be
* 1/HZ).
*/
static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
{
struct timespec ts;
u32 ns;
ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
ts = ns_to_timespec(ns);
p = xdr_encode_hyper(p, ts.tv_sec);
*p++ = cpu_to_be32(ts.tv_nsec);
return p;
}
static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
{
*p++ = cpu_to_be32(c->atomic);
@@ -2797,9 +2820,7 @@ out_acl:
p = xdr_reserve_space(xdr, 12);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(0);
*p++ = cpu_to_be32(1);
*p++ = cpu_to_be32(0);
p = encode_time_delta(p, d_inode(dentry));
}
if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
p = xdr_reserve_space(xdr, 12);
@@ -2868,6 +2889,16 @@ out_acl:
goto out;
}
if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
p = xdr_reserve_space(xdr, 4);
if (!p)
goto out_resource;
if (IS_I_VERSION(d_inode(dentry)))
*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
else
*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
}
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
status = nfsd4_encode_security_label(xdr, rqstp, context,
contextlen);

Bestand weergeven

@@ -73,7 +73,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
#endif
static ssize_t (*write_op[])(struct file *, char *, size_t) = {
static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
[NFSD_Fh] = write_filehandle,
[NFSD_FO_UnlockIP] = write_unlock_ip,
[NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -1237,8 +1237,9 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->nfsd4_lease = 45; /* default lease time */
nn->nfsd4_grace = 45;
nn->somebody_reclaimed = false;
nn->clverifier_counter = prandom_u32();
nn->clientid_counter = prandom_u32();

Bestand weergeven

@@ -360,6 +360,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
FATTR4_WORD2_CHANGE_ATTR_TYPE | \
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)

Bestand weergeven

@@ -451,7 +451,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
switch (fsid_type) {
case FSID_DEV:
if (!old_valid_dev(exp_sb(exp)->s_dev))
return 0;
return false;
/* FALL THROUGH */
case FSID_MAJOR_MINOR:
case FSID_ENCODE_DEV:
@@ -461,13 +461,13 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
case FSID_UUID8:
case FSID_UUID16:
if (!is_root_export(exp))
return 0;
return false;
/* fall through */
case FSID_UUID4_INUM:
case FSID_UUID16_INUM:
return exp->ex_uuid != NULL;
}
return 1;
return true;
}

Bestand weergeven

@@ -218,7 +218,8 @@ nfsd_proc_write(struct svc_rqst *rqstp)
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
&argp->first, cnt);
if (!nvecs)
return nfserr_io;
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
@@ -453,6 +454,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
return nfserr_nametoolong;
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
page_address(rqstp->rq_arg.pages[0]),
argp->tlen);
if (IS_ERR(argp->tname))
return nfserrno(PTR_ERR(argp->tname));
@@ -465,6 +467,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, &newfh);
kfree(argp->tname);
fh_put(&argp->ffh);
fh_put(&newfh);
return nfserr;

Bestand weergeven

@@ -617,8 +617,6 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
extern void cleanup_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);