Merge branch 'xfs-stack-fixes' into for-next

Author: Dave Chinner
Date:   2014-03-13 19:12:13 +11:00
3 changed files with 262 additions and 140 deletions

fs/xfs/xfs_dir2.c

@@ -180,16 +180,23 @@ xfs_dir_init(
 	xfs_inode_t	*dp,
 	xfs_inode_t	*pdp)
 {
-	xfs_da_args_t	args;
+	struct xfs_da_args *args;
 	int		error;
 
-	memset((char *)&args, 0, sizeof(args));
-	args.dp = dp;
-	args.trans = tp;
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
-	if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+	error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+	if (error)
 		return error;
-	return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	if (!args)
+		return ENOMEM;
+
+	args->dp = dp;
+	args->trans = tp;
+	error = xfs_dir2_sf_create(args, pdp->i_ino);
+	kmem_free(args);
+	return error;
 }
 
 /*
@@ -205,41 +212,56 @@ xfs_dir_createname(
 	xfs_bmap_free_t		*flist,		/* bmap's freeblock list */
 	xfs_extlen_t		total)		/* bmap's total block count */
 {
-	xfs_da_args_t		args;
+	struct xfs_da_args	*args;
 	int			rval;
 	int			v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
-	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+	rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+	if (rval)
 		return rval;
 	XFS_STATS_INC(xs_dir_create);
 
-	memset(&args, 0, sizeof(xfs_da_args_t));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.filetype = name->type;
-	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args.inumber = inum;
-	args.dp = dp;
-	args.firstblock = first;
-	args.flist = flist;
-	args.total = total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = tp;
-	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_addname(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_block_addname(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_leaf_addname(&args);
+	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	if (!args)
+		return ENOMEM;
+
+	args->name = name->name;
+	args->namelen = name->len;
+	args->filetype = name->type;
+	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+	args->inumber = inum;
+	args->dp = dp;
+	args->firstblock = first;
+	args->flist = flist;
+	args->total = total;
+	args->whichfork = XFS_DATA_FORK;
+	args->trans = tp;
+	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		rval = xfs_dir2_sf_addname(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isblock(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v) {
+		rval = xfs_dir2_block_addname(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isleaf(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v)
+		rval = xfs_dir2_leaf_addname(args);
 	else
-		rval = xfs_dir2_node_addname(&args);
+		rval = xfs_dir2_node_addname(args);
+out_free:
+	kmem_free(args);
 	return rval;
 }
@@ -282,46 +304,66 @@ xfs_dir_lookup(
 	xfs_ino_t	*inum,	  /* out: inode number */
 	struct xfs_name *ci_name) /* out: actual name if CI match */
 {
-	xfs_da_args_t	args;
+	struct xfs_da_args *args;
 	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_lookup);
 
-	memset(&args, 0, sizeof(xfs_da_args_t));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.filetype = name->type;
-	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args.dp = dp;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = tp;
-	args.op_flags = XFS_DA_OP_OKNOENT;
+	/*
+	 * We need to use KM_NOFS here so that lockdep will not throw false
+	 * positive deadlock warnings on a non-transactional lookup path. It
+	 * is safe to recurse into inode reclaim in that case, but lockdep
+	 * can't easily be taught about it. Hence using KM_NOFS avoids having
+	 * to add a bunch of lockdep class annotations into the reclaim path
+	 * for the ilock.
+	 */
+	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	args->name = name->name;
+	args->namelen = name->len;
+	args->filetype = name->type;
+	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+	args->dp = dp;
+	args->whichfork = XFS_DATA_FORK;
+	args->trans = tp;
+	args->op_flags = XFS_DA_OP_OKNOENT;
 	if (ci_name)
-		args.op_flags |= XFS_DA_OP_CILOOKUP;
+		args->op_flags |= XFS_DA_OP_CILOOKUP;
 
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_lookup(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_block_lookup(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_leaf_lookup(&args);
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		rval = xfs_dir2_sf_lookup(args);
+		goto out_check_rval;
+	}
+
+	rval = xfs_dir2_isblock(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v) {
+		rval = xfs_dir2_block_lookup(args);
+		goto out_check_rval;
+	}
+
+	rval = xfs_dir2_isleaf(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v)
+		rval = xfs_dir2_leaf_lookup(args);
 	else
-		rval = xfs_dir2_node_lookup(&args);
+		rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
 	if (rval == EEXIST)
 		rval = 0;
 	if (!rval) {
-		*inum = args.inumber;
+		*inum = args->inumber;
 		if (ci_name) {
-			ci_name->name = args.value;
-			ci_name->len = args.valuelen;
+			ci_name->name = args->value;
+			ci_name->len = args->valuelen;
 		}
 	}
+out_free:
+	kmem_free(args);
 	return rval;
 }
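
The KM_SLEEP | KM_NOFS combination above is XFS's own allocation-flag notation; in generic kernel code the same constraint is expressed with the GFP_NOFS gfp flag, which forbids the allocator from recursing into filesystem reclaim. A minimal kernel-style sketch of the equivalent (illustrative helper, not XFS code):

#include <linux/slab.h>

/*
 * Zeroed, sleeping allocation that must not recurse into filesystem
 * reclaim - roughly what kmem_zalloc(size, KM_SLEEP | KM_NOFS) boils
 * down to. Staying out of FS reclaim is also what keeps lockdep quiet
 * on the lookup path above.
 */
static void *alloc_args_nofs(size_t size)
{
        return kzalloc(size, GFP_NOFS);
}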
@@ -338,38 +380,51 @@ xfs_dir_removename(
 	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
 	xfs_extlen_t	total)		/* bmap's total block count */
 {
-	xfs_da_args_t	args;
+	struct xfs_da_args *args;
 	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 	XFS_STATS_INC(xs_dir_remove);
 
-	memset(&args, 0, sizeof(xfs_da_args_t));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.filetype = name->type;
-	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args.inumber = ino;
-	args.dp = dp;
-	args.firstblock = first;
-	args.flist = flist;
-	args.total = total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = tp;
+	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	if (!args)
+		return ENOMEM;
+
+	args->name = name->name;
+	args->namelen = name->len;
+	args->filetype = name->type;
+	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+	args->inumber = ino;
+	args->dp = dp;
+	args->firstblock = first;
+	args->flist = flist;
+	args->total = total;
+	args->whichfork = XFS_DATA_FORK;
+	args->trans = tp;
 
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_removename(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_block_removename(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_leaf_removename(&args);
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		rval = xfs_dir2_sf_removename(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isblock(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v) {
+		rval = xfs_dir2_block_removename(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isleaf(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v)
+		rval = xfs_dir2_leaf_removename(args);
 	else
-		rval = xfs_dir2_node_removename(&args);
+		rval = xfs_dir2_node_removename(args);
+out_free:
+	kmem_free(args);
 	return rval;
 }
@@ -386,40 +441,54 @@ xfs_dir_replace(
 	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
 	xfs_extlen_t	total)		/* bmap's total block count */
 {
-	xfs_da_args_t	args;
+	struct xfs_da_args *args;
 	int		rval;
 	int		v;		/* type-checking value */
 
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 
-	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+	rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+	if (rval)
 		return rval;
 
-	memset(&args, 0, sizeof(xfs_da_args_t));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.filetype = name->type;
-	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args.inumber = inum;
-	args.dp = dp;
-	args.firstblock = first;
-	args.flist = flist;
-	args.total = total;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = tp;
+	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	if (!args)
+		return ENOMEM;
+
+	args->name = name->name;
+	args->namelen = name->len;
+	args->filetype = name->type;
+	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+	args->inumber = inum;
+	args->dp = dp;
+	args->firstblock = first;
+	args->flist = flist;
+	args->total = total;
+	args->whichfork = XFS_DATA_FORK;
+	args->trans = tp;
 
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_replace(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_block_replace(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_leaf_replace(&args);
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		rval = xfs_dir2_sf_replace(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isblock(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v) {
+		rval = xfs_dir2_block_replace(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isleaf(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v)
+		rval = xfs_dir2_leaf_replace(args);
 	else
-		rval = xfs_dir2_node_replace(&args);
+		rval = xfs_dir2_node_replace(args);
+out_free:
+	kmem_free(args);
 	return rval;
 }
@@ -434,7 +503,7 @@ xfs_dir_canenter(
 	struct xfs_name	*name,		/* name of entry to add */
 	uint		resblks)
 {
-	xfs_da_args_t	args;
+	struct xfs_da_args *args;
 	int		rval;
 	int		v;		/* type-checking value */
@@ -443,29 +512,42 @@ xfs_dir_canenter(
 	ASSERT(S_ISDIR(dp->i_d.di_mode));
 
-	memset(&args, 0, sizeof(xfs_da_args_t));
-	args.name = name->name;
-	args.namelen = name->len;
-	args.filetype = name->type;
-	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
-	args.dp = dp;
-	args.whichfork = XFS_DATA_FORK;
-	args.trans = tp;
-	args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+	if (!args)
+		return ENOMEM;
+
+	args->name = name->name;
+	args->namelen = name->len;
+	args->filetype = name->type;
+	args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+	args->dp = dp;
+	args->whichfork = XFS_DATA_FORK;
+	args->trans = tp;
+	args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
 			XFS_DA_OP_OKNOENT;
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_addname(&args);
-	else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_block_addname(&args);
-	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
-		return rval;
-	else if (v)
-		rval = xfs_dir2_leaf_addname(&args);
+
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		rval = xfs_dir2_sf_addname(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isblock(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v) {
+		rval = xfs_dir2_block_addname(args);
+		goto out_free;
+	}
+
+	rval = xfs_dir2_isleaf(tp, dp, &v);
+	if (rval)
+		goto out_free;
+	if (v)
+		rval = xfs_dir2_leaf_addname(args);
 	else
-		rval = xfs_dir2_node_addname(&args);
+		rval = xfs_dir2_node_addname(args);
+out_free:
+	kmem_free(args);
 	return rval;
 }
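
Every conversion in this file follows the same shape: a struct xfs_da_args of well over a hundred bytes moves off the scarce kernel stack onto the heap, and the chained "else if ((rval = ...)) return rval;" logic becomes straight-line code with a single out_free exit so the allocation is released on every path. A standalone userspace sketch of that transformation, with calloc/free standing in for kmem_zalloc/kmem_free (the op_args struct and all names here are illustrative, not from XFS):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for a large argument structure like xfs_da_args. */
struct op_args {
        const char      *name;
        size_t          namelen;
        int             flags;
};

static int do_operation(const struct op_args *args)
{
        printf("operating on %.*s\n", (int)args->namelen, args->name);
        return 0;
}

/*
 * Before: the structure is zeroed and filled on the stack. In a deep
 * call chain, every frame like this eats kernel stack.
 */
static int run_op_stack(const char *name)
{
        struct op_args  args;

        memset(&args, 0, sizeof(args));
        args.name = name;
        args.namelen = strlen(name);
        return do_operation(&args);
}

/*
 * After: heap allocate, then free through one exit point so every path
 * releases the allocation - the kmem_zalloc()/out_free pattern above.
 */
static int run_op_heap(const char *name)
{
        struct op_args  *args;
        int             error;

        args = calloc(1, sizeof(*args));
        if (!args)
                return ENOMEM;

        args->name = name;
        args->namelen = strlen(name);
        error = do_operation(args);

        free(args);
        return error;
}

int main(void)
{
        return run_op_stack("on-stack") || run_op_heap("on-heap");
}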

fs/xfs/xfs_iops.c

@@ -48,6 +48,18 @@
 #include <linux/fiemap.h>
 #include <linux/slab.h>
 
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
 static int
 xfs_initxattrs(
 	struct inode	*inode,

@@ -1191,6 +1203,7 @@ xfs_setup_inode(
 	xfs_diflags_to_iflags(inode, ip);
 
 	ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+	lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_op = &xfs_inode_operations;

@@ -1198,6 +1211,7 @@ xfs_setup_inode(
 		inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
+		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
 		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
 			inode->i_op = &xfs_dir_ci_inode_operations;
 		else
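
The two static lock_class_key objects above are the standard lockdep idiom for locks of one type that legitimately obey different ordering rules: give each rule its own key and lockdep stops reporting a false ABBA deadlock between them. A kernel-style sketch of the idiom (illustrative names only, not the XFS code; assumes a context where the mutex and lockdep headers are available):

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/types.h>

/* One key per distinct ordering rule; keys must have static storage. */
static struct lock_class_key file_lock_class;
static struct lock_class_key dir_lock_class;

static void init_object_lock(struct mutex *lock, bool is_dir)
{
        mutex_init(lock);
        /* Default every object's lock to the "file" ordering class... */
        lockdep_set_class(lock, &file_lock_class);
        /*
         * ...and reclassify directories, whose order against mmap_sem is
         * inverted, so lockdep tracks them as a separate class.
         */
        if (is_dir)
                lockdep_set_class(lock, &dir_lock_class);
}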

fs/xfs/xfs_log_cil.c

@@ -498,13 +498,6 @@ xlog_cil_push(
 	new_ctx->cil = cil;
 	cil->xc_ctx = new_ctx;
 
-	/*
-	 * mirror the new sequence into the cil structure so that we can do
-	 * unlocked checks against the current sequence in log forces without
-	 * risking deferencing a freed context pointer.
-	 */
-	cil->xc_current_sequence = new_ctx->sequence;
-
 	/*
 	 * The switch is now done, so we can drop the context lock and move out
 	 * of a shared context. We can't just go straight to the commit record,
@@ -523,8 +516,15 @@ xlog_cil_push(
 	 * Hence we need to add this context to the committing context list so
 	 * that higher sequences will wait for us to write out a commit record
 	 * before they do.
+	 *
+	 * xfs_log_force_lsn requires us to mirror the new sequence into the
+	 * cil structure atomically with the addition of this sequence to the
+	 * committing list. This also ensures that we can do unlocked checks
+	 * against the current sequence in log forces without risking
+	 * dereferencing a freed context pointer.
 	 */
 	spin_lock(&cil->xc_push_lock);
+	cil->xc_current_sequence = new_ctx->sequence;
 	list_add(&ctx->committing, &cil->xc_committing);
 	spin_unlock(&cil->xc_push_lock);
 	up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
 
 }
 
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the
+ * sequence number that is passed. When it returns, the work will be queued
+ * for @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
 static void
-xlog_cil_push_foreground(
+xlog_cil_push_now(
 	struct xlog	*log,
 	xfs_lsn_t	push_seq)
 {
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
 	}
 
 	cil->xc_push_seq = push_seq;
+	queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
 	spin_unlock(&cil->xc_push_lock);
-
-	/* do the push now */
-	xlog_cil_push(log);
 }
 
 bool
@@ -795,7 +799,8 @@ xlog_cil_force_lsn(
 	 * xlog_cil_push() handles racing pushes for the same sequence,
 	 * so no need to deal with it here.
 	 */
-	xlog_cil_push_foreground(log, sequence);
+restart:
+	xlog_cil_push_now(log, sequence);
 
 	/*
 	 * See if we can find a previous sequence still committing.
@@ -803,7 +808,6 @@ xlog_cil_force_lsn(
 	 * before allowing the force of push_seq to go ahead. Hence block
 	 * on commits for those as well.
 	 */
-restart:
 	spin_lock(&cil->xc_push_lock);
 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
 		if (ctx->sequence > sequence)
@@ -821,6 +825,28 @@ restart:
 		/* found it! */
 		commit_lsn = ctx->commit_lsn;
 	}
+
+	/*
+	 * The call to xlog_cil_push_now() executes the push in the
+	 * background. Hence by the time we have got here it is possible that
+	 * our sequence has not yet been pushed. This is true if the current
+	 * sequence still matches the push sequence after the above wait loop
+	 * and the CIL still contains dirty objects.
+	 *
+	 * When the push occurs, it will empty the CIL and atomically
+	 * increment the current sequence past the push sequence and move it
+	 * into the committing list. Of course, if the CIL is clean at the
+	 * time of the push, it won't have pushed the CIL at all, so in that
+	 * case we should try the push for this sequence again from the start
+	 * just in case.
+	 */
+	if (sequence == cil->xc_current_sequence &&
+	    !list_empty(&cil->xc_cil)) {
+		spin_unlock(&cil->xc_push_lock);
+		goto restart;
+	}
+
 	spin_unlock(&cil->xc_push_lock);
 	return commit_lsn;
 }
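
The restart loop added here exists because xlog_cil_push_now() only queues work: by the time the forcing thread re-acquires xc_push_lock, its sequence may still be the current, dirty one, so it must go around again rather than trust a push that never happened. A compilable userspace analogue of that queue, wait, and re-check pattern using pthreads (illustrative only, not XFS code; build with -lpthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t push_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long current_seq = 1;   /* sequence currently being filled */
static bool dirty = true;               /* is there unpushed work? */

/* Background worker: push the current sequence and open a new one. */
static void *push_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&push_lock);
        if (dirty) {
                dirty = false;
                current_seq++;  /* rolls to the new context under the lock */
        }
        pthread_mutex_unlock(&push_lock);
        return NULL;
}

/* Force @sequence: queue a push, then re-check before trusting the result. */
static void force_seq(unsigned long sequence)
{
        pthread_t worker;
restart:
        pthread_create(&worker, NULL, push_worker, NULL); /* "queue_work" */
        pthread_join(worker, NULL);  /* stands in for the committing-list wait */

        pthread_mutex_lock(&push_lock);
        if (sequence == current_seq && dirty) {
                /* the push raced with new work arriving; go around again */
                pthread_mutex_unlock(&push_lock);
                goto restart;
        }
        pthread_mutex_unlock(&push_lock);
}

int main(void)
{
        force_seq(1);
        printf("forced sequence 1; current sequence is now %lu\n", current_seq);
        return 0;
}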