Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (36 commits)
  xfs: semaphore cleanup
  xfs: Extend project quotas to support 32bit project ids
  xfs: remove xfs_buf wrappers
  xfs: remove xfs_cred.h
  xfs: remove xfs_globals.h
  xfs: remove xfs_version.h
  xfs: remove xfs_refcache.h
  xfs: fix the xfs_trans_committed
  xfs: remove unused t_callback field in struct xfs_trans
  xfs: fix bogus m_maxagi check in xfs_iget
  xfs: do not use xfs_mod_incore_sb_batch for per-cpu counters
  xfs: do not use xfs_mod_incore_sb for per-cpu counters
  xfs: remove XFS_MOUNT_NO_PERCPU_SB
  xfs: pack xfs_buf structure more tightly
  xfs: convert buffer cache hash to rbtree
  xfs: serialise inode reclaim within an AG
  xfs: batch inode reclaim lookup
  xfs: implement batched inode lookups for AG walking
  xfs: split out inode walk inode grabbing
  xfs: split inode AG walking into separate code for reclaim
  ...
@@ -188,8 +188,8 @@ _xfs_buf_initialize(
 	atomic_set(&bp->b_hold, 1);
 	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_list);
-	INIT_LIST_HEAD(&bp->b_hash_list);
-	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+	RB_CLEAR_NODE(&bp->b_rbnode);
+	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
 	bp->b_file_offset = range_base;
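The semaphore cleanup above is mechanical: init_MUTEX_LOCKED() was removed from the kernel, and a counting semaphore initialised to 0 gives the same "already held, no waiters" behaviour. A minimal userspace sketch of that idiom using POSIX semaphores (buf_lock/buf_unlock are hypothetical names, not this patch's API):

/* A semaphore initialised to 0 starts out "held"; the owner later
 * releases it with a post. Build with -pthread. */
#include <semaphore.h>
#include <stdio.h>

static sem_t b_sema;

static void buf_unlock(void) { sem_post(&b_sema); }	/* release */
static void buf_lock(void)   { sem_wait(&b_sema); }	/* acquire */

int main(void)
{
	sem_init(&b_sema, 0, 0);	/* count 0: held, no waiters */
	/* ... creator does its setup while holding the buffer ... */
	buf_unlock();			/* now others may lock it */
	buf_lock();
	printf("buffer locked again\n");
	sem_destroy(&b_sema);
	return 0;
}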
@@ -262,8 +262,6 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);

-	ASSERT(list_empty(&bp->b_hash_list));
-
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;

@@ -422,8 +420,10 @@ _xfs_buf_find(
 {
 	xfs_off_t		range_base;
 	size_t			range_length;
-	xfs_bufhash_t		*hash;
-	xfs_buf_t		*bp, *n;
+	struct xfs_perag	*pag;
+	struct rb_node		**rbp;
+	struct rb_node		*parent;
+	xfs_buf_t		*bp;

 	range_base = (ioff << BBSHIFT);
 	range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
 	ASSERT(!(range_length < (1 << btp->bt_sshift)));
 	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));

-	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
-	spin_lock(&hash->bh_lock);
-
-	list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-		ASSERT(btp == bp->b_target);
-		if (bp->b_file_offset == range_base &&
-		    bp->b_buffer_length == range_length) {
+	/* get tree root */
+	pag = xfs_perag_get(btp->bt_mount,
+				xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+	/* walk tree */
+	spin_lock(&pag->pag_buf_lock);
+	rbp = &pag->pag_buf_tree.rb_node;
+	parent = NULL;
+	bp = NULL;
+	while (*rbp) {
+		parent = *rbp;
+		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+		if (range_base < bp->b_file_offset)
+			rbp = &(*rbp)->rb_left;
+		else if (range_base > bp->b_file_offset)
+			rbp = &(*rbp)->rb_right;
+		else {
+			/*
+			 * found a block offset match. If the range doesn't
+			 * match, the only way this is allowed is if the buffer
+			 * in the cache is stale and the transaction that made
+			 * it stale has not yet committed. i.e. we are
+			 * reallocating a busy extent. Skip this buffer and
+			 * continue searching to the right for an exact match.
+			 */
+			if (bp->b_buffer_length != range_length) {
+				ASSERT(bp->b_flags & XBF_STALE);
+				rbp = &(*rbp)->rb_right;
+				continue;
+			}
 			atomic_inc(&bp->b_hold);
 			goto found;
 		}
@@ -449,17 +472,21 @@ _xfs_buf_find(
 	if (new_bp) {
 		_xfs_buf_initialize(new_bp, btp, range_base,
 					range_length, flags);
-		new_bp->b_hash = hash;
-		list_add(&new_bp->b_hash_list, &hash->bh_list);
+		rb_link_node(&new_bp->b_rbnode, parent, rbp);
+		rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+		/* the buffer keeps the perag reference until it is freed */
+		new_bp->b_pag = pag;
+		spin_unlock(&pag->pag_buf_lock);
 	} else {
 		XFS_STATS_INC(xb_miss_locked);
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
 	}
-
-	spin_unlock(&hash->bh_lock);
 	return new_bp;

 found:
-	spin_unlock(&hash->bh_lock);
+	spin_unlock(&pag->pag_buf_lock);
+	xfs_perag_put(pag);

 	/* Attempt to get the semaphore without sleeping,
 	 * if this does not work then we need to drop the
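The new _xfs_buf_find() walk above folds lookup and insertion-point search into a single descent: it records the parent node and the child link it followed, so a cache miss can rb_link_node() the new buffer exactly where the search fell off the tree, and duplicate block offsets left by stale buffers are skipped to the right until an exact length match turns up. A self-contained userspace sketch of the same lookup-or-insert pattern, with a plain unbalanced BST standing in for the kernel rbtree (all names illustrative; no rebalancing or stale handling):

#include <stdio.h>
#include <stdlib.h>

struct node {
	long		key;		/* stands in for b_file_offset */
	struct node	*left, *right;
};

/* Find key, or insert new_n where the search ended; mirrors the
 * "remember the link we would take" walk in _xfs_buf_find(). */
static struct node *
find_or_insert(struct node **root, long key, struct node *new_n)
{
	struct node **linkp = root;	/* link to set on a miss */

	while (*linkp) {
		struct node *n = *linkp;

		if (key < n->key)
			linkp = &n->left;
		else if (key > n->key)
			linkp = &n->right;
		else
			return n;	/* hit: caller takes a reference */
	}
	new_n->key = key;
	new_n->left = new_n->right = NULL;
	*linkp = new_n;			/* miss: link at the search endpoint */
	return new_n;
}

int main(void)
{
	struct node *root = NULL, *n = malloc(sizeof(*n));

	find_or_insert(&root, 42, n);	/* miss: inserts */
	printf("lookup 42 -> %ld\n", find_or_insert(&root, 42, NULL)->key);
	free(n);
	return 0;
}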
@@ -625,8 +652,7 @@ void
 xfs_buf_readahead(
 	xfs_buftarg_t		*target,
 	xfs_off_t		ioff,
-	size_t			isize,
-	xfs_buf_flags_t		flags)
+	size_t			isize)
 {
 	struct backing_dev_info *bdi;

@@ -634,8 +660,42 @@ xfs_buf_readahead(
 	if (bdi_read_congested(bdi))
 		return;

-	flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
-	xfs_buf_read(target, ioff, isize, flags);
+	xfs_buf_read(target, ioff, isize,
+		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+	struct xfs_mount	*mp,
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		daddr,
+	size_t			length,
+	int			flags)
+{
+	xfs_buf_t		*bp;
+	int			error;
+
+	bp = xfs_buf_get_uncached(target, length, flags);
+	if (!bp)
+		return NULL;
+
+	/* set up the buffer for a read IO */
+	xfs_buf_lock(bp);
+	XFS_BUF_SET_ADDR(bp, daddr);
+	XFS_BUF_READ(bp);
+	XFS_BUF_BUSY(bp);
+
+	xfsbdstrat(mp, bp);
+	error = xfs_buf_iowait(bp);
+	if (error || bp->b_error) {
+		xfs_buf_relse(bp);
+		return NULL;
+	}
+	return bp;
 }

 xfs_buf_t *
@@ -707,9 +767,10 @@ xfs_buf_associate_memory(
 }

 xfs_buf_t *
-xfs_buf_get_noaddr(
-	size_t			len,
-	xfs_buftarg_t		*target)
+xfs_buf_get_uncached(
+	struct xfs_buftarg	*target,
+	size_t			len,
+	int			flags)
 {
 	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
 	int			error, i;
@@ -725,7 +786,7 @@ xfs_buf_get_noaddr(
 		goto fail_free_buf;

 	for (i = 0; i < page_count; i++) {
-		bp->b_pages[i] = alloc_page(GFP_KERNEL);
+		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
 		if (!bp->b_pages[i])
 			goto fail_free_mem;
 	}
@@ -740,7 +801,7 @@ xfs_buf_get_noaddr(

 	xfs_buf_unlock(bp);

-	trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+	trace_xfs_buf_get_uncached(bp, _RET_IP_);
 	return bp;

  fail_free_mem:
@@ -774,29 +835,30 @@ void
 xfs_buf_rele(
 	xfs_buf_t		*bp)
 {
-	xfs_bufhash_t		*hash = bp->b_hash;
+	struct xfs_perag	*pag = bp->b_pag;

 	trace_xfs_buf_rele(bp, _RET_IP_);

-	if (unlikely(!hash)) {
+	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
 		return;
 	}

+	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
 	ASSERT(atomic_read(&bp->b_hold) > 0);
-	if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
 		if (bp->b_relse) {
 			atomic_inc(&bp->b_hold);
-			spin_unlock(&hash->bh_lock);
-			(*(bp->b_relse)) (bp);
-		} else if (bp->b_flags & XBF_FS_MANAGED) {
-			spin_unlock(&hash->bh_lock);
+			spin_unlock(&pag->pag_buf_lock);
+			bp->b_relse(bp);
 		} else {
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-			list_del_init(&bp->b_hash_list);
-			spin_unlock(&hash->bh_lock);
+			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+			spin_unlock(&pag->pag_buf_lock);
+			xfs_perag_put(pag);
 			xfs_buf_free(bp);
 		}
 	}
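xfs_buf_rele() above keeps the lock-free fast path: atomic_dec_and_lock() only takes pag_buf_lock when the hold count may reach zero, so the final drop and the rb_erase() are atomic with respect to concurrent _xfs_buf_find() lookups. A minimal userspace sketch of that primitive, assuming C11 atomics and pthreads (dec_and_lock here is a stand-in, not the kernel function):

/* Decrement lock-free unless the count would hit zero; in that case
 * take the lock and re-check, returning with the lock held on zero.
 * Build with -pthread. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int
dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* fast path: count stays positive, no lock needed */
	while (old > 1) {
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return 0;	/* lock not taken */
	}
	/* slow path: we may hold the last reference */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return 1;		/* hit zero: caller frees, then unlocks */
	pthread_mutex_unlock(lock);
	return 0;
}

int main(void)
{
	atomic_int hold = 2;
	pthread_mutex_t l = PTHREAD_MUTEX_INITIALIZER;

	printf("first put -> %d\n", dec_and_lock(&hold, &l));	/* 0 */
	if (dec_and_lock(&hold, &l)) {	/* 1: erase from tree + free here */
		printf("last put, lock held\n");
		pthread_mutex_unlock(&l);
	}
	return 0;
}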
@@ -859,7 +921,7 @@ xfs_buf_lock(
 	trace_xfs_buf_lock(bp, _RET_IP_);

 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-		xfs_log_force(bp->b_mount, 0);
+		xfs_log_force(bp->b_target->bt_mount, 0);
 	if (atomic_read(&bp->b_io_remaining))
 		blk_run_address_space(bp->b_target->bt_mapping);
 	down(&bp->b_sema);
@@ -970,7 +1032,6 @@ xfs_bwrite(
 {
 	int			error;

-	bp->b_mount = mp;
 	bp->b_flags |= XBF_WRITE;
 	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);

@@ -991,8 +1052,6 @@ xfs_bdwrite(
 {
 	trace_xfs_buf_bdwrite(bp, _RET_IP_);

-	bp->b_mount = mp;
-
 	bp->b_flags &= ~XBF_READ;
 	bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);

@@ -1001,7 +1060,7 @@ xfs_bdwrite(

 /*
  * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
  * so that the proper iodone callbacks get called.
  */
 STATIC int
@@ -1018,21 +1077,21 @@ xfs_bioerror(
 	XFS_BUF_ERROR(bp, EIO);

 	/*
-	 * We're calling biodone, so delete XBF_DONE flag.
+	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
 	 */
 	XFS_BUF_UNREAD(bp);
 	XFS_BUF_UNDELAYWRITE(bp);
 	XFS_BUF_UNDONE(bp);
 	XFS_BUF_STALE(bp);

-	xfs_biodone(bp);
+	xfs_buf_ioend(bp, 0);

 	return EIO;
 }

 /*
  * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
+ * here ourselves, and avoiding the xfs_buf_ioend call.
  * This is meant for userdata errors; metadata bufs come with
  * iodone functions attached, so that we can track down errors.
  */
@@ -1081,7 +1140,7 @@ int
 xfs_bdstrat_cb(
 	struct xfs_buf	*bp)
 {
-	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
 		trace_xfs_bdstrat_shut(bp, _RET_IP_);
 		/*
 		 * Metadata write that didn't get logged but
@@ -1387,62 +1446,24 @@ xfs_buf_iomove(
  */
 void
 xfs_wait_buftarg(
-	xfs_buftarg_t	*btp)
-{
-	xfs_buf_t	*bp, *n;
-	xfs_bufhash_t	*hash;
-	uint		i;
-
-	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-		hash = &btp->bt_hash[i];
-again:
-		spin_lock(&hash->bh_lock);
-		list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-			ASSERT(btp == bp->b_target);
-			if (!(bp->b_flags & XBF_FS_MANAGED)) {
-				spin_unlock(&hash->bh_lock);
-				/*
-				 * Catch superblock reference count leaks
-				 * immediately
-				 */
-				BUG_ON(bp->b_bn == 0);
-				delay(100);
-				goto again;
-			}
-		}
-		spin_unlock(&hash->bh_lock);
+	struct xfs_buftarg	*btp)
+{
+	struct xfs_perag	*pag;
+	uint			i;
+
+	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+		pag = xfs_perag_get(btp->bt_mount, i);
+		spin_lock(&pag->pag_buf_lock);
+		while (rb_first(&pag->pag_buf_tree)) {
+			spin_unlock(&pag->pag_buf_lock);
+			delay(100);
+			spin_lock(&pag->pag_buf_lock);
+		}
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
 	}
 }

-/*
- *	Allocate buffer hash table for a given target.
- *	For devices containing metadata (i.e. not the log/realtime devices)
- *	we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
-	xfs_buftarg_t		*btp,
-	int			external)
-{
-	unsigned int		i;
-
-	btp->bt_hashshift = external ? 3 : 12;	/* 8 or 4096 buckets */
-	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
-					 sizeof(xfs_bufhash_t));
-	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-		spin_lock_init(&btp->bt_hash[i].bh_lock);
-		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
-	}
-}
-
-STATIC void
-xfs_free_bufhash(
-	xfs_buftarg_t		*btp)
-{
-	kmem_free_large(btp->bt_hash);
-	btp->bt_hash = NULL;
-}
-
 /*
  *	buftarg list for delwrite queue processing
  */
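The rewritten xfs_wait_buftarg() above drains each AG's buffer tree by polling: while rb_first() still finds a buffer, it drops the lock so concurrent releases can make progress, naps, and re-checks. A small self-contained sketch of that drain idiom (hypothetical names; usleep() approximates the kernel's delay(100)):

/* Drain loop: while items remain, release the lock so other threads
 * can remove them, sleep briefly, and re-check. Build with -pthread. */
#include <pthread.h>
#include <unistd.h>

struct bucket {
	pthread_mutex_t	lock;
	int		nr_items;	/* stands in for rb_first() != NULL */
};

static void
wait_bucket_empty(struct bucket *b)
{
	pthread_mutex_lock(&b->lock);
	while (b->nr_items > 0) {
		pthread_mutex_unlock(&b->lock);	/* let releasers progress */
		usleep(100 * 1000);		/* roughly delay(100) */
		pthread_mutex_lock(&b->lock);
	}
	pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	struct bucket b = { PTHREAD_MUTEX_INITIALIZER, 0 };

	wait_bucket_empty(&b);	/* returns immediately: nothing to drain */
	return 0;
}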
@@ -1475,7 +1496,6 @@ xfs_free_buftarg(
 	xfs_flush_buftarg(btp, 1);
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
-	xfs_free_bufhash(btp);
 	iput(btp->bt_mapping->host);

 	/* Unregister the buftarg first so that we don't get a
@@ -1597,6 +1617,7 @@ out_error:

 xfs_buftarg_t *
 xfs_alloc_buftarg(
+	struct xfs_mount	*mp,
 	struct block_device	*bdev,
 	int			external,
 	const char		*fsname)
@@ -1605,6 +1626,7 @@ xfs_alloc_buftarg(

 	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

+	btp->bt_mount = mp;
 	btp->bt_dev = bdev->bd_dev;
 	btp->bt_bdev = bdev;
 	if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1613,7 +1635,6 @@ xfs_alloc_buftarg(
 		goto error;
 	if (xfs_alloc_delwrite_queue(btp, fsname))
 		goto error;
-	xfs_alloc_bufhash(btp, external);
 	return btp;

 error:
@@ -1904,7 +1925,7 @@ xfs_flush_buftarg(
 		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);

 		list_del_init(&bp->b_list);
-		xfs_iowait(bp);
+		xfs_buf_iowait(bp);
 		xfs_buf_relse(bp);
 	}
 }