Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (36 commits)
  xfs: semaphore cleanup
  xfs: Extend project quotas to support 32bit project ids
  xfs: remove xfs_buf wrappers
  xfs: remove xfs_cred.h
  xfs: remove xfs_globals.h
  xfs: remove xfs_version.h
  xfs: remove xfs_refcache.h
  xfs: fix the xfs_trans_committed
  xfs: remove unused t_callback field in struct xfs_trans
  xfs: fix bogus m_maxagi check in xfs_iget
  xfs: do not use xfs_mod_incore_sb_batch for per-cpu counters
  xfs: do not use xfs_mod_incore_sb for per-cpu counters
  xfs: remove XFS_MOUNT_NO_PERCPU_SB
  xfs: pack xfs_buf structure more tightly
  xfs: convert buffer cache hash to rbtree
  xfs: serialise inode reclaim within an AG
  xfs: batch inode reclaim lookup
  xfs: implement batched inode lookups for AG walking
  xfs: split out inode walk inode grabbing
  xfs: split inode AG walking into separate code for reclaim
  ...
This commit is contained in:
Linus Torvalds
2010-10-22 17:32:27 -07:00
60 changed files with 1189 additions and 1379 deletions

View File

@@ -188,8 +188,8 @@ _xfs_buf_initialize(
atomic_set(&bp->b_hold, 1);
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_list);
INIT_LIST_HEAD(&bp->b_hash_list);
init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
RB_CLEAR_NODE(&bp->b_rbnode);
sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
bp->b_file_offset = range_base;
@@ -262,8 +262,6 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
ASSERT(list_empty(&bp->b_hash_list));
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -422,8 +420,10 @@ _xfs_buf_find(
{
xfs_off_t range_base;
size_t range_length;
xfs_bufhash_t *hash;
xfs_buf_t *bp, *n;
struct xfs_perag *pag;
struct rb_node **rbp;
struct rb_node *parent;
xfs_buf_t *bp;
range_base = (ioff << BBSHIFT);
range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
ASSERT(!(range_length < (1 << btp->bt_sshift)));
ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
/* get tree root */
pag = xfs_perag_get(btp->bt_mount,
xfs_daddr_to_agno(btp->bt_mount, ioff));
spin_lock(&hash->bh_lock);
/* walk tree */
spin_lock(&pag->pag_buf_lock);
rbp = &pag->pag_buf_tree.rb_node;
parent = NULL;
bp = NULL;
while (*rbp) {
parent = *rbp;
bp = rb_entry(parent, struct xfs_buf, b_rbnode);
list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
ASSERT(btp == bp->b_target);
if (bp->b_file_offset == range_base &&
bp->b_buffer_length == range_length) {
if (range_base < bp->b_file_offset)
rbp = &(*rbp)->rb_left;
else if (range_base > bp->b_file_offset)
rbp = &(*rbp)->rb_right;
else {
/*
* found a block offset match. If the range doesn't
* match, the only way this is allowed is if the buffer
* in the cache is stale and the transaction that made
* it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and
* continue searching to the right for an exact match.
*/
if (bp->b_buffer_length != range_length) {
ASSERT(bp->b_flags & XBF_STALE);
rbp = &(*rbp)->rb_right;
continue;
}
atomic_inc(&bp->b_hold);
goto found;
}
@@ -449,17 +472,21 @@ _xfs_buf_find(
if (new_bp) {
_xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
new_bp->b_hash = hash;
list_add(&new_bp->b_hash_list, &hash->bh_list);
rb_link_node(&new_bp->b_rbnode, parent, rbp);
rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
/* the buffer keeps the perag reference until it is freed */
new_bp->b_pag = pag;
spin_unlock(&pag->pag_buf_lock);
} else {
XFS_STATS_INC(xb_miss_locked);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
}
spin_unlock(&hash->bh_lock);
return new_bp;
found:
spin_unlock(&hash->bh_lock);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
/* Attempt to get the semaphore without sleeping,
* if this does not work then we need to drop the
@@ -625,8 +652,7 @@ void
xfs_buf_readahead(
xfs_buftarg_t *target,
xfs_off_t ioff,
size_t isize,
xfs_buf_flags_t flags)
size_t isize)
{
struct backing_dev_info *bdi;
@@ -634,8 +660,42 @@ xfs_buf_readahead(
if (bdi_read_congested(bdi))
return;
flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
xfs_buf_read(target, ioff, isize, flags);
xfs_buf_read(target, ioff, isize,
XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
}
/*
* Read an uncached buffer from disk. Allocates and returns a locked
* buffer containing the disk contents or nothing.
*/
struct xfs_buf *
xfs_buf_read_uncached(
struct xfs_mount *mp,
struct xfs_buftarg *target,
xfs_daddr_t daddr,
size_t length,
int flags)
{
xfs_buf_t *bp;
int error;
bp = xfs_buf_get_uncached(target, length, flags);
if (!bp)
return NULL;
/* set up the buffer for a read IO */
xfs_buf_lock(bp);
XFS_BUF_SET_ADDR(bp, daddr);
XFS_BUF_READ(bp);
XFS_BUF_BUSY(bp);
xfsbdstrat(mp, bp);
error = xfs_buf_iowait(bp);
if (error || bp->b_error) {
xfs_buf_relse(bp);
return NULL;
}
return bp;
}
xfs_buf_t *
@@ -707,9 +767,10 @@ xfs_buf_associate_memory(
}
xfs_buf_t *
xfs_buf_get_noaddr(
xfs_buf_get_uncached(
struct xfs_buftarg *target,
size_t len,
xfs_buftarg_t *target)
int flags)
{
unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
int error, i;
@@ -725,7 +786,7 @@ xfs_buf_get_noaddr(
goto fail_free_buf;
for (i = 0; i < page_count; i++) {
bp->b_pages[i] = alloc_page(GFP_KERNEL);
bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
if (!bp->b_pages[i])
goto fail_free_mem;
}
@@ -740,7 +801,7 @@ xfs_buf_get_noaddr(
xfs_buf_unlock(bp);
trace_xfs_buf_get_noaddr(bp, _RET_IP_);
trace_xfs_buf_get_uncached(bp, _RET_IP_);
return bp;
fail_free_mem:
@@ -774,29 +835,30 @@ void
xfs_buf_rele(
xfs_buf_t *bp)
{
xfs_bufhash_t *hash = bp->b_hash;
struct xfs_perag *pag = bp->b_pag;
trace_xfs_buf_rele(bp, _RET_IP_);
if (unlikely(!hash)) {
if (!pag) {
ASSERT(!bp->b_relse);
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
return;
}
ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
spin_unlock(&hash->bh_lock);
(*(bp->b_relse)) (bp);
} else if (bp->b_flags & XBF_FS_MANAGED) {
spin_unlock(&hash->bh_lock);
spin_unlock(&pag->pag_buf_lock);
bp->b_relse(bp);
} else {
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
list_del_init(&bp->b_hash_list);
spin_unlock(&hash->bh_lock);
rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
xfs_buf_free(bp);
}
}
@@ -859,7 +921,7 @@ xfs_buf_lock(
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
xfs_log_force(bp->b_mount, 0);
xfs_log_force(bp->b_target->bt_mount, 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
down(&bp->b_sema);
@@ -970,7 +1032,6 @@ xfs_bwrite(
{
int error;
bp->b_mount = mp;
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
@@ -991,8 +1052,6 @@ xfs_bdwrite(
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
bp->b_mount = mp;
bp->b_flags &= ~XBF_READ;
bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
@@ -1001,7 +1060,7 @@ xfs_bdwrite(
/*
* Called when we want to stop a buffer from getting written or read.
* We attach the EIO error, muck with its flags, and call biodone
* We attach the EIO error, muck with its flags, and call xfs_buf_ioend
* so that the proper iodone callbacks get called.
*/
STATIC int
@@ -1018,21 +1077,21 @@ xfs_bioerror(
XFS_BUF_ERROR(bp, EIO);
/*
* We're calling biodone, so delete XBF_DONE flag.
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
xfs_biodone(bp);
xfs_buf_ioend(bp, 0);
return EIO;
}
/*
* Same as xfs_bioerror, except that we are releasing the buffer
* here ourselves, and avoiding the biodone call.
* here ourselves, and avoiding the xfs_buf_ioend call.
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
@@ -1081,7 +1140,7 @@ int
xfs_bdstrat_cb(
struct xfs_buf *bp)
{
if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
trace_xfs_bdstrat_shut(bp, _RET_IP_);
/*
* Metadata write that didn't get logged but
@@ -1387,62 +1446,24 @@ xfs_buf_iomove(
*/
void
xfs_wait_buftarg(
xfs_buftarg_t *btp)
struct xfs_buftarg *btp)
{
xfs_buf_t *bp, *n;
xfs_bufhash_t *hash;
uint i;
struct xfs_perag *pag;
uint i;
for (i = 0; i < (1 << btp->bt_hashshift); i++) {
hash = &btp->bt_hash[i];
again:
spin_lock(&hash->bh_lock);
list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
ASSERT(btp == bp->b_target);
if (!(bp->b_flags & XBF_FS_MANAGED)) {
spin_unlock(&hash->bh_lock);
/*
* Catch superblock reference count leaks
* immediately
*/
BUG_ON(bp->b_bn == 0);
delay(100);
goto again;
}
for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
pag = xfs_perag_get(btp->bt_mount, i);
spin_lock(&pag->pag_buf_lock);
while (rb_first(&pag->pag_buf_tree)) {
spin_unlock(&pag->pag_buf_lock);
delay(100);
spin_lock(&pag->pag_buf_lock);
}
spin_unlock(&hash->bh_lock);
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
}
}
/*
* Allocate buffer hash table for a given target.
* For devices containing metadata (i.e. not the log/realtime devices)
* we need to allocate a much larger hash table.
*/
STATIC void
xfs_alloc_bufhash(
xfs_buftarg_t *btp,
int external)
{
unsigned int i;
btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */
btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
sizeof(xfs_bufhash_t));
for (i = 0; i < (1 << btp->bt_hashshift); i++) {
spin_lock_init(&btp->bt_hash[i].bh_lock);
INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
}
}
STATIC void
xfs_free_bufhash(
xfs_buftarg_t *btp)
{
kmem_free_large(btp->bt_hash);
btp->bt_hash = NULL;
}
/*
* buftarg list for delwrite queue processing
*/
@@ -1475,7 +1496,6 @@ xfs_free_buftarg(
xfs_flush_buftarg(btp, 1);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
/* Unregister the buftarg first so that we don't get a
@@ -1597,6 +1617,7 @@ out_error:
xfs_buftarg_t *
xfs_alloc_buftarg(
struct xfs_mount *mp,
struct block_device *bdev,
int external,
const char *fsname)
@@ -1605,6 +1626,7 @@ xfs_alloc_buftarg(
btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1613,7 +1635,6 @@ xfs_alloc_buftarg(
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
xfs_alloc_bufhash(btp, external);
return btp;
error:
@@ -1904,7 +1925,7 @@ xfs_flush_buftarg(
bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
xfs_iowait(bp);
xfs_buf_iowait(bp);
xfs_buf_relse(bp);
}
}