Merge branch 'foreign/liubo/replace-lockup' into for-chris-4.6

This commit is contained in:
David Sterba
2016-02-26 15:38:32 +01:00
303 fájl változott, egészen pontosan 2994 új sor hozzáadva és 1694 régi sor törölve

Fájl megtekintése

@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
read_extent_buffer(eb, dest + bytes_left,
name_off, name_len);
if (eb != eb_in) {
btrfs_tree_read_unlock_blocking(eb);
if (!path->skip_locking)
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
if (eb != eb_in) {
atomic_inc(&eb->refs);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
if (!path->skip_locking)
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->nodes[0] = NULL;
path->locks[0] = 0;
}
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

Fájl megtekintése

@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
faili = nr_pages - 1;
cb->nr_pages = nr_pages;
/* In the parent-locked case, we only locked the range we are
* interested in. In all other cases, we can opportunistically
* cache decompressed data that goes beyond the requested range. */
if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
add_ra_bio_pages(inode, em_start + em_len, cb);
add_ra_bio_pages(inode, em_start + em_len, cb);
/* include any pages we added in add_ra-bio_pages */
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;

Fájl megtekintése

@@ -1005,8 +1005,10 @@ struct btrfs_dev_replace {
pid_t lock_owner;
atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount;
struct mutex lock_management_lock;
struct mutex lock;
rwlock_t lock;
atomic_t read_locks;
atomic_t blocking_readers;
wait_queue_head_t read_lock_wq;
struct btrfs_scrub_progress scrub_progress;
};

Fájl megtekintése

@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
*
*/
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list)
struct list_head *ins_list, bool *emitted)
{
struct btrfs_dir_item *di;
struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
if (over)
return 1;
*emitted = true;
}
return 0;
}

Fájl megtekintése

@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list);
struct list_head *ins_list, bool *emitted);
/* for init */
int __init btrfs_delayed_inode_init(void);

Fájl megtekintése

@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 0);
if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) {
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
return 0;
}
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_dev_replace_item);
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
if (dev_replace->srcdev)
btrfs_set_dev_replace_src_devid(eb, ptr,
dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
btrfs_set_dev_replace_cursor_right(eb, ptr,
dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_mark_buffer_dirty(eb);
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
return PTR_ERR(trans);
}
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->is_valid = 1;
dev_replace->item_needs_writeback = 1;
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret)
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
goto leave;
}
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
return ret;
}
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 0);
/* was the operation canceled, or is it finished? */
if (dev_replace->replace_state !=
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
/*
* flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
mutex_lock(&root->fs_info->chunk_mutex);
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
rcu_str_deref(src_device->name),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_rm_dev_replace_blocked(fs_info);
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *srcdev;
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 0);
/* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break;
}
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
}
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
return -EROFS;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
goto leave;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
break;
}
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
}
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
struct task_struct *task;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
break;
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
btrfs_info(fs_info,
"you may cancel the operation after 'mount -o degraded'");
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
return 0;
}
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 1);
WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1));
@@ -865,50 +865,60 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
return 1;
}
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
{
/* the beginning is just an optimization for the typical case */
if (atomic_read(&dev_replace->nesting_level) == 0) {
acquire_lock:
/* this is not a nested case where the same thread
* is trying to acqurire the same lock twice */
mutex_lock(&dev_replace->lock);
mutex_lock(&dev_replace->lock_management_lock);
dev_replace->lock_owner = current->pid;
atomic_inc(&dev_replace->nesting_level);
mutex_unlock(&dev_replace->lock_management_lock);
return;
if (rw == 1) {
/* write */
again:
wait_event(dev_replace->read_lock_wq,
atomic_read(&dev_replace->blocking_readers) == 0);
write_lock(&dev_replace->lock);
if (atomic_read(&dev_replace->blocking_readers)) {
write_unlock(&dev_replace->lock);
goto again;
}
} else {
read_lock(&dev_replace->lock);
atomic_inc(&dev_replace->read_locks);
}
mutex_lock(&dev_replace->lock_management_lock);
if (atomic_read(&dev_replace->nesting_level) > 0 &&
dev_replace->lock_owner == current->pid) {
WARN_ON(!mutex_is_locked(&dev_replace->lock));
atomic_inc(&dev_replace->nesting_level);
mutex_unlock(&dev_replace->lock_management_lock);
return;
}
mutex_unlock(&dev_replace->lock_management_lock);
goto acquire_lock;
}
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
{
WARN_ON(!mutex_is_locked(&dev_replace->lock));
mutex_lock(&dev_replace->lock_management_lock);
WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
WARN_ON(dev_replace->lock_owner != current->pid);
atomic_dec(&dev_replace->nesting_level);
if (atomic_read(&dev_replace->nesting_level) == 0) {
dev_replace->lock_owner = 0;
mutex_unlock(&dev_replace->lock_management_lock);
mutex_unlock(&dev_replace->lock);
if (rw == 1) {
/* write */
ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
write_unlock(&dev_replace->lock);
} else {
mutex_unlock(&dev_replace->lock_management_lock);
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
atomic_dec(&dev_replace->read_locks);
read_unlock(&dev_replace->lock);
}
}
/* inc blocking cnt and release read lock */
void btrfs_dev_replace_set_lock_blocking(
struct btrfs_dev_replace *dev_replace)
{
/* only set blocking for read lock */
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
atomic_inc(&dev_replace->blocking_readers);
read_unlock(&dev_replace->lock);
}
/* acquire read lock and dec blocking cnt */
void btrfs_dev_replace_clear_lock_blocking(
struct btrfs_dev_replace *dev_replace)
{
/* only set blocking for read lock */
ASSERT(atomic_read(&dev_replace->read_locks) > 0);
ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
read_lock(&dev_replace->lock);
if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
waitqueue_active(&dev_replace->read_lock_wq))
wake_up(&dev_replace->read_lock_wq);
}
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
{
percpu_counter_inc(&fs_info->bio_counter);

Fájl megtekintése

@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
void btrfs_dev_replace_clear_lock_blocking(
struct btrfs_dev_replace *dev_replace);
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{

Fájl megtekintése

@@ -2273,9 +2273,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
mutex_init(&fs_info->dev_replace.lock_management_lock);
mutex_init(&fs_info->dev_replace.lock);
rwlock_init(&fs_info->dev_replace.lock);
atomic_set(&fs_info->dev_replace.read_locks, 0);
atomic_set(&fs_info->dev_replace.blocking_readers, 0);
init_waitqueue_head(&fs_info->replace_wait);
init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
}
static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)

Fájl megtekintése

@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
struct block_device *bdev;
int ret;
int nr = 0;
int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
unsigned long this_bio_flag = 0;
set_page_extent_mapped(page);
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
if (!parent_locked)
unlock_extent_cached(tree, cur,
cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur,
cur + iosize - 1,
&cached, GFP_NOFS);
break;
}
em = __get_extent_map(inode, page, pg_offset, cur,
end - cur + 1, get_extent, em_cached);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
if (!parent_locked)
unlock_extent(tree, cur, end);
unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
if (parent_locked)
free_extent_state(cached);
else
unlock_extent_cached(tree, cur,
cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur,
cur + iosize - 1,
&cached, GFP_NOFS);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
if (!parent_locked)
unlock_extent(tree, cur, cur + iosize - 1);
unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
if (!parent_locked)
unlock_extent(tree, cur, cur + iosize - 1);
unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
if (!parent_locked)
unlock_extent(tree, cur, cur + iosize - 1);
unlock_extent(tree, cur, cur + iosize - 1);
}
cur = cur + iosize;
pg_offset += iosize;
@@ -3214,20 +3205,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num)
{
struct bio *bio = NULL;
unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
int ret;
ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
&bio_flags, READ, NULL);
if (bio)
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
return ret;
}
static noinline void update_nr_written(struct page *page,
struct writeback_control *wbc,
unsigned long nr_written)

Fájl megtekintése

@@ -29,7 +29,6 @@
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
#define EXTENT_BIO_PARENT_LOCKED 4
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);

Fájl megtekintése

@@ -5731,6 +5731,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
char *name_ptr;
int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */
bool emitted;
/* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root)
@@ -5759,6 +5760,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (ret < 0)
goto err;
emitted = false;
while (1) {
leaf = path->nodes[0];
slot = path->slots[0];
@@ -5838,6 +5840,7 @@ skip:
if (over)
goto nopos;
emitted = true;
di_len = btrfs_dir_name_len(leaf, di) +
btrfs_dir_data_len(leaf, di) + sizeof(*di);
di_cur += di_len;
@@ -5850,11 +5853,20 @@ next:
if (key_type == BTRFS_DIR_INDEX_KEY) {
if (is_curr)
ctx->pos++;
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
if (ret)
goto nopos;
}
/*
* If we haven't emitted any dir entry, we must not touch ctx->pos as
* it was was set to the termination value in previous call. We assume
* that "." and ".." were emitted if we reach this point and set the
* termination value as well for an empty directory.
*/
if (ctx->pos > 2 && !emitted)
goto nopos;
/* Reached end of directory/root. Bump pos past the last item. */
ctx->pos++;

Fájl megtekintése

@@ -2794,24 +2794,29 @@ out:
static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
{
struct page *page;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
page = grab_cache_page(inode->i_mapping, index);
if (!page)
return NULL;
return ERR_PTR(-ENOMEM);
if (!PageUptodate(page)) {
if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
0))
return NULL;
int ret;
ret = btrfs_readpage(NULL, page);
if (ret)
return ERR_PTR(ret);
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
return NULL;
return ERR_PTR(-EIO);
}
if (page->mapping != inode->i_mapping) {
unlock_page(page);
page_cache_release(page);
return ERR_PTR(-EAGAIN);
}
}
unlock_page(page);
return page;
}
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
pgoff_t index = off >> PAGE_CACHE_SHIFT;
for (i = 0; i < num_pages; i++) {
again:
pages[i] = extent_same_get_page(inode, index + i);
if (!pages[i])
return -ENOMEM;
if (IS_ERR(pages[i])) {
int err = PTR_ERR(pages[i]);
if (err == -EAGAIN)
goto again;
pages[i] = NULL;
return err;
}
}
return 0;
}
static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
static int lock_extent_range(struct inode *inode, u64 off, u64 len,
bool retry_range_locking)
{
/* do any pending delalloc/csum calc on src, one way or
another, and lock file content */
/*
* Do any pending delalloc/csum calculations on inode, one way or
* another, and lock file content.
* The locking order is:
*
* 1) pages
* 2) range in the inode's io tree
*/
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
if (ordered)
btrfs_put_ordered_extent(ordered);
if (!retry_range_locking)
return -EAGAIN;
btrfs_wait_ordered_range(inode, off, len);
}
return 0;
}
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len)
static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len,
bool retry_range_locking)
{
int ret;
if (inode1 < inode2) {
swap(inode1, inode2);
swap(loff1, loff2);
}
lock_extent_range(inode1, loff1, len);
lock_extent_range(inode2, loff2, len);
ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
if (ret)
return ret;
ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
if (ret)
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
loff1 + len - 1);
return ret;
}
struct cmp_pages {
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
for (i = 0; i < cmp->num_pages; i++) {
pg = cmp->src_pages[i];
if (pg)
if (pg) {
unlock_page(pg);
page_cache_release(pg);
}
pg = cmp->dst_pages[i];
if (pg)
if (pg) {
unlock_page(pg);
page_cache_release(pg);
}
}
kfree(cmp->src_pages);
kfree(cmp->dst_pages);
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
src_page = cmp->src_pages[i];
dst_page = cmp->dst_pages[i];
ASSERT(PageLocked(src_page));
ASSERT(PageLocked(dst_page));
addr = kmap_atomic(src_page);
dst_addr = kmap_atomic(dst_page);
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
goto out_unlock;
}
again:
ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
if (ret)
goto out_unlock;
if (same_inode)
lock_extent_range(src, same_lock_start, same_lock_len);
ret = lock_extent_range(src, same_lock_start, same_lock_len,
false);
else
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
false);
/*
* If one of the inodes has dirty pages in the respective range or
* ordered extents, we need to flush dellaloc and wait for all ordered
* extents in the range. We must unlock the pages and the ranges in the
* io trees to avoid deadlocks when flushing delalloc (requires locking
* pages) and when waiting for ordered extents to complete (they require
* range locking).
*/
if (ret == -EAGAIN) {
/*
* Ranges in the io trees already unlocked. Now unlock all
* pages before waiting for all IO to complete.
*/
btrfs_cmp_data_free(&cmp);
if (same_inode) {
btrfs_wait_ordered_range(src, same_lock_start,
same_lock_len);
} else {
btrfs_wait_ordered_range(src, loff, len);
btrfs_wait_ordered_range(dst, dst_loff, len);
}
goto again;
}
ASSERT(ret == 0);
if (WARN_ON(ret)) {
/* ranges in the io trees already unlocked */
btrfs_cmp_data_free(&cmp);
return ret;
}
/* pass original length for comparison so we stay within i_size */
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
u64 lock_start = min_t(u64, off, destoff);
u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
lock_extent_range(src, lock_start, lock_len);
ret = lock_extent_range(src, lock_start, lock_len, true);
} else {
btrfs_double_extent_lock(src, off, inode, destoff, len);
ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
true);
}
ASSERT(ret == 0);
if (WARN_ON(ret)) {
/* ranges in the io trees already unlocked */
goto out_unlock;
}
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);

Fájl megtekintése

@@ -396,7 +396,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace);
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
@@ -404,12 +404,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
prev_dev = NULL;
@@ -451,13 +451,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
have_zone = 1;
}
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
if (!have_zone)
goto error;

Fájl megtekintése

@@ -3859,16 +3859,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO;
}
btrfs_dev_replace_lock(&fs_info->dev_replace);
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (dev->scrub_device ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) {

Fájl megtekintése

@@ -1715,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&root->fs_info->dev_replace);
btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -3687,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace);
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -5063,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1;
free_extent_map(em);
btrfs_dev_replace_lock(&fs_info->dev_replace);
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
ret++;
btrfs_dev_replace_unlock(&fs_info->dev_replace);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
return ret;
}
@@ -5326,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!bbio_ret)
goto out;
btrfs_dev_replace_lock(dev_replace);
btrfs_dev_replace_lock(dev_replace, 0);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing)
btrfs_dev_replace_unlock(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
else
btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5752,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->mirror_num = map->num_stripes + 1;
}
out:
if (dev_replace_is_ongoing)
btrfs_dev_replace_unlock(dev_replace);
if (dev_replace_is_ongoing) {
btrfs_dev_replace_clear_lock_blocking(dev_replace);
btrfs_dev_replace_unlock(dev_replace, 0);
}
free_extent_map(em);
return ret;
}