Merge branch 'for-4.13-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "The core updates improve error handling (mostly related to bios), with
  the usual incremental work on the GFP_NOFS (mis)use removal,
  refactoring or cleanups. Except the two top patches, all have been in
  for-next for an extensive amount of time.

  User visible changes:

   - statx support

   - quota override tunable

   - improved compression thresholds

   - obsoleted mount option alloc_start

  Core updates:

   - bio-related updates:
       - faster bio cloning
       - no allocation failures
       - preallocated flush bios

   - more kvzalloc use, memalloc_nofs protections, GFP_NOFS updates

   - prep work for btree_inode removal

   - dir-item validation

   - qgoup fixes and updates

   - cleanups:
       - removed unused struct members, unused code, refactoring
       - argument refactoring (fs_info/root, caller -> callee sink)
       - SEARCH_TREE ioctl docs"

* 'for-4.13-part1' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (115 commits)
  btrfs: Remove false alert when fiemap range is smaller than on-disk extent
  btrfs: Don't clear SGID when inheriting ACLs
  btrfs: fix integer overflow in calc_reclaim_items_nr
  btrfs: scrub: fix target device intialization while setting up scrub context
  btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
  btrfs: qgroup: Introduce extent changeset for qgroup reserve functions
  btrfs: qgroup: Fix qgroup reserved space underflow caused by buffered write and quotas being enabled
  btrfs: qgroup: Return actually freed bytes for qgroup release or free data
  btrfs: qgroup: Cleanup btrfs_qgroup_prepare_account_extents function
  btrfs: qgroup: Add quick exit for non-fs extents
  Btrfs: rework delayed ref total_bytes_pinned accounting
  Btrfs: return old and new total ref mods when adding delayed refs
  Btrfs: always account pinned bytes when dropping a tree block ref
  Btrfs: update total_bytes_pinned when pinning down extents
  Btrfs: make BUG_ON() in add_pinned_bytes() an ASSERT()
  Btrfs: make add_pinned_bytes() take an s64 num_bytes instead of u64
  btrfs: fix validation of XATTR_ITEM dir items
  btrfs: Verify dir_item in iterate_object_props
  btrfs: Check name_len before in btrfs_del_root_ref
  btrfs: Check name_len before reading btrfs_get_name
  ...
This commit is contained in:
Linus Torvalds
2017-07-05 16:41:23 -07:00
47 changed files with 1729 additions and 1421 deletions

View File

@@ -18,6 +18,7 @@
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
@@ -161,14 +162,6 @@ struct scrub_parity {
unsigned long bitmap[0];
};
struct scrub_wr_ctx {
struct scrub_bio *wr_curr_bio;
struct btrfs_device *tgtdev;
int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
atomic_t flush_all_writes;
struct mutex wr_lock;
};
struct scrub_ctx {
struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
struct btrfs_fs_info *fs_info;
@@ -183,11 +176,14 @@ struct scrub_ctx {
atomic_t cancel_req;
int readonly;
int pages_per_rd_bio;
u32 sectorsize;
u32 nodesize;
int is_dev_replace;
struct scrub_wr_ctx wr_ctx;
struct scrub_bio *wr_curr_bio;
struct mutex wr_lock;
int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
atomic_t flush_all_writes;
struct btrfs_device *wr_tgtdev;
/*
* statistics
@@ -289,10 +285,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
u64 *extent_physical,
struct btrfs_device **extent_dev,
int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
struct btrfs_device *dev,
int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
@@ -643,8 +635,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
if (!sctx)
return;
scrub_free_wr_ctx(&sctx->wr_ctx);
/* this can happen when scrub is cancelled */
if (sctx->curr != -1) {
struct scrub_bio *sbio = sctx->bios[sctx->curr];
@@ -664,6 +654,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
kfree(sbio);
}
kfree(sctx->wr_curr_bio);
scrub_free_csums(sctx);
kfree(sctx);
}
@@ -680,7 +671,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
struct scrub_ctx *sctx;
int i;
struct btrfs_fs_info *fs_info = dev->fs_info;
int ret;
sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
@@ -710,8 +700,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
sctx->bios[i]->next_free = -1;
}
sctx->first_free = 0;
sctx->nodesize = fs_info->nodesize;
sctx->sectorsize = fs_info->sectorsize;
atomic_set(&sctx->bios_in_flight, 0);
atomic_set(&sctx->workers_pending, 0);
atomic_set(&sctx->cancel_req, 0);
@@ -722,12 +710,16 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
spin_lock_init(&sctx->stat_lock);
init_waitqueue_head(&sctx->list_wait);
ret = scrub_setup_wr_ctx(&sctx->wr_ctx,
fs_info->dev_replace.tgtdev, is_dev_replace);
if (ret) {
scrub_free_ctx(sctx);
return ERR_PTR(ret);
WARN_ON(sctx->wr_curr_bio != NULL);
mutex_init(&sctx->wr_lock);
sctx->wr_curr_bio = NULL;
if (is_dev_replace) {
WARN_ON(!fs_info->dev_replace.tgtdev);
sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
atomic_set(&sctx->flush_all_writes, 0);
}
return sctx;
nomem:
@@ -742,6 +734,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
u32 nlink;
int ret;
int i;
unsigned nofs_flag;
struct extent_buffer *eb;
struct btrfs_inode_item *inode_item;
struct scrub_warning *swarn = warn_ctx;
@@ -780,7 +773,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
nlink = btrfs_inode_nlink(eb, inode_item);
btrfs_release_path(swarn->path);
/*
* init_path might indirectly call vmalloc, or use GFP_KERNEL. Scrub
* uses GFP_NOFS in this context, so we keep it consistent but it does
* not seem to be strictly necessary.
*/
nofs_flag = memalloc_nofs_save();
ipath = init_ipath(4096, local_root, swarn->path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(ipath)) {
ret = PTR_ERR(ipath);
ipath = NULL;
@@ -954,7 +954,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
ret = -EIO;
goto out;
}
ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE,
ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
fixup->logical, page,
offset - page_offset(page),
fixup->mirror_num);
@@ -1737,12 +1737,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
WARN_ON(!page->page);
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
continue;
}
bio = btrfs_io_bio_alloc(1);
bio->bi_bdev = page->dev->bdev;
bio_add_page(bio, page->page, PAGE_SIZE, 0);
@@ -1830,9 +1825,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio = btrfs_io_bio_alloc(1);
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_iter.bi_sector = page_bad->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
@@ -1898,37 +1891,31 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_page *spage)
{
struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
struct scrub_bio *sbio;
int ret;
mutex_lock(&wr_ctx->wr_lock);
mutex_lock(&sctx->wr_lock);
again:
if (!wr_ctx->wr_curr_bio) {
wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
if (!sctx->wr_curr_bio) {
sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
GFP_KERNEL);
if (!wr_ctx->wr_curr_bio) {
mutex_unlock(&wr_ctx->wr_lock);
if (!sctx->wr_curr_bio) {
mutex_unlock(&sctx->wr_lock);
return -ENOMEM;
}
wr_ctx->wr_curr_bio->sctx = sctx;
wr_ctx->wr_curr_bio->page_count = 0;
sctx->wr_curr_bio->sctx = sctx;
sctx->wr_curr_bio->page_count = 0;
}
sbio = wr_ctx->wr_curr_bio;
sbio = sctx->wr_curr_bio;
if (sbio->page_count == 0) {
struct bio *bio;
sbio->physical = spage->physical_for_dev_replace;
sbio->logical = spage->logical;
sbio->dev = wr_ctx->tgtdev;
sbio->dev = sctx->wr_tgtdev;
bio = sbio->bio;
if (!bio) {
bio = btrfs_io_bio_alloc(GFP_KERNEL,
wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
}
bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
sbio->bio = bio;
}
@@ -1951,7 +1938,7 @@ again:
if (sbio->page_count < 1) {
bio_put(sbio->bio);
sbio->bio = NULL;
mutex_unlock(&wr_ctx->wr_lock);
mutex_unlock(&sctx->wr_lock);
return -EIO;
}
scrub_wr_submit(sctx);
@@ -1961,23 +1948,22 @@ again:
sbio->pagev[sbio->page_count] = spage;
scrub_page_get(spage);
sbio->page_count++;
if (sbio->page_count == wr_ctx->pages_per_wr_bio)
if (sbio->page_count == sctx->pages_per_wr_bio)
scrub_wr_submit(sctx);
mutex_unlock(&wr_ctx->wr_lock);
mutex_unlock(&sctx->wr_lock);
return 0;
}
static void scrub_wr_submit(struct scrub_ctx *sctx)
{
struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
struct scrub_bio *sbio;
if (!wr_ctx->wr_curr_bio)
if (!sctx->wr_curr_bio)
return;
sbio = wr_ctx->wr_curr_bio;
wr_ctx->wr_curr_bio = NULL;
sbio = sctx->wr_curr_bio;
sctx->wr_curr_bio = NULL;
WARN_ON(!sbio->bio->bi_bdev);
scrub_pending_bio_inc(sctx);
/* process all writes in a single worker thread. Then the block layer
@@ -2081,7 +2067,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
page = sblock->pagev[0]->page;
buffer = kmap_atomic(page);
len = sctx->sectorsize;
len = sctx->fs_info->sectorsize;
index = 0;
for (;;) {
u64 l = min_t(u64, len, PAGE_SIZE);
@@ -2146,7 +2132,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
BTRFS_UUID_SIZE))
sblock->header_error = 1;
len = sctx->nodesize - BTRFS_CSUM_SIZE;
len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
index = 0;
@@ -2329,10 +2315,7 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
bio = btrfs_io_bio_alloc(GFP_KERNEL,
sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
sbio->bio = bio;
}
@@ -2420,10 +2403,10 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work)
scrub_block_put(sblock);
if (sctx->is_dev_replace &&
atomic_read(&sctx->wr_ctx.flush_all_writes)) {
mutex_lock(&sctx->wr_ctx.wr_lock);
atomic_read(&sctx->flush_all_writes)) {
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
mutex_unlock(&sctx->wr_lock);
}
scrub_pending_bio_dec(sctx);
@@ -2458,10 +2441,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
goto bbio_out;
}
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
goto bbio_out;
bio = btrfs_io_bio_alloc(0);
bio->bi_iter.bi_sector = logical >> 9;
bio->bi_private = sblock;
bio->bi_end_io = scrub_missing_raid56_end_io;
@@ -2628,10 +2608,10 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
spin_unlock(&sctx->list_lock);
if (sctx->is_dev_replace &&
atomic_read(&sctx->wr_ctx.flush_all_writes)) {
mutex_lock(&sctx->wr_ctx.wr_lock);
atomic_read(&sctx->flush_all_writes)) {
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
mutex_unlock(&sctx->wr_lock);
}
scrub_pending_bio_dec(sctx);
@@ -2726,8 +2706,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
if (!sum)
return 0;
index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
num_sectors = sum->len / sctx->sectorsize;
index = ((u32)(logical - sum->bytenr)) / sctx->fs_info->sectorsize;
num_sectors = sum->len / sctx->fs_info->sectorsize;
memcpy(csum, sum->sums + index, sctx->csum_size);
if (index == num_sectors - 1) {
list_del(&sum->list);
@@ -2746,19 +2726,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
u32 blocksize;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
blocksize = sctx->sectorsize;
blocksize = sctx->fs_info->sectorsize;
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed++;
sctx->stat.data_bytes_scrubbed += len;
spin_unlock(&sctx->stat_lock);
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
blocksize = sctx->nodesize;
blocksize = sctx->fs_info->nodesize;
spin_lock(&sctx->stat_lock);
sctx->stat.tree_extents_scrubbed++;
sctx->stat.tree_bytes_scrubbed += len;
spin_unlock(&sctx->stat_lock);
} else {
blocksize = sctx->sectorsize;
blocksize = sctx->fs_info->sectorsize;
WARN_ON(1);
}
@@ -2892,11 +2872,11 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
}
if (flags & BTRFS_EXTENT_FLAG_DATA) {
blocksize = sctx->sectorsize;
blocksize = sctx->fs_info->sectorsize;
} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
blocksize = sctx->nodesize;
blocksize = sctx->fs_info->nodesize;
} else {
blocksize = sctx->sectorsize;
blocksize = sctx->fs_info->sectorsize;
WARN_ON(1);
}
@@ -3037,10 +3017,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
if (ret || !bbio || !bbio->raid_map)
goto bbio_out;
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
goto bbio_out;
bio = btrfs_io_bio_alloc(0);
bio->bi_iter.bi_sector = sparity->logic_start >> 9;
bio->bi_private = sparity;
bio->bi_end_io = scrub_parity_bio_endio;
@@ -3305,9 +3282,9 @@ out:
logic_end - logic_start);
scrub_parity_put(sparity);
scrub_submit(sctx);
mutex_lock(&sctx->wr_ctx.wr_lock);
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
mutex_unlock(&sctx->wr_lock);
btrfs_release_path(path);
return ret < 0 ? ret : 0;
@@ -3463,14 +3440,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
*/
if (atomic_read(&fs_info->scrub_pause_req)) {
/* push queued extents */
atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
atomic_set(&sctx->flush_all_writes, 1);
scrub_submit(sctx);
mutex_lock(&sctx->wr_ctx.wr_lock);
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
mutex_unlock(&sctx->wr_lock);
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
atomic_set(&sctx->flush_all_writes, 0);
scrub_blocked_if_needed(fs_info);
}
@@ -3677,9 +3654,9 @@ skip:
out:
/* push queued extents */
scrub_submit(sctx);
mutex_lock(&sctx->wr_ctx.wr_lock);
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
mutex_unlock(&sctx->wr_lock);
blk_finish_plug(&plug);
btrfs_free_path(path);
@@ -3859,7 +3836,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
btrfs_wait_block_group_reservations(cache);
btrfs_wait_nocow_writers(cache);
ret = btrfs_wait_ordered_roots(fs_info, -1,
ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
cache->key.objectid,
cache->key.offset);
if (ret > 0) {
@@ -3916,11 +3893,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* write requests are really completed when bios_in_flight
* changes to 0.
*/
atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
atomic_set(&sctx->flush_all_writes, 1);
scrub_submit(sctx);
mutex_lock(&sctx->wr_ctx.wr_lock);
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_ctx.wr_lock);
mutex_unlock(&sctx->wr_lock);
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
@@ -3934,7 +3911,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
wait_event(sctx->list_wait,
atomic_read(&sctx->workers_pending) == 0);
atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
atomic_set(&sctx->flush_all_writes, 0);
scrub_pause_off(fs_info);
@@ -4337,32 +4314,6 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
btrfs_put_bbio(bbio);
}
static int scrub_setup_wr_ctx(struct scrub_wr_ctx *wr_ctx,
struct btrfs_device *dev,
int is_dev_replace)
{
WARN_ON(wr_ctx->wr_curr_bio != NULL);
mutex_init(&wr_ctx->wr_lock);
wr_ctx->wr_curr_bio = NULL;
if (!is_dev_replace)
return 0;
WARN_ON(!dev->bdev);
wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
wr_ctx->tgtdev = dev;
atomic_set(&wr_ctx->flush_all_writes, 0);
return 0;
}
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
{
mutex_lock(&wr_ctx->wr_lock);
kfree(wr_ctx->wr_curr_bio);
wr_ctx->wr_curr_bio = NULL;
mutex_unlock(&wr_ctx->wr_lock);
}
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
int mirror_num, u64 physical_for_dev_replace)
{
@@ -4665,7 +4616,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
struct btrfs_device *dev;
int ret;
dev = sctx->wr_ctx.tgtdev;
dev = sctx->wr_tgtdev;
if (!dev)
return -EIO;
if (!dev->bdev) {
@@ -4673,13 +4624,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
"scrub write_page_nocow(bdev == NULL) is unexpected");
return -EIO;
}
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
return -ENOMEM;
}
bio = btrfs_io_bio_alloc(1);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;