Merge tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Features or user visible changes:

   - fallocate: implement zero range mode

   - avoid losing data raid profile when deleting a device

   - tree item checker: more checks for directory items and xattrs

  Notable fixes:

   - raid56 recovery: don't use cached stripes, that could be
     potentially changed and a later RMW or recovery would lead to
     corruptions or failures

   - let raid56 try harder to rebuild damaged data, reading from all
     stripes if necessary

   - fix scrub to repair raid56 in a similar way as in the case above

  Other:

   - cleanups: device freeing, removed some call indirections, redundant
     bio_put/_get, unused parameters, refactorings and renames

   - RCU list traversal fixups

   - simplify mount callchain, remove recursing back when mounting a
     subvolume

   - plug for fsync, may improve bio merging on multiple devices

   - compression heurisic: replace heap sort with radix sort, gains some
     performance

   - add extent map selftests, buffered write vs dio"

* tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits)
  btrfs: drop devid as device_list_add() arg
  btrfs: get device pointer from device_list_add()
  btrfs: set the total_devices in device_list_add()
  btrfs: move pr_info into device_list_add
  btrfs: make btrfs_free_stale_devices() to match the path
  btrfs: rename btrfs_free_stale_devices() arg to skip_dev
  btrfs: make btrfs_free_stale_devices() argument optional
  btrfs: make btrfs_free_stale_device() to iterate all stales
  btrfs: no need to check for btrfs_fs_devices::seeding
  btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it
  Btrfs: noinline merge_extent_mapping
  Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping
  Btrfs: extent map selftest: dio write vs dio read
  Btrfs: extent map selftest: buffered write vs dio read
  Btrfs: add extent map selftests
  Btrfs: move extent map specific code to extent_map.c
  Btrfs: add helper for em merge logic
  Btrfs: fix unexpected EEXIST from btrfs_get_extent
  Btrfs: fix incorrect block_len in merge_extent_mapping
  btrfs: Remove unused readahead spinlock
  ...
This commit is contained in:
Linus Torvalds
2018-01-29 14:04:23 -08:00
47 changed files with 2268 additions and 1406 deletions

View File

@@ -33,7 +33,6 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/log2.h>
#include "ctree.h"
#include "disk-io.h"
@@ -45,6 +44,21 @@
#include "extent_io.h"
#include "extent_map.h"
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
const char* btrfs_compress_type2str(enum btrfs_compression_type type)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB:
case BTRFS_COMPRESS_LZO:
case BTRFS_COMPRESS_ZSTD:
case BTRFS_COMPRESS_NONE:
return btrfs_compress_types[type];
}
return NULL;
}
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(bio);
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
bio_put(bio);
bio = btrfs_bio_alloc(bdev, first_byte);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
@@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
first_byte += PAGE_SIZE;
cond_resched();
}
bio_get(bio);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
bio_put(bio);
return 0;
}
@@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
bio_put(comp_bio);
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
@@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
}
cur_disk_byte += PAGE_SIZE;
}
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
@@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio);
}
bio_put(comp_bio);
return 0;
fail2:
@@ -752,6 +754,8 @@ struct heuristic_ws {
u32 sample_size;
/* Buckets store counters for each byte value */
struct bucket_item *bucket;
/* Sorting buffer */
struct bucket_item *bucket_b;
struct list_head list;
};
@@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws)
kvfree(workspace->sample);
kfree(workspace->bucket);
kfree(workspace->bucket_b);
kfree(workspace);
}
@@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void)
if (!ws->bucket)
goto fail;
ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
if (!ws->bucket_b)
goto fail;
INIT_LIST_HEAD(&ws->list);
return &ws->list;
fail:
@@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
return entropy_sum * 100 / entropy_max;
}
/* Compare buckets by size, ascending */
static int bucket_comp_rev(const void *lv, const void *rv)
{
const struct bucket_item *l = (const struct bucket_item *)lv;
const struct bucket_item *r = (const struct bucket_item *)rv;
#define RADIX_BASE 4U
#define COUNTERS_SIZE (1U << RADIX_BASE)
return r->count - l->count;
static u8 get4bits(u64 num, int shift) {
u8 low4bits;
num >>= shift;
/* Reverse order */
low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
return low4bits;
}
/*
* Use 4 bits as radix base
* Use 16 u32 counters for calculating new possition in buf array
*
* @array - array that will be sorted
* @array_buf - buffer array to store sorting results
* must be equal in size to @array
* @num - array size
*/
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
int num)
{
u64 max_num;
u64 buf_num;
u32 counters[COUNTERS_SIZE];
u32 new_addr;
u32 addr;
int bitlen;
int shift;
int i;
/*
* Try avoid useless loop iterations for small numbers stored in big
* counters. Example: 48 33 4 ... in 64bit array
*/
max_num = array[0].count;
for (i = 1; i < num; i++) {
buf_num = array[i].count;
if (buf_num > max_num)
max_num = buf_num;
}
buf_num = ilog2(max_num);
bitlen = ALIGN(buf_num, RADIX_BASE * 2);
shift = 0;
while (shift < bitlen) {
memset(counters, 0, sizeof(counters));
for (i = 0; i < num; i++) {
buf_num = array[i].count;
addr = get4bits(buf_num, shift);
counters[addr]++;
}
for (i = 1; i < COUNTERS_SIZE; i++)
counters[i] += counters[i - 1];
for (i = num - 1; i >= 0; i--) {
buf_num = array[i].count;
addr = get4bits(buf_num, shift);
counters[addr]--;
new_addr = counters[addr];
array_buf[new_addr] = array[i];
}
shift += RADIX_BASE;
/*
* Normal radix expects to move data from a temporary array, to
* the main one. But that requires some CPU time. Avoid that
* by doing another sort iteration to original array instead of
* memcpy()
*/
memset(counters, 0, sizeof(counters));
for (i = 0; i < num; i ++) {
buf_num = array_buf[i].count;
addr = get4bits(buf_num, shift);
counters[addr]++;
}
for (i = 1; i < COUNTERS_SIZE; i++)
counters[i] += counters[i - 1];
for (i = num - 1; i >= 0; i--) {
buf_num = array_buf[i].count;
addr = get4bits(buf_num, shift);
counters[addr]--;
new_addr = counters[addr];
array[new_addr] = array_buf[i];
}
shift += RADIX_BASE;
}
}
/*
@@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
struct bucket_item *bucket = ws->bucket;
/* Sort in reverse order */
sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
for (i = 0; i < BYTE_CORE_SET_LOW; i++)
coreset_sum += bucket[i].count;