Merge tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
 "Features or user visible changes:

   - fallocate: implement zero range mode

   - avoid losing data raid profile when deleting a device

   - tree item checker: more checks for directory items and xattrs

  Notable fixes:

   - raid56 recovery: don't use cached stripes, that could be
     potentially changed and a later RMW or recovery would lead to
     corruptions or failures

   - let raid56 try harder to rebuild damaged data, reading from all
     stripes if necessary

   - fix scrub to repair raid56 in a similar way as in the case above

  Other:

   - cleanups: device freeing, removed some call indirections, redundant
     bio_put/_get, unused parameters, refactorings and renames

   - RCU list traversal fixups

   - simplify mount callchain, remove recursing back when mounting a
     subvolume

   - plug for fsync, may improve bio merging on multiple devices

   - compression heuristic: replace heap sort with radix sort, gains
     some performance

   - add extent map selftests, buffered write vs dio"

* tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits)
  btrfs: drop devid as device_list_add() arg
  btrfs: get device pointer from device_list_add()
  btrfs: set the total_devices in device_list_add()
  btrfs: move pr_info into device_list_add
  btrfs: make btrfs_free_stale_devices() to match the path
  btrfs: rename btrfs_free_stale_devices() arg to skip_dev
  btrfs: make btrfs_free_stale_devices() argument optional
  btrfs: make btrfs_free_stale_device() to iterate all stales
  btrfs: no need to check for btrfs_fs_devices::seeding
  btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it
  Btrfs: noinline merge_extent_mapping
  Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping
  Btrfs: extent map selftest: dio write vs dio read
  Btrfs: extent map selftest: buffered write vs dio read
  Btrfs: add extent map selftests
  Btrfs: move extent map specific code to extent_map.c
  Btrfs: add helper for em merge logic
  Btrfs: fix unexpected EEXIST from btrfs_get_extent
  Btrfs: fix incorrect block_len in merge_extent_mapping
  btrfs: Remove unused readahead spinlock
  ...
@@ -33,7 +33,6 @@
 #include <linux/bit_spinlock.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
-#include <linux/sort.h>
 #include <linux/log2.h>
 #include "ctree.h"
 #include "disk-io.h"
@@ -45,6 +44,21 @@
 #include "extent_io.h"
 #include "extent_map.h"
 
+static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
+
+const char* btrfs_compress_type2str(enum btrfs_compression_type type)
+{
+	switch (type) {
+	case BTRFS_COMPRESS_ZLIB:
+	case BTRFS_COMPRESS_LZO:
+	case BTRFS_COMPRESS_ZSTD:
+	case BTRFS_COMPRESS_NONE:
+		return btrfs_compress_types[type];
+	}
+
+	return NULL;
+}
+
 static int btrfs_decompress_bio(struct compressed_bio *cb);
 
 static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
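
Since BTRFS_COMPRESS_NONE maps to the empty string, callers that only print the name need no special casing, and any value outside the enum falls through to NULL. A minimal sketch of a caller, assuming a printk-style consumer (the message text is illustrative, not part of this diff):

	/* Illustrative only: print a human-readable compression name. */
	const char *name = btrfs_compress_type2str(BTRFS_COMPRESS_ZSTD);

	if (name)
		pr_info("btrfs: using compression %s\n", name);	/* "zstd" */
	else
		pr_warn("btrfs: unknown compression type\n");
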
@@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		page->mapping = NULL;
 		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
-			bio_get(bio);
-
 			/*
 			 * inc the count before we submit the bio so
 			 * we know the end IO handler won't happen before
@@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				bio_endio(bio);
 			}
 
-			bio_put(bio);
-
 			bio = btrfs_bio_alloc(bdev, first_byte);
 			bio->bi_opf = REQ_OP_WRITE | write_flags;
 			bio->bi_private = cb;
@@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		first_byte += PAGE_SIZE;
 		cond_resched();
 	}
-	bio_get(bio);
 
 	ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
@@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		bio_endio(bio);
 	}
 
-	bio_put(bio);
 	return 0;
 }
 
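The deletions in the four hunks above (and the matching read-path hunks below) drop a bio_get()/bio_put() pair that only bracketed submission, the "redundant bio_put/_get" cleanup named in the pull message. The pair is redundant because the submitter's own reference keeps the bio alive until the completion path drops it, and nothing dereferences the bio after the final put. A self-contained userspace sketch of that reasoning, with a fake counter standing in for the bio's reference count (all names here are illustrative):

	#include <stdio.h>

	struct fake_bio {
		int refcount;		/* stands in for the bio refcount */
	};

	static void bio_get(struct fake_bio *b) { b->refcount++; }

	static void bio_put(struct fake_bio *b)
	{
		if (--b->refcount == 0)
			printf("bio freed\n");
	}

	/* Completion: drops the reference taken at allocation. */
	static void bio_endio(struct fake_bio *b) { bio_put(b); }

	int main(void)
	{
		struct fake_bio bio = { .refcount = 1 };  /* from bio_alloc() */

		bio_get(&bio);		/* removed by this series */
		bio_endio(&bio);	/* I/O completes */
		bio_put(&bio);		/* removed by this series */
		/*
		 * With the pair: 1 -> 2 -> 1 -> 0.  Without it: 1 -> 0.
		 * The bio is freed at the same point either way; the pair
		 * only costs two atomic operations per submitted bio.
		 */
		return 0;
	}
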
@@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		page->mapping = NULL;
 		if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
 		    PAGE_SIZE) {
-			bio_get(comp_bio);
-
 			ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
 						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
@@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				bio_endio(comp_bio);
 			}
 
-			bio_put(comp_bio);
-
 			comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
 			bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
 			comp_bio->bi_private = cb;
@@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		}
 		cur_disk_byte += PAGE_SIZE;
 	}
-	bio_get(comp_bio);
 
 	ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
@@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		bio_endio(comp_bio);
 	}
 
-	bio_put(comp_bio);
 	return 0;
 
 fail2:
@@ -752,6 +754,8 @@ struct heuristic_ws {
 	u32 sample_size;
 	/* Buckets store counters for each byte value */
 	struct bucket_item *bucket;
+	/* Sorting buffer */
+	struct bucket_item *bucket_b;
 	struct list_head list;
 };
 
@@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws)
 
 	kvfree(workspace->sample);
 	kfree(workspace->bucket);
+	kfree(workspace->bucket_b);
 	kfree(workspace);
 }
 
@@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void)
 	if (!ws->bucket)
 		goto fail;
 
+	ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
+	if (!ws->bucket_b)
+		goto fail;
+
 	INIT_LIST_HEAD(&ws->list);
 	return &ws->list;
 fail:
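
Note how this hunk keeps the single fail: label: the cleanup path tolerates a partially-initialized workspace because free_heuristic_ws() now kfree()s bucket_b as well, and kfree(NULL) is a no-op. A sketch of the idiom, assuming the workspace struct was zero-allocated earlier in the function so unset pointers start out NULL:

	/* One error label unwinds any prefix of the allocations, because
	 * kfree(NULL)/kvfree(NULL) are no-ops. */
	ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
	if (!ws->bucket)
		goto fail;	/* ws->bucket_b is still NULL here */
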
@@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
 	return entropy_sum * 100 / entropy_max;
 }
 
-/* Compare buckets by size, ascending */
-static int bucket_comp_rev(const void *lv, const void *rv)
-{
-	const struct bucket_item *l = (const struct bucket_item *)lv;
-	const struct bucket_item *r = (const struct bucket_item *)rv;
+#define RADIX_BASE	4U
+#define COUNTERS_SIZE	(1U << RADIX_BASE)
 
-	return r->count - l->count;
+static u8 get4bits(u64 num, int shift) {
+	u8 low4bits;
+
+	num >>= shift;
+	/* Reverse order */
+	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
+	return low4bits;
 }
 
+/*
+ * Use 4 bits as radix base
+ * Use 16 u32 counters for calculating new possition in buf array
+ *
+ * @array     - array that will be sorted
+ * @array_buf - buffer array to store sorting results
+ *              must be equal in size to @array
+ * @num       - array size
+ */
+static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
+		       int num)
+{
+	u64 max_num;
+	u64 buf_num;
+	u32 counters[COUNTERS_SIZE];
+	u32 new_addr;
+	u32 addr;
+	int bitlen;
+	int shift;
+	int i;
+
+	/*
+	 * Try avoid useless loop iterations for small numbers stored in big
+	 * counters. Example: 48 33 4 ... in 64bit array
+	 */
+	max_num = array[0].count;
+	for (i = 1; i < num; i++) {
+		buf_num = array[i].count;
+		if (buf_num > max_num)
+			max_num = buf_num;
+	}
+
+	buf_num = ilog2(max_num);
+	bitlen = ALIGN(buf_num, RADIX_BASE * 2);
+
+	shift = 0;
+	while (shift < bitlen) {
+		memset(counters, 0, sizeof(counters));
+
+		for (i = 0; i < num; i++) {
+			buf_num = array[i].count;
+			addr = get4bits(buf_num, shift);
+			counters[addr]++;
+		}
+
+		for (i = 1; i < COUNTERS_SIZE; i++)
+			counters[i] += counters[i - 1];
+
+		for (i = num - 1; i >= 0; i--) {
+			buf_num = array[i].count;
+			addr = get4bits(buf_num, shift);
+			counters[addr]--;
+			new_addr = counters[addr];
+			array_buf[new_addr] = array[i];
+		}
+
+		shift += RADIX_BASE;
+
+		/*
+		 * Normal radix expects to move data from a temporary array, to
+		 * the main one. But that requires some CPU time. Avoid that
+		 * by doing another sort iteration to original array instead of
+		 * memcpy()
+		 */
+		memset(counters, 0, sizeof(counters));
+
+		for (i = 0; i < num; i ++) {
+			buf_num = array_buf[i].count;
+			addr = get4bits(buf_num, shift);
+			counters[addr]++;
+		}
+
+		for (i = 1; i < COUNTERS_SIZE; i++)
+			counters[i] += counters[i - 1];
+
+		for (i = num - 1; i >= 0; i--) {
+			buf_num = array_buf[i].count;
+			addr = get4bits(buf_num, shift);
+			counters[addr]--;
+			new_addr = counters[addr];
+			array[new_addr] = array_buf[i];
+		}
+
+		shift += RADIX_BASE;
+	}
+}
+
 /*
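
The kernel version above is specialized to struct bucket_item and unrolls two counting-sort passes per loop iteration, so the data lands back in the caller's array without a memcpy(). To show the technique in isolation, here is a compacted userspace sketch of the same digit-reversed LSD radix sort; it fixes the pass count at 8 (32 bits / 4) instead of using the ilog2()-based cutoff, and all names are local to the sketch:

	#include <stdio.h>
	#include <stdint.h>

	#define RADIX_BASE	4U
	#define COUNTERS_SIZE	(1U << RADIX_BASE)

	struct bucket_item {
		uint32_t count;
	};

	/* Digit extraction with reversal: bigger digits map to smaller
	 * keys, so an ascending counting sort yields a descending result. */
	static uint8_t get4bits(uint64_t num, int shift)
	{
		return (COUNTERS_SIZE - 1) - ((num >> shift) % COUNTERS_SIZE);
	}

	static void radix_sort(struct bucket_item *array,
			       struct bucket_item *array_buf, int num)
	{
		uint32_t counters[COUNTERS_SIZE];
		int shift, i;

		/* 8 passes of 4 bits; even count, so data ends in 'array'. */
		for (shift = 0; shift < 32; shift += RADIX_BASE) {
			int pass = shift / RADIX_BASE;
			struct bucket_item *src = (pass & 1) ? array_buf : array;
			struct bucket_item *dst = (pass & 1) ? array : array_buf;

			for (i = 0; i < (int)COUNTERS_SIZE; i++)
				counters[i] = 0;
			/* Histogram of the current 4-bit digit. */
			for (i = 0; i < num; i++)
				counters[get4bits(src[i].count, shift)]++;
			/* Prefix sums turn counts into end positions. */
			for (i = 1; i < (int)COUNTERS_SIZE; i++)
				counters[i] += counters[i - 1];
			/* Stable scatter, walking backwards. */
			for (i = num - 1; i >= 0; i--)
				dst[--counters[get4bits(src[i].count, shift)]] = src[i];
		}
	}

	int main(void)
	{
		struct bucket_item a[] = { {3}, {48}, {7}, {33}, {0}, {12} };
		struct bucket_item tmp[6];
		int i;

		radix_sort(a, tmp, 6);
		for (i = 0; i < 6; i++)
			printf("%u ", (unsigned)a[i].count);	/* 48 33 12 7 3 0 */
		printf("\n");
		return 0;
	}

The reversal inside get4bits() is the whole ordering trick: each ascending counting-sort pass over reversed digits is a descending pass over the real digits, and because counting sort is stable, later (more significant) passes never undo the order established by earlier ones, so the array comes out descending by count.
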
@@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
 	struct bucket_item *bucket = ws->bucket;
 
 	/* Sort in reverse order */
-	sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
+	radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
 
 	for (i = 0; i < BYTE_CORE_SET_LOW; i++)
 		coreset_sum += bucket[i].count;
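
Why this swap helps: the kernel's sort() is a heapsort making on the order of n * log2(n) comparisons, each an indirect call through the comparator pointer (formerly bucket_comp_rev). With the byte-value buckets here (256 entries, so roughly 256 * 8 = 2048 indirect comparisons per heuristic run), radix sort instead does a small fixed number of linear passes, at most 16 passes of 4 bits for a 64-bit counter and usually far fewer thanks to the ilog2() cutoff, with no function-pointer calls in the hot loop. That is the "gains some performance" item from the pull message.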