Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block
Pull followup block layer updates from Jens Axboe: "I ended up splitting the main pull request for this series into two, mainly because of clashes between NVMe fixes that went into 4.13 after the for-4.14 branches were split off. This pull request is mostly NVMe, but not exclusively. In detail, it contains: - Two pull request for NVMe changes from Christoph. Nothing new on the feature front, basically just fixes all over the map for the core bits, transport, rdma, etc. - Series from Bart, cleaning up various bits in the BFQ scheduler. - Series of bcache fixes, which has been lingering for a release or two. Coly sent this in, but patches from various people in this area. - Set of patches for BFQ from Paolo himself, updating both documentation and fixing some corner cases in performance. - Series from Omar, attempting to now get the 4k loop support correct. Our confidence level is higher this time. - Series from Shaohua for loop as well, improving O_DIRECT performance and fixing a use-after-free" * 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block: (74 commits) bcache: initialize dirty stripes in flash_dev_run() loop: set physical block size to logical block size bcache: fix bch_hprint crash and improve output bcache: Update continue_at() documentation bcache: silence static checker warning bcache: fix for gc and write-back race bcache: increase the number of open buckets bcache: Correct return value for sysfs attach errors bcache: correct cache_dirty_target in __update_writeback_rate() bcache: gc does not work when triggering by manual command bcache: Don't reinvent the wheel but use existing llist API bcache: do not subtract sectors_to_gc for bypassed IO bcache: fix sequential large write IO bypass bcache: Fix leak of bdev reference block/loop: remove unused field block/loop: fix use after free bfq: Use icq_to_bic() consistently bfq: Suppress compiler warnings about comparisons bfq: Check kstrtoul() return value bfq: Declare local functions static ...
This commit is contained in:
@@ -68,6 +68,8 @@
|
||||
#include <linux/random.h>
|
||||
#include <trace/events/bcache.h>
|
||||
|
||||
#define MAX_OPEN_BUCKETS 128
|
||||
|
||||
/* Bucket heap / gen */
|
||||
|
||||
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
|
||||
@@ -671,7 +673,7 @@ int bch_open_buckets_alloc(struct cache_set *c)
|
||||
|
||||
spin_lock_init(&c->data_bucket_lock);
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
|
||||
struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
|
@@ -333,6 +333,7 @@ struct cached_dev {
|
||||
/* Limit number of writeback bios in flight */
|
||||
struct semaphore in_flight;
|
||||
struct task_struct *writeback_thread;
|
||||
struct workqueue_struct *writeback_write_wq;
|
||||
|
||||
struct keybuf writeback_keys;
|
||||
|
||||
|
@@ -70,21 +70,10 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
|
||||
list = llist_del_all(&wait_list->list);
|
||||
|
||||
/* We first reverse the list to preserve FIFO ordering and fairness */
|
||||
|
||||
while (list) {
|
||||
struct llist_node *t = list;
|
||||
list = llist_next(list);
|
||||
|
||||
t->next = reverse;
|
||||
reverse = t;
|
||||
}
|
||||
reverse = llist_reverse_order(list);
|
||||
|
||||
/* Then do the wakeups */
|
||||
|
||||
while (reverse) {
|
||||
cl = container_of(reverse, struct closure, list);
|
||||
reverse = llist_next(reverse);
|
||||
|
||||
llist_for_each_entry(cl, reverse, list) {
|
||||
closure_set_waiting(cl, 0);
|
||||
closure_sub(cl, CLOSURE_WAITING + 1);
|
||||
}
|
||||
|
@@ -312,8 +312,6 @@ static inline void closure_wake_up(struct closure_waitlist *list)
|
||||
* been dropped with closure_put()), it will resume execution at @fn running out
|
||||
* of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
|
||||
*
|
||||
* NOTE: This macro expands to a return in the calling function!
|
||||
*
|
||||
* This is because after calling continue_at() you no longer have a ref on @cl,
|
||||
* and whatever @cl owns may be freed out from under you - a running closure fn
|
||||
* has a ref on its own closure which continue_at() drops.
|
||||
@@ -340,8 +338,6 @@ do { \
|
||||
* Causes @fn to be executed out of @cl, in @wq context (or called directly if
|
||||
* @wq is NULL).
|
||||
*
|
||||
* NOTE: like continue_at(), this macro expands to a return in the caller!
|
||||
*
|
||||
* The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
|
||||
* thus it's not safe to touch anything protected by @cl after a
|
||||
* continue_at_nobarrier().
|
||||
|
@@ -196,12 +196,12 @@ static void bch_data_insert_start(struct closure *cl)
|
||||
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
|
||||
struct bio *bio = op->bio, *n;
|
||||
|
||||
if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
|
||||
wake_up_gc(op->c);
|
||||
|
||||
if (op->bypass)
|
||||
return bch_data_invalidate(cl);
|
||||
|
||||
if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
|
||||
wake_up_gc(op->c);
|
||||
|
||||
/*
|
||||
* Journal writes are marked REQ_PREFLUSH; if the original write was a
|
||||
* flush, it'll wait on the journal write.
|
||||
@@ -400,12 +400,6 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
|
||||
if (!congested && !dc->sequential_cutoff)
|
||||
goto rescale;
|
||||
|
||||
if (!congested &&
|
||||
mode == CACHE_MODE_WRITEBACK &&
|
||||
op_is_write(bio->bi_opf) &&
|
||||
op_is_sync(bio->bi_opf))
|
||||
goto rescale;
|
||||
|
||||
spin_lock(&dc->io_lock);
|
||||
|
||||
hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
|
||||
|
@@ -1026,7 +1026,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
|
||||
}
|
||||
|
||||
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
|
||||
bch_sectors_dirty_init(dc);
|
||||
bch_sectors_dirty_init(&dc->disk);
|
||||
atomic_set(&dc->has_dirty, 1);
|
||||
atomic_inc(&dc->count);
|
||||
bch_writeback_queue(dc);
|
||||
@@ -1059,6 +1059,8 @@ static void cached_dev_free(struct closure *cl)
|
||||
cancel_delayed_work_sync(&dc->writeback_rate_update);
|
||||
if (!IS_ERR_OR_NULL(dc->writeback_thread))
|
||||
kthread_stop(dc->writeback_thread);
|
||||
if (dc->writeback_write_wq)
|
||||
destroy_workqueue(dc->writeback_write_wq);
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
@@ -1228,6 +1230,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
|
||||
goto err;
|
||||
|
||||
bcache_device_attach(d, c, u - c->uuids);
|
||||
bch_sectors_dirty_init(d);
|
||||
bch_flash_dev_request_init(d);
|
||||
add_disk(d->disk);
|
||||
|
||||
@@ -1374,9 +1377,6 @@ static void cache_set_flush(struct closure *cl)
|
||||
struct btree *b;
|
||||
unsigned i;
|
||||
|
||||
if (!c)
|
||||
closure_return(cl);
|
||||
|
||||
bch_cache_accounting_destroy(&c->accounting);
|
||||
|
||||
kobject_put(&c->internal);
|
||||
@@ -1964,6 +1964,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
|
||||
else
|
||||
err = "device busy";
|
||||
mutex_unlock(&bch_register_lock);
|
||||
if (!IS_ERR(bdev))
|
||||
bdput(bdev);
|
||||
if (attr == &ksysfs_register_quiet)
|
||||
goto out;
|
||||
}
|
||||
|
@@ -192,7 +192,7 @@ STORE(__cached_dev)
|
||||
{
|
||||
struct cached_dev *dc = container_of(kobj, struct cached_dev,
|
||||
disk.kobj);
|
||||
unsigned v = size;
|
||||
ssize_t v = size;
|
||||
struct cache_set *c;
|
||||
struct kobj_uevent_env *env;
|
||||
|
||||
@@ -227,7 +227,7 @@ STORE(__cached_dev)
|
||||
bch_cached_dev_run(dc);
|
||||
|
||||
if (attr == &sysfs_cache_mode) {
|
||||
ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1);
|
||||
v = bch_read_string_list(buf, bch_cache_modes + 1);
|
||||
|
||||
if (v < 0)
|
||||
return v;
|
||||
@@ -615,8 +615,21 @@ STORE(__bch_cache_set)
|
||||
bch_cache_accounting_clear(&c->accounting);
|
||||
}
|
||||
|
||||
if (attr == &sysfs_trigger_gc)
|
||||
if (attr == &sysfs_trigger_gc) {
|
||||
/*
|
||||
* Garbage collection thread only works when sectors_to_gc < 0,
|
||||
* when users write to sysfs entry trigger_gc, most of time
|
||||
* they want to forcibly triger gargage collection. Here -1 is
|
||||
* set to c->sectors_to_gc, to make gc_should_run() give a
|
||||
* chance to permit gc thread to run. "give a chance" means
|
||||
* before going into gc_should_run(), there is still chance
|
||||
* that c->sectors_to_gc being set to other positive value. So
|
||||
* writing sysfs entry trigger_gc won't always make sure gc
|
||||
* thread takes effect.
|
||||
*/
|
||||
atomic_set(&c->sectors_to_gc, -1);
|
||||
wake_up_gc(c);
|
||||
}
|
||||
|
||||
if (attr == &sysfs_prune_cache) {
|
||||
struct shrink_control sc;
|
||||
|
@@ -74,24 +74,44 @@ STRTO_H(strtouint, unsigned int)
|
||||
STRTO_H(strtoll, long long)
|
||||
STRTO_H(strtoull, unsigned long long)
|
||||
|
||||
/**
|
||||
* bch_hprint() - formats @v to human readable string for sysfs.
|
||||
*
|
||||
* @v - signed 64 bit integer
|
||||
* @buf - the (at least 8 byte) buffer to format the result into.
|
||||
*
|
||||
* Returns the number of bytes used by format.
|
||||
*/
|
||||
ssize_t bch_hprint(char *buf, int64_t v)
|
||||
{
|
||||
static const char units[] = "?kMGTPEZY";
|
||||
char dec[4] = "";
|
||||
int u, t = 0;
|
||||
int u = 0, t;
|
||||
|
||||
for (u = 0; v >= 1024 || v <= -1024; u++) {
|
||||
t = v & ~(~0 << 10);
|
||||
v >>= 10;
|
||||
}
|
||||
uint64_t q;
|
||||
|
||||
if (!u)
|
||||
return sprintf(buf, "%llu", v);
|
||||
if (v < 0)
|
||||
q = -v;
|
||||
else
|
||||
q = v;
|
||||
|
||||
if (v < 100 && v > -100)
|
||||
snprintf(dec, sizeof(dec), ".%i", t / 100);
|
||||
/* For as long as the number is more than 3 digits, but at least
|
||||
* once, shift right / divide by 1024. Keep the remainder for
|
||||
* a digit after the decimal point.
|
||||
*/
|
||||
do {
|
||||
u++;
|
||||
|
||||
return sprintf(buf, "%lli%s%c", v, dec, units[u]);
|
||||
t = q & ~(~0 << 10);
|
||||
q >>= 10;
|
||||
} while (q >= 1000);
|
||||
|
||||
if (v < 0)
|
||||
/* '-', up to 3 digits, '.', 1 digit, 1 character, null;
|
||||
* yields 8 bytes.
|
||||
*/
|
||||
return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]);
|
||||
else
|
||||
return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]);
|
||||
}
|
||||
|
||||
ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
|
||||
|
@@ -21,7 +21,8 @@
|
||||
static void __update_writeback_rate(struct cached_dev *dc)
|
||||
{
|
||||
struct cache_set *c = dc->disk.c;
|
||||
uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size;
|
||||
uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size -
|
||||
bcache_flash_devs_sectors_dirty(c);
|
||||
uint64_t cache_dirty_target =
|
||||
div_u64(cache_sectors * dc->writeback_percent, 100);
|
||||
|
||||
@@ -186,7 +187,7 @@ static void write_dirty(struct closure *cl)
|
||||
|
||||
closure_bio_submit(&io->bio, cl);
|
||||
|
||||
continue_at(cl, write_dirty_finish, system_wq);
|
||||
continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
|
||||
}
|
||||
|
||||
static void read_dirty_endio(struct bio *bio)
|
||||
@@ -206,7 +207,7 @@ static void read_dirty_submit(struct closure *cl)
|
||||
|
||||
closure_bio_submit(&io->bio, cl);
|
||||
|
||||
continue_at(cl, write_dirty, system_wq);
|
||||
continue_at(cl, write_dirty, io->dc->writeback_write_wq);
|
||||
}
|
||||
|
||||
static void read_dirty(struct cached_dev *dc)
|
||||
@@ -481,17 +482,17 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
|
||||
return MAP_CONTINUE;
|
||||
}
|
||||
|
||||
void bch_sectors_dirty_init(struct cached_dev *dc)
|
||||
void bch_sectors_dirty_init(struct bcache_device *d)
|
||||
{
|
||||
struct sectors_dirty_init op;
|
||||
|
||||
bch_btree_op_init(&op.op, -1);
|
||||
op.inode = dc->disk.id;
|
||||
op.inode = d->id;
|
||||
|
||||
bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
|
||||
bch_btree_map_keys(&op.op, d->c, &KEY(op.inode, 0, 0),
|
||||
sectors_dirty_init_fn, 0);
|
||||
|
||||
dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
|
||||
d->sectors_dirty_last = bcache_dev_sectors_dirty(d);
|
||||
}
|
||||
|
||||
void bch_cached_dev_writeback_init(struct cached_dev *dc)
|
||||
@@ -515,6 +516,11 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
|
||||
|
||||
int bch_cached_dev_writeback_start(struct cached_dev *dc)
|
||||
{
|
||||
dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
|
||||
WQ_MEM_RECLAIM, 0);
|
||||
if (!dc->writeback_write_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
|
||||
"bcache_writeback");
|
||||
if (IS_ERR(dc->writeback_thread))
|
||||
|
@@ -14,6 +14,25 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
|
||||
{
|
||||
uint64_t i, ret = 0;
|
||||
|
||||
mutex_lock(&bch_register_lock);
|
||||
|
||||
for (i = 0; i < c->nr_uuids; i++) {
|
||||
struct bcache_device *d = c->devices[i];
|
||||
|
||||
if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
|
||||
continue;
|
||||
ret += bcache_dev_sectors_dirty(d);
|
||||
}
|
||||
|
||||
mutex_unlock(&bch_register_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline unsigned offset_to_stripe(struct bcache_device *d,
|
||||
uint64_t offset)
|
||||
{
|
||||
@@ -84,7 +103,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
|
||||
|
||||
void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
|
||||
|
||||
void bch_sectors_dirty_init(struct cached_dev *dc);
|
||||
void bch_sectors_dirty_init(struct bcache_device *);
|
||||
void bch_cached_dev_writeback_init(struct cached_dev *);
|
||||
int bch_cached_dev_writeback_start(struct cached_dev *);
|
||||
|
||||
|
Reference in New Issue
Block a user