Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "It's a pretty quiet round this time, which is nice. This contains:

   - series from Bart, cleaning up the way we set/test/clear atomic
     queue flags.

   - series from Bart, fixing races between gendisk and queue
     registration and removal.

   - set of bcache fixes and improvements from various folks, by way of
     Michael Lyle.

   - set of lightnvm updates from Matias, most of it being the 1.2 to
     2.0 transition.

   - removal of unused DIO flags from Nikolay.

   - blk-mq/sbitmap memory ordering fixes from Omar.

   - divide-by-zero fix for BFQ from Paolo.

   - minor documentation patches from Randy.

   - timeout fix from Tejun.

   - Alpha "can't write a char atomically" fix from Mikulas.

   - set of NVMe fixes by way of Keith.

   - bsg and bsg-lib improvements from Christoph.

   - a few sed-opal fixes from Jonas.

   - cdrom check-disk-change deadlock fix from Maurizio.

   - various little fixes, comment fixes, etc from various folks"

* tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits)
  blk-mq: Directly schedule q->timeout_work when aborting a request
  blktrace: fix comment in blktrace_api.h
  lightnvm: remove function name in strings
  lightnvm: pblk: remove some unnecessary NULL checks
  lightnvm: pblk: don't recover unwritten lines
  lightnvm: pblk: implement 2.0 support
  lightnvm: pblk: implement get log report chunk
  lightnvm: pblk: rename ppaf* to addrf*
  lightnvm: pblk: check for supported version
  lightnvm: implement get log report chunk helpers
  lightnvm: make address conversions depend on generic device
  lightnvm: add support for 2.0 address format
  lightnvm: normalize geometry nomenclature
  lightnvm: complete geo structure with maxoc*
  lightnvm: add shorten OCSSD version in geo
  lightnvm: add minor version to generic geometry
  lightnvm: simplify geometry structure
  lightnvm: pblk: refactor init/exit sequences
  lightnvm: Avoid validation of default op value
  lightnvm: centralize permission check for lightnvm ioctl
  ...
Tento commit je obsažen v:
Linus Torvalds
2018-04-05 14:27:02 -07:00
148 změnil soubory, kde provedl 4312 přidání a 2106 odebrání

Zobrazit soubor

@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
if (kthread_should_stop()) { \
if (kthread_should_stop() || \
test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
return 0; \
} \

Zobrazit soubor

@@ -188,6 +188,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include "bset.h"
#include "util.h"
@@ -258,10 +259,11 @@ struct bcache_device {
struct gendisk *disk;
unsigned long flags;
#define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_CLOSING 0
#define BCACHE_DEV_DETACHING 1
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned nr_stripes;
unsigned stripe_size;
atomic_t *stripe_sectors_dirty;
@@ -286,6 +288,12 @@ struct io {
sector_t last;
};
enum stop_on_failure {
BCH_CACHED_DEV_STOP_AUTO = 0,
BCH_CACHED_DEV_STOP_ALWAYS,
BCH_CACHED_DEV_STOP_MODE_MAX,
};
struct cached_dev {
struct list_head list;
struct bcache_device disk;
@@ -359,6 +367,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;
unsigned io_disable:1;
unsigned verify:1;
unsigned bypass_torture_test:1;
@@ -378,6 +387,11 @@ struct cached_dev {
unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
atomic_t io_errors;
unsigned error_limit;
};
enum alloc_reserve {
@@ -474,10 +488,15 @@ struct gc_stat {
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
*
* CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all
* external and internal I/O should be denied when this flag is set.
*
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
#define CACHE_SET_IO_DISABLE 3
struct cache_set {
struct closure cl;
@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}
static inline void closure_bio_submit(struct cache_set *c,
struct bio *bio,
struct closure *cl)
{
closure_get(cl);
if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
generic_make_request(bio);
}
/*
* Prevent the kthread exits directly, and make sure when kthread_stop()
* is called to stop a kthread, it is still alive. If a kthread might be
* stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is
* necessary before the kthread returns.
*/
static inline void wait_for_kthread_stop(void)
{
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
}
/* Forward declarations */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);
@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
extern const char * const bch_stop_on_failure_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;

Zobrazit soubor

@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
{
struct btree_iter_set unused;
struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;
if (!btree_iter_end(iter)) {
@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
}
if (iter->data->k == iter->data->end)
heap_pop(iter, unused, cmp);
heap_pop(iter, b, cmp);
else
heap_sift(iter, 0, cmp);
}

Zobrazit soubor

@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
#ifdef CONFIG_BCACHE_DEBUG
int __bch_count_data(struct btree_keys *);
void __bch_check_keys(struct btree_keys *, const char *, ...);
void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
void bch_dump_bucket(struct btree_keys *);
#else
static inline int __bch_count_data(struct btree_keys *b) { return -1; }
static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void __printf(2, 3)
__bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);

Zobrazit soubor

@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
struct btree *b, *t;
unsigned long i, nr = sc->nr_to_scan;
unsigned long freed = 0;
unsigned int btree_cache_used;
if (c->shrinker_disabled)
return SHRINK_STOP;
@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
if (freed >= nr)
break;
if (nr <= 0)
goto out;
if (++i > 3 &&
!mca_reap(b, 0, false)) {
@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
rw_unlock(true, b);
freed++;
}
nr--;
}
for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
for (; (nr--) && i < btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
goto out;
@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
}
out:
mutex_unlock(&c->bucket_lock);
return freed;
return freed * c->btree_pages;
}
static unsigned long bch_mca_count(struct shrinker *shrink,
@@ -959,7 +962,7 @@ err:
return b;
}
/**
/*
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)
btree_gc_start(c);
/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)
if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
} while (ret);
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
wake_up_allocators(c);
@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)
while (1) {
wait_event_interruptible(c->gc_wait,
kthread_should_stop() || gc_should_run(c));
kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
gc_should_run(c));
if (kthread_should_stop())
if (kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;
set_gc_sectors(c);
bch_btree_gc(c);
}
wait_for_kthread_stop();
return 0;
}
@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
if (b->key.ptr[0] != btree_ptr ||
b->seq != seq + 1) {
op->lock = b->level;
op->lock = b->level;
goto out;
}
}

Zobrazit soubor

@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
}
EXPORT_SYMBOL(closure_sub);
/**
/*
* closure_put - decrement a closure's refcount
*/
void closure_put(struct closure *cl)
@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
}
EXPORT_SYMBOL(closure_put);
/**
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
*/
void __closure_wake_up(struct closure_waitlist *wait_list)
@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
*
* @waitlist will own a ref on @cl, which will be released when
* @waitlist: will own a ref on @cl, which will be released when
* closure_wake_up() is called on @waitlist.
* @cl: closure pointer.
*
*/
bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
}
EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *debug;
static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
{
@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
.release = single_release
};
void __init closure_debug_init(void)
int __init closure_debug_init(void)
{
debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
closure_debug = debugfs_create_file("closures",
0400, bcache_debug, NULL, &debug_ops);
return IS_ERR_OR_NULL(closure_debug);
}
#endif
MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");

Zobrazit soubor

@@ -105,6 +105,7 @@
struct closure;
struct closure_syncer;
typedef void (closure_fn) (struct closure *);
extern struct dentry *bcache_debug;
struct closure_waitlist {
struct llist_head list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
void closure_debug_init(void);
int closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
static inline void closure_debug_init(void) {}
static inline int closure_debug_init(void) { return 0; }
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}

Zobrazit soubor

@@ -17,7 +17,7 @@
#include <linux/random.h>
#include <linux/seq_file.h>
static struct dentry *debug;
struct dentry *bcache_debug;
#ifdef CONFIG_BCACHE_DEBUG
@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {
void bch_debug_init_cache_set(struct cache_set *c)
{
if (!IS_ERR_OR_NULL(debug)) {
if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
c->debug = debugfs_create_file(name, 0400, debug, c,
c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
}
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
void bch_debug_exit(void)
{
if (!IS_ERR_OR_NULL(debug))
debugfs_remove_recursive(debug);
if (!IS_ERR_OR_NULL(bcache_debug))
debugfs_remove_recursive(bcache_debug);
}
int __init bch_debug_init(struct kobject *kobj)
{
debug = debugfs_create_dir("bcache", NULL);
bcache_debug = debugfs_create_dir("bcache", NULL);
return IS_ERR_OR_NULL(debug);
return IS_ERR_OR_NULL(bcache_debug);
}

Zobrazit soubor

@@ -534,7 +534,6 @@ err:
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
struct bucket *g;
unsigned i, stale;
if (!KEY_PTRS(k) ||
@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
return false;
for (i = 0; i < KEY_PTRS(k); i++) {
g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);
btree_bug_on(stale > 96, b,

Zobrazit soubor

@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
b->submit_time_us = local_clock_us();
closure_bio_submit(bio, bio->bi_private);
closure_bio_submit(c, bio, bio->bi_private);
}
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
}
/* IO errors */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
char buf[BDEVNAME_SIZE];
unsigned errors;
WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
pr_err("%s: IO error on backing device, unrecoverable",
bio_devname(bio, buf));
else
bch_cached_dev_error(dc);
}
void bch_count_io_errors(struct cache *ca,
blk_status_t error,

Zobrazit soubor

@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);
closure_bio_submit(bio, &cl);
closure_bio_submit(ca->set, bio, &cl);
closure_sync(&cl);
/* This function could be simpler now since we no longer write
@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
struct cache *ca;
uint64_t last_seq;
unsigned iter, n = 0;
atomic_t p;
atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
}
static void journal_write_unlock(struct closure *cl)
__releases(&c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
spin_unlock(&c->journal.lock);
while ((bio = bio_list_pop(&list)))
closure_bio_submit(bio, cl);
closure_bio_submit(c, bio, cl);
continue_at(cl, journal_write_done, NULL);
}
@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)
static struct journal_write *journal_wait_for_write(struct cache_set *c,
unsigned nkeys)
__acquires(&c->journal.lock)
{
size_t sectors;
struct closure cl;

Zobrazit soubor

@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
}
op->insert_data_done = true;
/* get in bch_data_insert() */
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, op->wq);
@@ -295,6 +296,7 @@ err:
/**
* bch_data_insert - stick some data in the cache
* @cl: closure pointer.
*
* This is the starting point for any data to end up in a cache device; it could
* be from a normal write, or a writeback write, or a write to a flash only
@@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
closure_put(cl);
}
static void backing_request_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
struct cached_dev *dc = container_of(s->d,
struct cached_dev, disk);
/*
* If a bio has REQ_PREFLUSH for writeback mode, it is
* speically assembled in cached_dev_write() for a non-zero
* write request which has REQ_PREFLUSH. we don't set
* s->iop.status by this failure, the status will be decided
* by result of bch_data_insert() operation.
*/
if (unlikely(s->iop.writeback &&
bio->bi_opf & REQ_PREFLUSH)) {
char buf[BDEVNAME_SIZE];
bio_devname(bio, buf);
pr_err("Can't flush %s: returned bi_status %i",
buf, bio->bi_status);
} else {
/* set to orig_bio->bi_status in bio_complete() */
s->iop.status = bio->bi_status;
}
s->recoverable = false;
/* should count I/O error for backing device here */
bch_count_backing_io_errors(dc, bio);
}
bio_put(bio);
closure_put(cl);
}
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
@@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
}
}
static void do_bio_hook(struct search *s, struct bio *orig_bio)
static void do_bio_hook(struct search *s,
struct bio *orig_bio,
bio_end_io_t *end_io_fn)
{
struct bio *bio = &s->bio.bio;
bio_init(bio, NULL, 0);
__bio_clone_fast(bio, orig_bio);
bio->bi_end_io = request_endio;
/*
* bi_end_io can be set separately somewhere else, e.g. the
* variants in,
* - cache_bio->bi_end_io from cached_dev_cache_miss()
* - n->bi_end_io from cache_lookup_fn()
*/
bio->bi_end_io = end_io_fn;
bio->bi_private = &s->cl;
bio_cnt_set(bio, 3);
@@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
s = mempool_alloc(d->c->search, GFP_NOIO);
closure_init(&s->cl, NULL);
do_bio_hook(s, bio);
do_bio_hook(s, bio, request_endio);
s->orig_bio = bio;
s->cache_miss = NULL;
@@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
trace_bcache_read_retry(s->orig_bio);
s->iop.status = 0;
do_bio_hook(s, s->orig_bio);
do_bio_hook(s, s->orig_bio, backing_request_endio);
/* XXX: invalidate cache */
closure_bio_submit(bio, cl);
/* I/O request sent to backing device */
closure_bio_submit(s->iop.c, bio, cl);
}
continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
cache_bio->bi_end_io = request_endio;
cache_bio->bi_end_io = backing_request_endio;
cache_bio->bi_private = &s->cl;
bch_bio_map(cache_bio, NULL);
@@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
closure_bio_submit(cache_bio, &s->cl);
/* I/O request sent to backing device */
closure_bio_submit(s->iop.c, cache_bio, &s->cl);
return ret;
out_put:
bio_put(cache_bio);
out_submit:
miss->bi_end_io = request_endio;
miss->bi_end_io = backing_request_endio;
miss->bi_private = &s->cl;
closure_bio_submit(miss, &s->cl);
/* I/O request sent to backing device */
closure_bio_submit(s->iop.c, miss, &s->cl);
return ret;
}
@@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bio = s->orig_bio;
bio_get(s->iop.bio);
if ((bio_op(bio) != REQ_OP_DISCARD) ||
blk_queue_discard(bdev_get_queue(dc->bdev)))
closure_bio_submit(bio, cl);
if (bio_op(bio) == REQ_OP_DISCARD &&
!blk_queue_discard(bdev_get_queue(dc->bdev)))
goto insert_data;
/* I/O request sent to backing device */
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);
} else if (s->iop.writeback) {
bch_writeback_add(dc);
s->iop.bio = bio;
if (bio->bi_opf & REQ_PREFLUSH) {
/* Also need to send a flush to the backing device */
struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
dc->disk.bio_split);
/*
* Also need to send a flush to the backing
* device.
*/
struct bio *flush;
flush = bio_alloc_bioset(GFP_NOIO, 0,
dc->disk.bio_split);
if (!flush) {
s->iop.status = BLK_STS_RESOURCE;
goto insert_data;
}
bio_copy_dev(flush, bio);
flush->bi_end_io = request_endio;
flush->bi_end_io = backing_request_endio;
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
closure_bio_submit(flush, cl);
/* I/O request sent to backing device */
closure_bio_submit(s->iop.c, flush, cl);
}
} else {
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
closure_bio_submit(bio, cl);
/* I/O request sent to backing device */
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);
}
insert_data:
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
}
@@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);
/* If it's a flush, we send the flush to the backing device too */
closure_bio_submit(bio, cl);
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);
continue_at(cl, cached_dev_bio_complete, NULL);
}
struct detached_dev_io_private {
struct bcache_device *d;
unsigned long start_time;
bio_end_io_t *bi_end_io;
void *bi_private;
};
static void detached_dev_end_io(struct bio *bio)
{
struct detached_dev_io_private *ddip;
ddip = bio->bi_private;
bio->bi_end_io = ddip->bi_end_io;
bio->bi_private = ddip->bi_private;
generic_end_io_acct(ddip->d->disk->queue,
bio_data_dir(bio),
&ddip->d->disk->part0, ddip->start_time);
if (bio->bi_status) {
struct cached_dev *dc = container_of(ddip->d,
struct cached_dev, disk);
/* should count I/O error for backing device here */
bch_count_backing_io_errors(dc, bio);
}
kfree(ddip);
bio->bi_end_io(bio);
}
static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
{
struct detached_dev_io_private *ddip;
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
/*
* no need to call closure_get(&dc->disk.cl),
* because upper layer had already opened bcache device,
* which would call closure_get(&dc->disk.cl)
*/
ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
ddip->d = d;
ddip->start_time = jiffies;
ddip->bi_end_io = bio->bi_end_io;
ddip->bi_private = bio->bi_private;
bio->bi_end_io = detached_dev_end_io;
bio->bi_private = ddip;
if ((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(dc->bdev)))
bio->bi_end_io(bio);
else
generic_make_request(bio);
}
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
dc->io_disable)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
atomic_set(&dc->backing_idle, 0);
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
@@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
else
cached_dev_read(dc, s);
}
} else {
if ((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(dc->bdev)))
bio_endio(bio);
else
generic_make_request(bio);
}
} else
/* I/O request sent to backing device */
detached_dev_do_request(d, bio);
return BLK_QC_T_NONE;
}
@@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);
if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);

Zobrazit soubor

@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
NULL
};
/* Default is -1; we skip past it for stop_when_cache_set_failed */
const char * const bch_stop_on_failure_modes[] = {
"default",
"auto",
"always",
NULL
};
static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
LIST_HEAD(bch_cache_sets);
@@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
bio->bi_private = dc;
closure_get(cl);
/* I/O request sent to backing device */
__write_super(&dc->sb, bio);
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
@@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, ca->disk_buckets);
closure_bio_submit(bio, &ca->prio);
closure_bio_submit(ca->set, bio, &ca->prio);
closure_sync(cl);
}
@@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
sector_t sectors)
{
struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
SIZE_MAX / sizeof(atomic_t));
size_t n;
int idx;
@@ -777,9 +788,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!d->nr_stripes ||
d->nr_stripes > INT_MAX ||
d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
(unsigned)d->nr_stripes);
return -ENOMEM;
@@ -833,9 +842,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
q->limits.io_min = block_size;
q->limits.logical_block_size = block_size;
q->limits.physical_block_size = block_size;
set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags);
clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags);
set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags);
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
blk_queue_write_cache(q, true, true);
@@ -899,6 +908,31 @@ void bch_cached_dev_run(struct cached_dev *dc)
pr_debug("error creating sysfs link");
}
/*
* If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
* work dc->writeback_rate_update is running. Wait until the routine
* quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
* cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
* seconds, give up waiting here and continue to cancel it too.
*/
static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
{
int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
do {
if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
&dc->disk.flags))
break;
time_out--;
schedule_timeout_interruptible(1);
} while (time_out > 0);
if (time_out == 0)
pr_warn("give up waiting for dc->writeback_write_update to quit");
cancel_delayed_work_sync(&dc->writeback_rate_update);
}
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
@@ -911,7 +945,9 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_lock(&bch_register_lock);
cancel_delayed_work_sync(&dc->writeback_rate_update);
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
kthread_stop(dc->writeback_thread);
dc->writeback_thread = NULL;
@@ -954,6 +990,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
closure_get(&dc->disk.cl);
bch_writeback_queue(dc);
cached_dev_put(dc);
}
@@ -1065,7 +1102,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
bch_sectors_dirty_init(&dc->disk);
atomic_set(&dc->has_dirty, 1);
refcount_inc(&dc->count);
bch_writeback_queue(dc);
}
@@ -1093,14 +1129,16 @@ static void cached_dev_free(struct closure *cl)
{
struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
cancel_delayed_work_sync(&dc->writeback_rate_update);
mutex_lock(&bch_register_lock);
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
if (!IS_ERR_OR_NULL(dc->writeback_thread))
kthread_stop(dc->writeback_thread);
if (dc->writeback_write_wq)
destroy_workqueue(dc->writeback_write_wq);
mutex_lock(&bch_register_lock);
if (atomic_read(&dc->running))
bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk);
@@ -1170,6 +1208,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
atomic_set(&dc->io_errors, 0);
dc->io_disable = false;
dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
/* default to auto */
dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
bch_cached_dev_request_init(dc);
bch_cached_dev_writeback_init(dc);
return 0;
@@ -1321,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
return flash_dev_run(c, u);
}
bool bch_cached_dev_error(struct cached_dev *dc)
{
char name[BDEVNAME_SIZE];
if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
return false;
dc->io_disable = true;
/* make others know io_disable is true earlier */
smp_mb();
pr_err("stop %s: too many IO errors on backing device %s\n",
dc->disk.disk->disk_name, bdevname(dc->bdev, name));
bcache_device_stop(&dc->disk);
return true;
}
/* Cache set */
__printf(2, 3)
@@ -1332,6 +1394,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
test_bit(CACHE_SET_STOPPING, &c->flags))
return false;
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
pr_warn("CACHE_SET_IO_DISABLE already set");
/* XXX: we can be called from atomic context
acquire_console_sem();
*/
@@ -1443,25 +1508,72 @@ static void cache_set_flush(struct closure *cl)
closure_return(cl);
}
/*
* This function is only called when CACHE_SET_IO_DISABLE is set, which means
* cache set is unregistering due to too many I/O errors. In this condition,
* the bcache device might be stopped, it depends on stop_when_cache_set_failed
* value and whether the broken cache has dirty data:
*
* dc->stop_when_cache_set_failed dc->has_dirty stop bcache device
* BCH_CACHED_STOP_AUTO 0 NO
* BCH_CACHED_STOP_AUTO 1 YES
* BCH_CACHED_DEV_STOP_ALWAYS 0 YES
* BCH_CACHED_DEV_STOP_ALWAYS 1 YES
*
* The expected behavior is, if stop_when_cache_set_failed is configured to
* "auto" via sysfs interface, the bcache device will not be stopped if the
* backing device is clean on the broken cache device.
*/
static void conditional_stop_bcache_device(struct cache_set *c,
struct bcache_device *d,
struct cached_dev *dc)
{
if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
d->disk->disk_name, c->sb.set_uuid);
bcache_device_stop(d);
} else if (atomic_read(&dc->has_dirty)) {
/*
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 1
*/
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
d->disk->disk_name);
bcache_device_stop(d);
} else {
/*
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 0
*/
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
d->disk->disk_name);
}
}
static void __cache_set_unregister(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, caching);
struct cached_dev *dc;
struct bcache_device *d;
size_t i;
mutex_lock(&bch_register_lock);
for (i = 0; i < c->devices_max_used; i++)
if (c->devices[i]) {
if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
dc = container_of(c->devices[i],
struct cached_dev, disk);
bch_cached_dev_detach(dc);
} else {
bcache_device_stop(c->devices[i]);
}
for (i = 0; i < c->devices_max_used; i++) {
d = c->devices[i];
if (!d)
continue;
if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
dc = container_of(d, struct cached_dev, disk);
bch_cached_dev_detach(dc);
if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
conditional_stop_bcache_device(c, d, dc);
} else {
bcache_device_stop(d);
}
}
mutex_unlock(&bch_register_lock);
@@ -1567,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
err:
@@ -2148,7 +2261,6 @@ static int __init bcache_init(void)
mutex_init(&bch_register_lock);
init_waitqueue_head(&unregister_wait);
register_reboot_notifier(&reboot);
closure_debug_init();
bcache_major = register_blkdev(0, "bcache");
if (bcache_major < 0) {
@@ -2160,7 +2272,7 @@ static int __init bcache_init(void)
if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
bch_request_init() ||
bch_debug_init(bcache_kobj) ||
bch_debug_init(bcache_kobj) || closure_debug_init() ||
sysfs_create_files(bcache_kobj, files))
goto err;

Zobrazit soubor

@@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
@@ -95,6 +96,7 @@ read_attribute(partial_stripes_expensive);
rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
@@ -125,6 +127,12 @@ SHOW(__bch_cached_dev)
bch_cache_modes + 1,
BDEV_CACHE_MODE(&dc->sb));
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes + 1,
dc->stop_when_cache_set_failed);
sysfs_printf(data_csum, "%i", dc->disk.data_csum);
var_printf(verify, "%i");
var_printf(bypass_torture_test, "%i");
@@ -133,7 +141,9 @@ SHOW(__bch_cached_dev)
var_print(writeback_delay);
var_print(writeback_percent);
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
sysfs_printf(io_error_limit, "%i", dc->error_limit);
sysfs_printf(io_disable, "%i", dc->io_disable);
var_print(writeback_rate_update_seconds);
var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
@@ -173,7 +183,7 @@ SHOW(__bch_cached_dev)
sysfs_hprint(dirty_data,
bcache_dev_sectors_dirty(&dc->disk) << 9);
sysfs_hprint(stripe_size, dc->disk.stripe_size << 9);
sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
var_printf(partial_stripes_expensive, "%u");
var_hprint(sequential_cutoff);
@@ -224,6 +234,14 @@ STORE(__cached_dev)
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
if (attr == &sysfs_io_disable) {
int v = strtoul_or_return(buf);
dc->io_disable = v ? 1 : 0;
}
d_strtoi_h(sequential_cutoff);
d_strtoi_h(readahead);
@@ -246,6 +264,15 @@ STORE(__cached_dev)
}
}
if (attr == &sysfs_stop_when_cache_set_failed) {
v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
if (v < 0)
return v;
dc->stop_when_cache_set_failed = v;
}
if (attr == &sysfs_label) {
if (size > SB_LABEL_SIZE)
return -EINVAL;
@@ -309,7 +336,8 @@ STORE(bch_cached_dev)
bch_writeback_queue(dc);
if (attr == &sysfs_writeback_percent)
schedule_delayed_work(&dc->writeback_rate_update,
if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
mutex_unlock(&bch_register_lock);
@@ -324,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
&sysfs_writeback_delay,
@@ -333,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_debug,
&sysfs_errors,
&sysfs_io_error_limit,
&sysfs_io_disable,
&sysfs_dirty_data,
&sysfs_stripe_size,
&sysfs_partial_stripes_expensive,
@@ -590,6 +622,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
sysfs_printf(io_disable, "%i",
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
@@ -679,6 +713,20 @@ STORE(__bch_cache_set)
if (attr == &sysfs_io_error_halflife)
c->error_decay = strtoul_or_return(buf) / 88;
if (attr == &sysfs_io_disable) {
int v = strtoul_or_return(buf);
if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
pr_warn("CACHE_SET_IO_DISABLE already set");
} else {
if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
&c->flags))
pr_warn("CACHE_SET_IO_DISABLE already cleared");
}
}
sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
sysfs_strtoul(verify, c->verify);
sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
@@ -764,6 +812,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
&sysfs_io_disable,
NULL
};
KTYPE(bch_cache_set_internal);

Zobrazit soubor

@@ -32,20 +32,27 @@ int bch_ ## name ## _h(const char *cp, type *res) \
case 'y': \
case 'z': \
u++; \
/* fall through */ \
case 'e': \
u++; \
/* fall through */ \
case 'p': \
u++; \
/* fall through */ \
case 't': \
u++; \
/* fall through */ \
case 'g': \
u++; \
/* fall through */ \
case 'm': \
u++; \
/* fall through */ \
case 'k': \
u++; \
if (e++ == cp) \
return -EINVAL; \
/* fall through */ \
case '\n': \
case '\0': \
if (*e == '\n') \
@@ -75,10 +82,9 @@ STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)
/**
* bch_hprint() - formats @v to human readable string for sysfs.
*
* @v - signed 64 bit integer
* @buf - the (at least 8 byte) buffer to format the result into.
* bch_hprint - formats @v to human readable string for sysfs.
* @buf: the (at least 8 byte) buffer to format the result into.
* @v: signed 64 bit integer
*
* Returns the number of bytes used by format.
*/
@@ -218,13 +224,12 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
}
/**
* bch_next_delay() - increment @d by the amount of work done, and return how
* long to delay until the next time to do some work.
* bch_next_delay() - update ratelimiting statistics and calculate next delay
* @d: the struct bch_ratelimit to update
* @done: the amount of work done, in arbitrary units
*
* @d - the struct bch_ratelimit to update
* @done - the amount of work done, in arbitrary units
*
* Returns the amount of time to delay by, in jiffies
* Increment @d by the amount of work done, and return how long to delay in
* jiffies until the next time to do some work.
*/
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{

Zobrazit soubor

@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
return bdev->bd_inode->i_size >> 9;
}
#define closure_bio_submit(bio, cl) \
do { \
closure_get(cl); \
generic_make_request(bio); \
} while (0)
uint64_t bch_crc64_update(uint64_t, const void *, size_t);
uint64_t bch_crc64(const void *, size_t);

Zobrazit soubor

@@ -114,6 +114,27 @@ static void update_writeback_rate(struct work_struct *work)
struct cached_dev *dc = container_of(to_delayed_work(work),
struct cached_dev,
writeback_rate_update);
struct cache_set *c = dc->disk.c;
/*
* should check BCACHE_DEV_RATE_DW_RUNNING before calling
* cancel_delayed_work_sync().
*/
set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
smp_mb();
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
* check it here too.
*/
if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
smp_mb();
return;
}
down_read(&dc->writeback_lock);
@@ -123,8 +144,23 @@ static void update_writeback_rate(struct work_struct *work)
up_read(&dc->writeback_lock);
schedule_delayed_work(&dc->writeback_rate_update,
/*
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
* check it here too.
*/
if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
}
/*
* should check BCACHE_DEV_RATE_DW_RUNNING before calling
* cancel_delayed_work_sync().
*/
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
smp_mb();
}
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
@@ -253,7 +289,8 @@ static void write_dirty(struct closure *cl)
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
closure_bio_submit(&io->bio, cl);
/* I/O request sent to backing device */
closure_bio_submit(io->dc->disk.c, &io->bio, cl);
}
atomic_set(&dc->writeback_sequence_next, next_sequence);
@@ -279,7 +316,7 @@ static void read_dirty_submit(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
closure_bio_submit(&io->bio, cl);
closure_bio_submit(io->dc->disk.c, &io->bio, cl);
continue_at(cl, write_dirty, io->dc->writeback_write_wq);
}
@@ -305,7 +342,9 @@ static void read_dirty(struct cached_dev *dc)
next = bch_keybuf_next(&dc->writeback_keys);
while (!kthread_should_stop() && next) {
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
next) {
size = 0;
nk = 0;
@@ -402,7 +441,9 @@ static void read_dirty(struct cached_dev *dc)
}
}
while (!kthread_should_stop() && delay) {
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
delay) {
schedule_timeout_interruptible(delay);
delay = writeback_delay(dc, 0);
}
@@ -558,21 +599,30 @@ static bool refill_dirty(struct cached_dev *dc)
static int bch_writeback_thread(void *arg)
{
struct cached_dev *dc = arg;
struct cache_set *c = dc->disk.c;
bool searched_full_index;
bch_ratelimit_reset(&dc->writeback_rate);
while (!kthread_should_stop()) {
while (!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
down_write(&dc->writeback_lock);
set_current_state(TASK_INTERRUPTIBLE);
if (!atomic_read(&dc->has_dirty) ||
(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
!dc->writeback_running)) {
/*
* If the bache device is detaching, skip here and continue
* to perform writeback. Otherwise, if no dirty data on cache,
* or there is dirty data on cache but writeback is disabled,
* the writeback thread should sleep here and wait for others
* to wake up it.
*/
if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
(!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
up_write(&dc->writeback_lock);
if (kthread_should_stop()) {
if (kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
set_current_state(TASK_RUNNING);
return 0;
break;
}
schedule();
@@ -585,9 +635,16 @@ static int bch_writeback_thread(void *arg)
if (searched_full_index &&
RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
atomic_set(&dc->has_dirty, 0);
cached_dev_put(dc);
SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
bch_write_bdev_super(dc, NULL);
/*
* If bcache device is detaching via sysfs interface,
* writeback thread should stop after there is no dirty
* data on cache. BCACHE_DEV_DETACHING flag is set in
* bch_cached_dev_detach().
*/
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
break;
}
up_write(&dc->writeback_lock);
@@ -599,6 +656,7 @@ static int bch_writeback_thread(void *arg)
while (delay &&
!kthread_should_stop() &&
!test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
delay = schedule_timeout_interruptible(delay);
@@ -606,6 +664,9 @@ static int bch_writeback_thread(void *arg)
}
}
cached_dev_put(dc);
wait_for_kthread_stop();
return 0;
}
@@ -659,6 +720,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_rate_p_term_inverse = 40;
dc->writeback_rate_i_term_inverse = 10000;
WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
}
@@ -669,11 +731,15 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc)
if (!dc->writeback_write_wq)
return -ENOMEM;
cached_dev_get(dc);
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
if (IS_ERR(dc->writeback_thread))
if (IS_ERR(dc->writeback_thread)) {
cached_dev_put(dc);
return PTR_ERR(dc->writeback_thread);
}
WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);

Zobrazit soubor

@@ -39,7 +39,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c)
if (!d || !UUID_FLASH_ONLY(&c->uuids[i]))
continue;
ret += bcache_dev_sectors_dirty(d);
ret += bcache_dev_sectors_dirty(d);
}
mutex_unlock(&bch_register_lock);
@@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc)
{
if (!atomic_read(&dc->has_dirty) &&
!atomic_xchg(&dc->has_dirty, 1)) {
refcount_inc(&dc->count);
if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) {
SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY);
/* XXX: should do this synchronously */

Zobrazit soubor

@@ -1857,7 +1857,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits = *limits;
if (!dm_table_supports_discards(t)) {
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
/* Must also clear discard limits... */
q->limits.max_discard_sectors = 0;
q->limits.max_hw_discard_sectors = 0;
@@ -1865,7 +1865,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits.discard_alignment = 0;
q->limits.discard_misaligned = 0;
} else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
wc = true;
@@ -1875,15 +1875,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
blk_queue_write_cache(q, wc, fua);
if (dm_table_supports_dax(t))
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
if (dm_table_supports_dax_write_cache(t))
dax_write_cache(t->md->dax_dev, true);
/* Ensure that all underlying devices are non-rotational. */
if (dm_table_all_devices_attribute(t, device_is_nonrot))
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
if (!dm_table_supports_write_same(t))
q->limits.max_write_same_sectors = 0;
@@ -1891,9 +1891,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
q->limits.max_write_zeroes_sectors = 0;
if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
else
queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
dm_table_verify_integrity(t);
@@ -1904,7 +1904,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
* have it set.
*/
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
}
unsigned int dm_table_get_num_targets(struct dm_table *t)

Zobrazit soubor

@@ -1848,7 +1848,7 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
if (!md->queue)
goto bad;
md->queue->queuedata = md;

Zobrazit soubor

@@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
}
if (!discard_supported)
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
/*
* Here we calculate the device offsets.

Zobrazit soubor

@@ -5206,12 +5206,12 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
if (mddev->gendisk)
del_gendisk(mddev->gendisk);
if (mddev->queue)
blk_cleanup_queue(mddev->queue);
if (mddev->gendisk) {
del_gendisk(mddev->gendisk);
if (mddev->gendisk)
put_disk(mddev->gendisk);
}
percpu_ref_exit(&mddev->writes_pending);
kfree(mddev);
@@ -5619,9 +5619,9 @@ int md_run(struct mddev *mddev)
if (mddev->degraded)
nonrot = false;
if (nonrot)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
else
queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
mddev->queue->backing_dev_info->congested_data = mddev;
mddev->queue->backing_dev_info->congested_fn = md_congested;
}

Zobrazit soubor

@@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev)
discard_supported = true;
}
if (!discard_supported)
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue);
else
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
}
/* calculate array device size */

Zobrazit soubor

@@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
}
}
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
}
@@ -3110,10 +3110,10 @@ static int raid1_run(struct mddev *mddev)
if (mddev->queue) {
if (discard_supported)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
}

Zobrazit soubor

@@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
@@ -3846,10 +3846,10 @@ static int raid10_run(struct mddev *mddev)
if (mddev->queue) {
if (discard_supported)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
}
/* need to check that every block has at least one working mirror */

Zobrazit soubor

@@ -7443,10 +7443,10 @@ static int raid5_run(struct mddev *mddev)
if (devices_handle_discard_safely &&
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
mddev->queue->limits.discard_granularity >= stripe)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
blk_queue_flag_set(QUEUE_FLAG_DISCARD,
mddev->queue);
else
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
blk_queue_flag_clear(QUEUE_FLAG_DISCARD,
mddev->queue);
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);