Merge tag 'dm-4.10-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - various fixes and improvements to request-based DM and DM multipath

 - some locking improvements in DM bufio

 - add Kconfig option to disable the DM block manager's extra locking
   which mainly serves as a developer tool

 - a few bug fixes to DM's persistent-data

 - a couple changes to prepare for multipage biovec support in the block
   layer

 - various improvements and cleanups in the DM core, DM cache, DM raid
   and DM crypt

 - add ability to have DM crypt use keys from the kernel key retention
   service

 - add a new "error_writes" feature to the DM flakey target, reads are
   left unchanged in this mode

* tag 'dm-4.10-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (40 commits)
  dm flakey: introduce "error_writes" feature
  dm cache policy smq: use hash_32() instead of hash_32_generic()
  dm crypt: reject key strings containing whitespace chars
  dm space map: always set ev if sm_ll_mutate() succeeds
  dm space map metadata: skip useless memcpy in metadata_ll_init_index()
  dm space map metadata: fix 'struct sm_metadata' leak on failed create
  Documentation: dm raid: define data_offset status field
  dm raid: fix discard support regression
  dm raid: don't allow "write behind" with raid4/5/6
  dm mpath: use hw_handler_params if attached hw_handler is same as requested
  dm crypt: add ability to use keys from the kernel key retention service
  dm array: remove a dead assignment in populate_ablock_with_values()
  dm ioctl: use offsetof() instead of open-coding it
  dm rq: simplify use_blk_mq initialization
  dm: use blk_set_queue_dying() in __dm_destroy()
  dm bufio: drop the lock when doing GFP_NOIO allocation
  dm bufio: don't take the lock in dm_bufio_shrink_count
  dm bufio: avoid sleeping while holding the dm_bufio lock
  dm table: simplify dm_table_determine_type()
  dm table: an 'all_blk_mq' table must be loaded for a blk-mq DM device
  ...
This commit is contained in:
Linus Torvalds
2016-12-14 11:01:00 -08:00
22 changed files with 462 additions and 201 deletions

View File

@@ -240,9 +240,17 @@ config DM_BUFIO
as a cache, holding recently-read blocks in memory and performing
delayed writes.
config DM_DEBUG_BLOCK_MANAGER_LOCKING
bool "Block manager locking"
depends on DM_BUFIO
---help---
Block manager locking can catch various metadata corruption issues.
If unsure, say N.
config DM_DEBUG_BLOCK_STACK_TRACING
bool "Keep stack trace of persistent data block lock holders"
depends on STACKTRACE_SUPPORT && DM_BUFIO
depends on STACKTRACE_SUPPORT && DM_DEBUG_BLOCK_MANAGER_LOCKING
select STACKTRACE
---help---
Enable this for messages that may help debug problems with the

View File

@@ -820,12 +820,14 @@ enum new_flag {
static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
{
struct dm_buffer *b;
bool tried_noio_alloc = false;
/*
* dm-bufio is resistant to allocation failures (it just keeps
* one buffer reserved in cases all the allocations fail).
* So set flags to not try too hard:
* GFP_NOIO: don't recurse into the I/O layer
* GFP_NOWAIT: don't wait; if we need to sleep we'll release our
* mutex and wait ourselves.
* __GFP_NORETRY: don't retry and rather return failure
* __GFP_NOMEMALLOC: don't use emergency reserves
* __GFP_NOWARN: don't print a warning in case of failure
@@ -835,7 +837,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
*/
while (1) {
if (dm_bufio_cache_size_latch != 1) {
b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
if (b)
return b;
}
@@ -843,6 +845,15 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
if (nf == NF_PREFETCH)
return NULL;
if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
dm_bufio_unlock(c);
b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
dm_bufio_lock(c);
if (b)
return b;
tried_noio_alloc = true;
}
if (!list_empty(&c->reserved_buffers)) {
b = list_entry(c->reserved_buffers.next,
struct dm_buffer, lru_list);
@@ -1585,18 +1596,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
static unsigned long
dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c;
unsigned long count;
struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
c = container_of(shrink, struct dm_bufio_client, shrinker);
if (sc->gfp_mask & __GFP_FS)
dm_bufio_lock(c);
else if (!dm_bufio_trylock(c))
return 0;
count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
dm_bufio_unlock(c);
return count;
return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
}
/*

View File

@@ -383,7 +383,6 @@ static int __format_metadata(struct dm_cache_metadata *cmd)
goto bad;
dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
if (r < 0)
goto bad;
@@ -789,7 +788,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
{
if (cmd->data_block_size != data_block_size) {
DMERR("data_block_size (%llu) different from that in metadata (%llu)\n",
DMERR("data_block_size (%llu) different from that in metadata (%llu)",
(unsigned long long) data_block_size,
(unsigned long long) cmd->data_block_size);
return false;

View File

@@ -1361,7 +1361,7 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
static unsigned random_level(dm_cblock_t cblock)
{
return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
}
static int smq_load_mapping(struct dm_cache_policy *p,

View File

@@ -989,7 +989,8 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod
enum cache_metadata_mode old_mode = get_cache_mode(cache);
if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
DMERR("unable to read needs_check flag, setting failure mode");
DMERR("%s: unable to read needs_check flag, setting failure mode.",
cache_device_name(cache));
new_mode = CM_FAIL;
}

View File

@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/key.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mempool.h>
@@ -23,12 +24,14 @@
#include <linux/atomic.h>
#include <linux/scatterlist.h>
#include <linux/rbtree.h>
#include <linux/ctype.h>
#include <asm/page.h>
#include <asm/unaligned.h>
#include <crypto/hash.h>
#include <crypto/md5.h>
#include <crypto/algapi.h>
#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <linux/device-mapper.h>
@@ -140,8 +143,9 @@ struct crypt_config {
char *cipher;
char *cipher_string;
char *key_string;
struct crypt_iv_operations *iv_gen_ops;
const struct crypt_iv_operations *iv_gen_ops;
union {
struct iv_essiv_private essiv;
struct iv_benbi_private benbi;
@@ -758,15 +762,15 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
return r;
}
static struct crypt_iv_operations crypt_iv_plain_ops = {
static const struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
};
static struct crypt_iv_operations crypt_iv_plain64_ops = {
static const struct crypt_iv_operations crypt_iv_plain64_ops = {
.generator = crypt_iv_plain64_gen
};
static struct crypt_iv_operations crypt_iv_essiv_ops = {
static const struct crypt_iv_operations crypt_iv_essiv_ops = {
.ctr = crypt_iv_essiv_ctr,
.dtr = crypt_iv_essiv_dtr,
.init = crypt_iv_essiv_init,
@@ -774,17 +778,17 @@ static struct crypt_iv_operations crypt_iv_essiv_ops = {
.generator = crypt_iv_essiv_gen
};
static struct crypt_iv_operations crypt_iv_benbi_ops = {
static const struct crypt_iv_operations crypt_iv_benbi_ops = {
.ctr = crypt_iv_benbi_ctr,
.dtr = crypt_iv_benbi_dtr,
.generator = crypt_iv_benbi_gen
};
static struct crypt_iv_operations crypt_iv_null_ops = {
static const struct crypt_iv_operations crypt_iv_null_ops = {
.generator = crypt_iv_null_gen
};
static struct crypt_iv_operations crypt_iv_lmk_ops = {
static const struct crypt_iv_operations crypt_iv_lmk_ops = {
.ctr = crypt_iv_lmk_ctr,
.dtr = crypt_iv_lmk_dtr,
.init = crypt_iv_lmk_init,
@@ -793,7 +797,7 @@ static struct crypt_iv_operations crypt_iv_lmk_ops = {
.post = crypt_iv_lmk_post
};
static struct crypt_iv_operations crypt_iv_tcw_ops = {
static const struct crypt_iv_operations crypt_iv_tcw_ops = {
.ctr = crypt_iv_tcw_ctr,
.dtr = crypt_iv_tcw_dtr,
.init = crypt_iv_tcw_init,
@@ -994,7 +998,6 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
unsigned i, len, remaining_size;
struct page *page;
struct bio_vec *bvec;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -1019,12 +1022,7 @@ retry:
len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
bvec = &clone->bi_io_vec[clone->bi_vcnt++];
bvec->bv_page = page;
bvec->bv_len = len;
bvec->bv_offset = 0;
clone->bi_iter.bi_size += len;
bio_add_page(clone, page, len, 0);
remaining_size -= len;
}
@@ -1471,7 +1469,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
return 0;
}
static int crypt_setkey_allcpus(struct crypt_config *cc)
static int crypt_setkey(struct crypt_config *cc)
{
unsigned subkey_size;
int err = 0, i, r;
@@ -1490,25 +1488,157 @@ static int crypt_setkey_allcpus(struct crypt_config *cc)
return err;
}
#ifdef CONFIG_KEYS
static bool contains_whitespace(const char *str)
{
while (*str)
if (isspace(*str++))
return true;
return false;
}
static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
{
char *new_key_string, *key_desc;
int ret;
struct key *key;
const struct user_key_payload *ukp;
/*
* Reject key_string with whitespace. dm core currently lacks code for
* proper whitespace escaping in arguments on DM_TABLE_STATUS path.
*/
if (contains_whitespace(key_string)) {
DMERR("whitespace chars not allowed in key string");
return -EINVAL;
}
/* look for next ':' separating key_type from key_description */
key_desc = strpbrk(key_string, ":");
if (!key_desc || key_desc == key_string || !strlen(key_desc + 1))
return -EINVAL;
if (strncmp(key_string, "logon:", key_desc - key_string + 1) &&
strncmp(key_string, "user:", key_desc - key_string + 1))
return -EINVAL;
new_key_string = kstrdup(key_string, GFP_KERNEL);
if (!new_key_string)
return -ENOMEM;
key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user,
key_desc + 1, NULL);
if (IS_ERR(key)) {
kzfree(new_key_string);
return PTR_ERR(key);
}
rcu_read_lock();
ukp = user_key_payload(key);
if (!ukp) {
rcu_read_unlock();
key_put(key);
kzfree(new_key_string);
return -EKEYREVOKED;
}
if (cc->key_size != ukp->datalen) {
rcu_read_unlock();
key_put(key);
kzfree(new_key_string);
return -EINVAL;
}
memcpy(cc->key, ukp->data, cc->key_size);
rcu_read_unlock();
key_put(key);
/* clear the flag since following operations may invalidate previously valid key */
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
ret = crypt_setkey(cc);
/* wipe the kernel key payload copy in each case */
memset(cc->key, 0, cc->key_size * sizeof(u8));
if (!ret) {
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
kzfree(cc->key_string);
cc->key_string = new_key_string;
} else
kzfree(new_key_string);
return ret;
}
static int get_key_size(char **key_string)
{
char *colon, dummy;
int ret;
if (*key_string[0] != ':')
return strlen(*key_string) >> 1;
/* look for next ':' in key string */
colon = strpbrk(*key_string + 1, ":");
if (!colon)
return -EINVAL;
if (sscanf(*key_string + 1, "%u%c", &ret, &dummy) != 2 || dummy != ':')
return -EINVAL;
*key_string = colon;
/* remaining key string should be :<logon|user>:<key_desc> */
return ret;
}
#else
static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
{
return -EINVAL;
}
static int get_key_size(char **key_string)
{
return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1;
}
#endif
static int crypt_set_key(struct crypt_config *cc, char *key)
{
int r = -EINVAL;
int key_string_len = strlen(key);
/* The key size may not be changed. */
if (cc->key_size != (key_string_len >> 1))
goto out;
/* Hyphen (which gives a key_size of zero) means there is no key. */
if (!cc->key_size && strcmp(key, "-"))
goto out;
/* ':' means the key is in kernel keyring, short-circuit normal key processing */
if (key[0] == ':') {
r = crypt_set_keyring_key(cc, key + 1);
goto out;
}
/* clear the flag since following operations may invalidate previously valid key */
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
/* wipe references to any kernel keyring key */
kzfree(cc->key_string);
cc->key_string = NULL;
if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
goto out;
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
r = crypt_setkey_allcpus(cc);
r = crypt_setkey(cc);
if (!r)
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
out:
/* Hex key string not needed after here, so wipe it. */
@@ -1521,8 +1651,10 @@ static int crypt_wipe_key(struct crypt_config *cc)
{
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
memset(&cc->key, 0, cc->key_size * sizeof(u8));
kzfree(cc->key_string);
cc->key_string = NULL;
return crypt_setkey_allcpus(cc);
return crypt_setkey(cc);
}
static void crypt_dtr(struct dm_target *ti)
@@ -1558,6 +1690,7 @@ static void crypt_dtr(struct dm_target *ti)
kzfree(cc->cipher);
kzfree(cc->cipher_string);
kzfree(cc->key_string);
/* Must zero key material before freeing */
kzfree(cc);
@@ -1726,12 +1859,13 @@ bad_mem:
/*
* Construct an encryption mapping:
* <cipher> <key> <iv_offset> <dev_path> <start>
* <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start>
*/
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
unsigned int key_size, opt_params;
int key_size;
unsigned int opt_params;
unsigned long long tmpll;
int ret;
size_t iv_size_padding;
@@ -1748,7 +1882,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
key_size = strlen(argv[1]) >> 1;
key_size = get_key_size(&argv[1]);
if (key_size < 0) {
ti->error = "Cannot parse key size";
return -EINVAL;
}
cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
if (!cc) {
@@ -1955,10 +2093,13 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
case STATUSTYPE_TABLE:
DMEMIT("%s ", cc->cipher_string);
if (cc->key_size > 0)
for (i = 0; i < cc->key_size; i++)
DMEMIT("%02x", cc->key[i]);
else
if (cc->key_size > 0) {
if (cc->key_string)
DMEMIT(":%u:%s", cc->key_size, cc->key_string);
else
for (i = 0; i < cc->key_size; i++)
DMEMIT("%02x", cc->key[i]);
} else
DMEMIT("-");
DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
@@ -2014,7 +2155,7 @@ static void crypt_resume(struct dm_target *ti)
static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
{
struct crypt_config *cc = ti->private;
int ret = -EINVAL;
int key_size, ret = -EINVAL;
if (argc < 2)
goto error;
@@ -2025,6 +2166,13 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
return -EINVAL;
}
if (argc == 3 && !strcasecmp(argv[1], "set")) {
/* The key size may not be changed. */
key_size = get_key_size(&argv[2]);
if (key_size < 0 || cc->key_size != key_size) {
memset(argv[2], '0', strlen(argv[2]));
return -EINVAL;
}
ret = crypt_set_key(cc, argv[2]);
if (ret)
return ret;
@@ -2068,7 +2216,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
.version = {1, 14, 1},
.version = {1, 15, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,

View File

@@ -36,7 +36,8 @@ struct flakey_c {
};
enum feature_flag_bits {
DROP_WRITES
DROP_WRITES,
ERROR_WRITES
};
struct per_bio_data {
@@ -76,6 +77,25 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
ti->error = "Feature drop_writes duplicated";
return -EINVAL;
} else if (test_bit(ERROR_WRITES, &fc->flags)) {
ti->error = "Feature drop_writes conflicts with feature error_writes";
return -EINVAL;
}
continue;
}
/*
* error_writes
*/
if (!strcasecmp(arg_name, "error_writes")) {
if (test_and_set_bit(ERROR_WRITES, &fc->flags)) {
ti->error = "Feature error_writes duplicated";
return -EINVAL;
} else if (test_bit(DROP_WRITES, &fc->flags)) {
ti->error = "Feature error_writes conflicts with feature drop_writes";
return -EINVAL;
}
continue;
@@ -135,6 +155,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
} else if (test_bit(ERROR_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
ti->error = "error_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
}
return 0;
@@ -200,11 +224,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (!(fc->up_interval + fc->down_interval)) {
ti->error = "Total (up + down) interval is zero";
r = -EINVAL;
goto bad;
}
if (fc->up_interval + fc->down_interval < fc->up_interval) {
ti->error = "Interval overflow";
r = -EINVAL;
goto bad;
}
@@ -289,22 +315,27 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
pb->bio_submitted = true;
/*
* Error reads if neither corrupt_bio_byte or drop_writes are set.
* Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set.
* Otherwise, flakey_end_io() will decide if the reads should be modified.
*/
if (bio_data_dir(bio) == READ) {
if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags))
if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
!test_bit(ERROR_WRITES, &fc->flags))
return -EIO;
goto map_bio;
}
/*
* Drop writes?
* Drop or error writes?
*/
if (test_bit(DROP_WRITES, &fc->flags)) {
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
}
else if (test_bit(ERROR_WRITES, &fc->flags)) {
bio_io_error(bio);
return DM_MAPIO_SUBMITTED;
}
/*
* Corrupt matching writes.
@@ -340,10 +371,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
*/
corrupt_bio_data(bio, fc);
} else if (!test_bit(DROP_WRITES, &fc->flags)) {
} else if (!test_bit(DROP_WRITES, &fc->flags) &&
!test_bit(ERROR_WRITES, &fc->flags)) {
/*
* Error read during the down_interval if drop_writes
* wasn't configured.
* and error_writes were not configured.
*/
return -EIO;
}
@@ -357,7 +389,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
{
unsigned sz = 0;
struct flakey_c *fc = ti->private;
unsigned drop_writes;
unsigned drop_writes, error_writes;
switch (type) {
case STATUSTYPE_INFO:
@@ -370,10 +402,13 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
fc->down_interval);
drop_writes = test_bit(DROP_WRITES, &fc->flags);
DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5);
error_writes = test_bit(ERROR_WRITES, &fc->flags);
DMEMIT("%u ", drop_writes + error_writes + (fc->corrupt_bio_byte > 0) * 5);
if (drop_writes)
DMEMIT("drop_writes ");
else if (error_writes)
DMEMIT("error_writes ");
if (fc->corrupt_bio_byte)
DMEMIT("corrupt_bio_byte %u %c %u %u ",
@@ -410,7 +445,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
static struct target_type flakey_target = {
.name = "flakey",
.version = {1, 3, 1},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = flakey_ctr,
.dtr = flakey_dtr,

View File

@@ -162,7 +162,10 @@ struct dpages {
struct page **p, unsigned long *len, unsigned *offset);
void (*next_page)(struct dpages *dp);
unsigned context_u;
union {
unsigned context_u;
struct bvec_iter context_bi;
};
void *context_ptr;
void *vma_invalidate_address;
@@ -204,25 +207,36 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse
static void bio_get_page(struct dpages *dp, struct page **p,
unsigned long *len, unsigned *offset)
{
struct bio_vec *bvec = dp->context_ptr;
*p = bvec->bv_page;
*len = bvec->bv_len - dp->context_u;
*offset = bvec->bv_offset + dp->context_u;
struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
dp->context_bi);
*p = bvec.bv_page;
*len = bvec.bv_len;
*offset = bvec.bv_offset;
/* avoid figuring it out again in bio_next_page() */
dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
}
static void bio_next_page(struct dpages *dp)
{
struct bio_vec *bvec = dp->context_ptr;
dp->context_ptr = bvec + 1;
dp->context_u = 0;
unsigned int len = (unsigned int)dp->context_bi.bi_sector;
bvec_iter_advance((struct bio_vec *)dp->context_ptr,
&dp->context_bi, len);
}
static void bio_dp_init(struct dpages *dp, struct bio *bio)
{
dp->get_page = bio_get_page;
dp->next_page = bio_next_page;
dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
dp->context_u = bio->bi_iter.bi_bvec_done;
/*
* We just use bvec iterator to retrieve pages, so it is ok to
* access the bvec table directly here
*/
dp->context_ptr = bio->bi_io_vec;
dp->context_bi = bio->bi_iter;
}
/*

View File

@@ -1697,7 +1697,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
{
struct dm_ioctl *dmi;
int secure_data;
const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data);
const size_t minimum_data_size = offsetof(struct dm_ioctl, data);
if (copy_from_user(param_kernel, user, minimum_data_size))
return -EFAULT;

View File

@@ -372,16 +372,13 @@ static int __pg_init_all_paths(struct multipath *m)
return atomic_read(&m->pg_init_in_progress);
}
static int pg_init_all_paths(struct multipath *m)
static void pg_init_all_paths(struct multipath *m)
{
int r;
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
r = __pg_init_all_paths(m);
__pg_init_all_paths(m);
spin_unlock_irqrestore(&m->lock, flags);
return r;
}
static void __switch_pg(struct multipath *m, struct priority_group *pg)
@@ -583,16 +580,17 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
* .request_fn stacked on blk-mq path(s) and
* blk-mq stacked on blk-mq path(s).
*/
*__clone = blk_mq_alloc_request(bdev_get_queue(bdev),
rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
if (IS_ERR(*__clone)) {
/* ENOMEM, requeue */
clone = blk_mq_alloc_request(bdev_get_queue(bdev),
rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
if (IS_ERR(clone)) {
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
clear_request_fn_mpio(m, map_context);
return r;
}
(*__clone)->bio = (*__clone)->biotail = NULL;
(*__clone)->rq_disk = bdev->bd_disk;
(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
*__clone = clone;
}
if (pgpath->pg->ps.type->start_io)
@@ -851,19 +849,23 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
retain:
attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
if (attached_handler_name) {
/*
* Clear any hw_handler_params associated with a
* handler that isn't already attached.
*/
if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) {
kfree(m->hw_handler_params);
m->hw_handler_params = NULL;
}
/*
* Reset hw_handler_name to match the attached handler
* and clear any hw_handler_params associated with the
* ignored handler.
*
* NB. This modifies the table line to show the actual
* handler instead of the original table passed in.
*/
kfree(m->hw_handler_name);
m->hw_handler_name = attached_handler_name;
kfree(m->hw_handler_params);
m->hw_handler_params = NULL;
}
}
@@ -1002,6 +1004,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
}
m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
if (!m->hw_handler_name)
return -EINVAL;
if (hw_argc > 1) {
char *p;
@@ -1362,7 +1366,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
(pgnum > m->nr_priority_groups)) {
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
DMWARN("invalid PG number supplied to switch_pg_num");
return -EINVAL;
}
@@ -1394,7 +1398,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
(pgnum > m->nr_priority_groups)) {
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
DMWARN("invalid PG number supplied to bypass_pg");
return -EINVAL;
}

View File

@@ -160,7 +160,6 @@ struct raid_dev {
CTR_FLAG_DAEMON_SLEEP | \
CTR_FLAG_MIN_RECOVERY_RATE | \
CTR_FLAG_MAX_RECOVERY_RATE | \
CTR_FLAG_MAX_WRITE_BEHIND | \
CTR_FLAG_STRIPE_CACHE | \
CTR_FLAG_REGION_SIZE | \
CTR_FLAG_DELTA_DISKS | \
@@ -171,7 +170,6 @@ struct raid_dev {
CTR_FLAG_DAEMON_SLEEP | \
CTR_FLAG_MIN_RECOVERY_RATE | \
CTR_FLAG_MAX_RECOVERY_RATE | \
CTR_FLAG_MAX_WRITE_BEHIND | \
CTR_FLAG_STRIPE_CACHE | \
CTR_FLAG_REGION_SIZE | \
CTR_FLAG_DELTA_DISKS | \
@@ -2050,16 +2048,17 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
mddev->reshape_position = MaxSector;
mddev->raid_disks = le32_to_cpu(sb->num_devices);
mddev->level = le32_to_cpu(sb->level);
mddev->layout = le32_to_cpu(sb->layout);
mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
/*
* Reshaping is supported, e.g. reshape_position is valid
* in superblock and superblock content is authoritative.
*/
if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
/* Superblock is authoritative wrt given raid set layout! */
mddev->raid_disks = le32_to_cpu(sb->num_devices);
mddev->level = le32_to_cpu(sb->level);
mddev->layout = le32_to_cpu(sb->layout);
mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
mddev->new_level = le32_to_cpu(sb->new_level);
mddev->new_layout = le32_to_cpu(sb->new_layout);
mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors);
@@ -2087,38 +2086,44 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
/*
* No takeover/reshaping, because we don't have the extended v1.9.0 metadata
*/
if (le32_to_cpu(sb->level) != mddev->new_level) {
DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)");
return -EINVAL;
}
if (le32_to_cpu(sb->layout) != mddev->new_layout) {
DMERR("Reshaping raid sets not yet supported. (raid layout change)");
DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout);
DMERR(" Old layout: %s w/ %d copies",
raid10_md_layout_to_format(le32_to_cpu(sb->layout)),
raid10_md_layout_to_copies(le32_to_cpu(sb->layout)));
DMERR(" New layout: %s w/ %d copies",
raid10_md_layout_to_format(mddev->layout),
raid10_md_layout_to_copies(mddev->layout));
return -EINVAL;
}
if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) {
DMERR("Reshaping raid sets not yet supported. (stripe sectors change)");
return -EINVAL;
}
struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout);
struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
/* We can only change the number of devices in raid1 with old (i.e. pre 1.0.7) metadata */
if (!rt_is_raid1(rs->raid_type) &&
(le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
DMERR("Reshaping raid sets not yet supported. (device count change from %u to %u)",
sb->num_devices, mddev->raid_disks);
if (rs_takeover_requested(rs)) {
if (rt_cur && rt_new)
DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)",
rt_cur->name, rt_new->name);
else
DMERR("Takeover raid sets not yet supported by metadata. (raid level change)");
return -EINVAL;
} else if (rs_reshape_requested(rs)) {
DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)");
if (mddev->layout != mddev->new_layout) {
if (rt_cur && rt_new)
DMERR(" current layout %s vs new layout %s",
rt_cur->name, rt_new->name);
else
DMERR(" current layout 0x%X vs new layout 0x%X",
le32_to_cpu(sb->layout), mddev->new_layout);
}
if (mddev->chunk_sectors != mddev->new_chunk_sectors)
DMERR(" current stripe sectors %u vs new stripe sectors %u",
mddev->chunk_sectors, mddev->new_chunk_sectors);
if (rs->delta_disks)
DMERR(" current %u disks vs new %u disks",
mddev->raid_disks, mddev->raid_disks + rs->delta_disks);
if (rs_is_raid10(rs)) {
DMERR(" Old layout: %s w/ %u copies",
raid10_md_layout_to_format(mddev->layout),
raid10_md_layout_to_copies(mddev->layout));
DMERR(" New layout: %s w/ %u copies",
raid10_md_layout_to_format(mddev->new_layout),
raid10_md_layout_to_copies(mddev->new_layout));
}
return -EINVAL;
}
DMINFO("Discovered old metadata format; upgrading to extended metadata format");
/* Table line is checked vs. authoritative superblock */
rs_set_new(rs);
}
if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
@@ -2211,7 +2216,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
continue;
if (role != r->raid_disk) {
if (__is_raid10_near(mddev->layout)) {
if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) {
if (mddev->raid_disks % __raid10_near_copies(mddev->layout) ||
rs->raid_disks % rs->raid10_copies) {
rs->ti->error =
@@ -2994,6 +2999,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
}
/* Disable/enable discard support on raid set. */
configure_discard_support(rs);
mddev_unlock(&rs->md);
return 0;
@@ -3580,12 +3588,6 @@ static int raid_preresume(struct dm_target *ti)
if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
rs_update_sbs(rs);
/*
* Disable/enable discard support on raid set after any
* conversion, because devices can have been added
*/
configure_discard_support(rs);
/* Load the bitmap from disk unless raid0 */
r = __load_dirty_region_bitmap(rs);
if (r)

View File

@@ -23,11 +23,7 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
#define RESERVED_REQUEST_BASED_IOS 256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif
static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
bool dm_use_blk_mq_default(void)
{
@@ -210,6 +206,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
*/
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
struct request_queue *q = md->queue;
unsigned long flags;
atomic_dec(&md->pending[rw]);
/* nudge anyone waiting on suspend queue */
@@ -222,8 +221,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
* back into ->request_fn() could deadlock attempting to grab the
* queue lock again.
*/
if (!md->queue->mq_ops && run_queue)
blk_run_queue_async(md->queue);
if (!q->mq_ops && run_queue) {
spin_lock_irqsave(q->queue_lock, flags);
blk_run_queue_async(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
/*
* dm_put() must be at the end of this function. See the comment above
@@ -798,7 +800,7 @@ static void dm_old_request_fn(struct request_queue *q)
pos = blk_rq_pos(rq);
if ((dm_old_request_peeked_before_merge_deadline(md) &&
md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
(ti->type->busy && ti->type->busy(ti))) {
blk_delay_queue(q, 10);

View File

@@ -871,7 +871,7 @@ static int dm_table_determine_type(struct dm_table *t)
{
unsigned i;
unsigned bio_based = 0, request_based = 0, hybrid = 0;
bool verify_blk_mq = false;
unsigned sq_count = 0, mq_count = 0;
struct dm_target *tgt;
struct dm_dev_internal *dd;
struct list_head *devices = dm_table_get_devices(t);
@@ -924,12 +924,6 @@ static int dm_table_determine_type(struct dm_table *t)
BUG_ON(!request_based); /* No targets in this table */
if (list_empty(devices) && __table_type_request_based(live_md_type)) {
/* inherit live MD type */
t->type = live_md_type;
return 0;
}
/*
* The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
* having a compatible target use dm_table_set_type.
@@ -948,6 +942,19 @@ verify_rq_based:
return -EINVAL;
}
if (list_empty(devices)) {
int srcu_idx;
struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
/* inherit live table's type and all_blk_mq */
if (live_table) {
t->type = live_table->type;
t->all_blk_mq = live_table->all_blk_mq;
}
dm_put_live_table(t->md, srcu_idx);
return 0;
}
/* Non-request-stackable devices can't be used for request-based dm */
list_for_each_entry(dd, devices, list) {
struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
@@ -959,19 +966,19 @@ verify_rq_based:
}
if (q->mq_ops)
verify_blk_mq = true;
mq_count++;
else
sq_count++;
}
if (sq_count && mq_count) {
DMERR("table load rejected: not all devices are blk-mq request-stackable");
return -EINVAL;
}
t->all_blk_mq = mq_count > 0;
if (verify_blk_mq) {
/* verify _all_ devices in the table are blk-mq devices */
list_for_each_entry(dd, devices, list)
if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
DMERR("table load rejected: not all devices"
" are blk-mq request-stackable");
return -EINVAL;
}
t->all_blk_mq = true;
if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) {
DMERR("table load rejected: all devices are not blk-mq request-stackable");
return -EINVAL;
}
return 0;

View File

@@ -868,7 +868,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
if (r) {
ti->error = "Data device lookup failed";
ti->error = "Hash device lookup failed";
goto bad;
}

View File

@@ -1886,9 +1886,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
spin_lock_irq(q->queue_lock);
queue_flag_set(QUEUE_FLAG_DYING, q);
spin_unlock_irq(q->queue_lock);
blk_set_queue_dying(q);
if (dm_request_based(md) && md->kworker_task)
kthread_flush_worker(&md->kworker);

View File

@@ -700,13 +700,11 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
{
int r;
unsigned i;
uint32_t nr_entries;
struct dm_btree_value_type *vt = &info->value_type;
BUG_ON(le32_to_cpu(ab->nr_entries));
BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
nr_entries = le32_to_cpu(ab->nr_entries);
for (i = 0; i < new_nr; i++) {
r = fn(base + i, element_at(info, ab, i), context);
if (r)

View File

@@ -18,6 +18,8 @@
/*----------------------------------------------------------------*/
#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
/*
* This is a read/write semaphore with a couple of differences.
*
@@ -302,6 +304,18 @@ static void report_recursive_bug(dm_block_t b, int r)
(unsigned long long) b);
}
#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */
#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)
#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */
/*----------------------------------------------------------------*/
/*
@@ -330,8 +344,11 @@ EXPORT_SYMBOL_GPL(dm_block_data);
struct buffer_aux {
struct dm_block_validator *validator;
struct block_lock lock;
int write_locked;
#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
struct block_lock lock;
#endif
};
static void dm_block_manager_alloc_callback(struct dm_buffer *buf)

View File

@@ -464,7 +464,8 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
ll->nr_allocated--;
le32_add_cpu(&ie_disk.nr_free, 1);
ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
}
} else
*ev = SM_NONE;
return ll->save_ie(ll, index, &ie_disk);
}
@@ -547,7 +548,6 @@ static int metadata_ll_init_index(struct ll_disk *ll)
if (r < 0)
return r;
memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
ll->bitmap_root = dm_block_location(b);
dm_tm_unlock(ll->tm, b);

View File

@@ -775,16 +775,14 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
r = sm_ll_new_metadata(&smm->ll, tm);
if (r)
return r;
if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
r = sm_ll_extend(&smm->ll, nr_blocks);
if (r)
return r;
if (!r) {
if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
r = sm_ll_extend(&smm->ll, nr_blocks);
}
memcpy(&smm->sm, &ops, sizeof(smm->sm));
if (r)
return r;
/*
* Now we need to update the newly created data structures with the