Merge tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm from Dan Williams:
 "A rework of media error handling in the BTT driver and other updates.
  It has appeared in a few -next releases and collected some late-
  breaking build-error and warning fixups as a result.

  Summary:

   - Media error handling support in the Block Translation Table (BTT)
     driver is reworked to address sleeping-while-atomic locking and
     memory-allocation-context conflicts.

   - The dax_device lookup overhead for xfs and ext4 is moved out of the
     iomap hot-path to a mount-time lookup.

   - A new 'ecc_unit_size' sysfs attribute is added to advertise the
     read-modify-write boundary property of a persistent memory range.

   - Preparatory fix-ups for arm and powerpc pmem support are included
     along with other miscellaneous fixes"

* tag 'libnvdimm-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (26 commits)
  libnvdimm, btt: fix format string warnings
  libnvdimm, btt: clean up warning and error messages
  ext4: fix null pointer dereference on sbi
  libnvdimm, nfit: move the check on nd_reserved2 to the endpoint
  dax: fix FS_DAX=n BLOCK=y compilation
  libnvdimm: fix integer overflow static analysis warning
  libnvdimm, nd_blk: remove mmio_flush_range()
  libnvdimm, btt: rework error clearing
  libnvdimm: fix potential deadlock while clearing errors
  libnvdimm, btt: cache sector_size in arena_info
  libnvdimm, btt: ensure that flags were also unchanged during a map_read
  libnvdimm, btt: refactor map entry operations with macros
  libnvdimm, btt: fix a missed NVDIMM_IO_ATOMIC case in the write path
  libnvdimm, nfit: export an 'ecc_unit_size' sysfs attribute
  ext4: perform dax_device lookup at mount
  ext2: perform dax_device lookup at mount
  xfs: perform dax_device lookup at mount
  dax: introduce a fs_dax_get_by_bdev() helper
  libnvdimm, btt: check memory allocation failure
  libnvdimm, label: fix index block size calculation
  ...
This commit is contained in:
Linus Torvalds
2017-09-11 13:10:57 -07:00
33 changed files with 396 additions and 169 deletions

View File

@@ -31,6 +31,16 @@ enum log_ent_request {
LOG_OLD_ENT
};
static struct device *to_dev(struct arena_info *arena)
{
return &arena->nd_btt->dev;
}
/*
 * Shift @offset by the nd_btt's initial_offset and return the result.
 */
static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
{
	offset += nd_btt->initial_offset;

	return offset;
}
static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
void *buf, size_t n, unsigned long flags)
{
@@ -38,7 +48,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */
offset += arena->nd_btt->initial_offset;
offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_read_bytes(ndns, offset, buf, n, flags);
}
@@ -49,7 +59,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets may be shifted from the base of the device */
offset += arena->nd_btt->initial_offset;
offset = adjust_initial_offset(nd_btt, offset);
return nvdimm_write_bytes(ndns, offset, buf, n, flags);
}
@@ -62,8 +72,10 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
* We rely on that to make sure rw_bytes does error clearing
* correctly, so make sure that is the case.
*/
WARN_ON_ONCE(!IS_ALIGNED(arena->infooff, 512));
WARN_ON_ONCE(!IS_ALIGNED(arena->info2off, 512));
dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->infooff, 512),
"arena->infooff: %#llx is unaligned\n", arena->infooff);
dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->info2off, 512),
"arena->info2off: %#llx is unaligned\n", arena->info2off);
ret = arena_write_bytes(arena, arena->info2off, super,
sizeof(struct btt_sb), 0);
@@ -76,7 +88,6 @@ static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
{
WARN_ON(!super);
return arena_read_bytes(arena, arena->infooff, super,
sizeof(struct btt_sb), 0);
}
@@ -92,7 +103,10 @@ static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping,
{
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
WARN_ON(lba >= arena->external_nlba);
if (unlikely(lba >= arena->external_nlba))
dev_err_ratelimited(to_dev(arena),
"%s: lba %#x out of range (max: %#x)\n",
__func__, lba, arena->external_nlba);
return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE, flags);
}
@@ -106,7 +120,7 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* This 'mapping' is supposed to be just the LBA mapping, without
* any flags set, so strip the flag bits.
*/
mapping &= MAP_LBA_MASK;
mapping = ent_lba(mapping);
ze = (z_flag << 1) + e_flag;
switch (ze) {
@@ -131,7 +145,8 @@ static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
* construed as a valid 'normal' case, but we decide not to,
* to avoid confusion
*/
WARN_ONCE(1, "Invalid use of Z and E flags\n");
dev_err_ratelimited(to_dev(arena),
"Invalid use of Z and E flags\n");
return -EIO;
}
@@ -147,7 +162,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
u32 raw_mapping, postmap, ze, z_flag, e_flag;
u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
WARN_ON(lba >= arena->external_nlba);
if (unlikely(lba >= arena->external_nlba))
dev_err_ratelimited(to_dev(arena),
"%s: lba %#x out of range (max: %#x)\n",
__func__, lba, arena->external_nlba);
ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE, rwb_flags);
if (ret)
@@ -155,10 +173,10 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
raw_mapping = le32_to_cpu(in);
z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
z_flag = ent_z_flag(raw_mapping);
e_flag = ent_e_flag(raw_mapping);
ze = (z_flag << 1) + e_flag;
postmap = raw_mapping & MAP_LBA_MASK;
postmap = ent_lba(raw_mapping);
/* Reuse the {z,e}_flag variables for *trim and *error */
z_flag = 0;
@@ -195,7 +213,6 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
static int btt_log_read_pair(struct arena_info *arena, u32 lane,
struct log_entry *ent)
{
WARN_ON(!ent);
return arena_read_bytes(arena,
arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
2 * LOG_ENT_SIZE, 0);
@@ -299,11 +316,6 @@ static int btt_log_get_old(struct log_entry *ent)
return old;
}
static struct device *to_dev(struct arena_info *arena)
{
return &arena->nd_btt->dev;
}
/*
* This function copies the desired (old/new) log entry into ent if
* it is not NULL. It returns the sub-slot number (0 or 1)
@@ -381,7 +393,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
if (++(arena->freelist[lane].seq) == 4)
arena->freelist[lane].seq = 1;
arena->freelist[lane].block = le32_to_cpu(ent->old_map);
if (ent_e_flag(ent->old_map))
arena->freelist[lane].has_err = 1;
arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
return ret;
}
@@ -407,12 +421,14 @@ static int btt_map_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that
* is the case.
*/
WARN_ON_ONCE(!IS_ALIGNED(arena->mapoff, 512));
dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->mapoff, 512),
"arena->mapoff: %#llx is unaligned\n", arena->mapoff);
while (mapsize) {
size_t size = min(mapsize, chunk_size);
WARN_ON_ONCE(size < 512);
dev_WARN_ONCE(to_dev(arena), size < 512,
"chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
size, 0);
if (ret)
@@ -449,12 +465,14 @@ static int btt_log_init(struct arena_info *arena)
* make sure rw_bytes does error clearing correctly, so make sure that
* is the case.
*/
WARN_ON_ONCE(!IS_ALIGNED(arena->logoff, 512));
dev_WARN_ONCE(to_dev(arena), !IS_ALIGNED(arena->logoff, 512),
"arena->logoff: %#llx is unaligned\n", arena->logoff);
while (logsize) {
size_t size = min(logsize, chunk_size);
WARN_ON_ONCE(size < 512);
dev_WARN_ONCE(to_dev(arena), size < 512,
"chunk size: %#zx is unaligned\n", size);
ret = arena_write_bytes(arena, arena->logoff + offset, zerobuf,
size, 0);
if (ret)
@@ -480,6 +498,40 @@ static int btt_log_init(struct arena_info *arena)
return ret;
}
/*
 * Convert a block number within the arena's data area into a byte offset:
 * the arena's dataoff plus @lba blocks of internal_lbasize bytes each.
 * The multiplication is carried out in 64 bits.
 */
static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
{
	u64 block_bytes = (u64)lba * arena->internal_lbasize;

	return arena->dataoff + block_bytes;
}
/*
 * If the free block of @lane is flagged has_err, overwrite it with zeroes.
 *
 * sector_size bytes are written from the zero page in chunks of at most
 * PAGE_SIZE, with arena->err_lock held for the whole sequence. has_err is
 * cleared only once the final chunk has been written successfully.
 *
 * Returns 0 when there was nothing to clear or every write succeeded,
 * otherwise the first non-zero value returned by arena_write_bytes().
 */
static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
{
	unsigned long remaining;
	void *zeroes;
	u64 nsoff;
	u32 lba;
	int rc = 0;

	/* Nothing flagged on this lane: no lock taken, no I/O issued. */
	if (!arena->freelist[lane].has_err)
		return 0;

	zeroes = page_address(ZERO_PAGE(0));
	lba = arena->freelist[lane].block;
	nsoff = to_namespace_offset(arena, lba);
	remaining = arena->sector_size;

	mutex_lock(&arena->err_lock);
	while (remaining) {
		unsigned long step = min(remaining, PAGE_SIZE);

		rc = arena_write_bytes(arena, nsoff, zeroes, step, 0);
		if (rc)
			break;

		nsoff += step;
		remaining -= step;

		/* The whole block has been rewritten; drop the error flag. */
		if (!remaining)
			arena->freelist[lane].has_err = 0;
	}
	mutex_unlock(&arena->err_lock);

	return rc;
}
static int btt_freelist_init(struct arena_info *arena)
{
int old, new, ret;
@@ -505,6 +557,17 @@ static int btt_freelist_init(struct arena_info *arena)
arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
arena->freelist[i].block = le32_to_cpu(log_new.old_map);
/*
* FIXME: if error clearing fails during init, we want to make
* the BTT read-only
*/
if (ent_e_flag(log_new.old_map)) {
ret = arena_clear_freelist_error(arena, i);
if (ret)
dev_err_ratelimited(to_dev(arena),
"Unable to clear known errors\n");
}
/* This implies a newly created or untouched flog entry */
if (log_new.old_map == log_new.new_map)
continue;
@@ -525,7 +588,6 @@ static int btt_freelist_init(struct arena_info *arena)
if (ret)
return ret;
}
}
return 0;
@@ -566,6 +628,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
if (!arena)
return NULL;
arena->nd_btt = btt->nd_btt;
arena->sector_size = btt->sector_size;
if (!size)
return arena;
@@ -694,6 +757,7 @@ static int discover_arenas(struct btt *btt)
arena->external_lba_start = cur_nlba;
parse_arena_meta(arena, super, cur_off);
mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena);
if (ret)
goto out;
@@ -904,11 +968,6 @@ static void unlock_map(struct arena_info *arena, u32 premap)
spin_unlock(&arena->map_locks[idx].lock);
}
/*
 * Convert a block number within the arena's data area into a byte offset:
 * the arena's dataoff plus @lba blocks of internal_lbasize bytes each.
 * The multiplication is carried out in 64 bits.
 */
static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
{
	u64 block_bytes = (u64)lba * arena->internal_lbasize;

	return arena->dataoff + block_bytes;
}
static int btt_data_read(struct arena_info *arena, struct page *page,
unsigned int off, u32 lba, u32 len)
{
@@ -1032,6 +1091,7 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/
while (1) {
u32 new_map;
int new_t, new_e;
if (t_flag) {
zero_fill_data(page, off, cur_len);
@@ -1050,20 +1110,29 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
*/
barrier();
ret = btt_map_read(arena, premap, &new_map, &t_flag,
&e_flag, NVDIMM_IO_ATOMIC);
ret = btt_map_read(arena, premap, &new_map, &new_t,
&new_e, NVDIMM_IO_ATOMIC);
if (ret)
goto out_rtt;
if (postmap == new_map)
if ((postmap == new_map) && (t_flag == new_t) &&
(e_flag == new_e))
break;
postmap = new_map;
t_flag = new_t;
e_flag = new_e;
}
ret = btt_data_read(arena, page, off, postmap, cur_len);
if (ret)
if (ret) {
int rc;
/* Media error - set the e_flag */
rc = btt_map_write(arena, premap, postmap, 0, 1,
NVDIMM_IO_ATOMIC);
goto out_rtt;
}
if (bip) {
ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
@@ -1088,6 +1157,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
return ret;
}
/*
 * Ask is_bad_pmem() whether the block @postmap of @arena overlaps a known
 * bad range in btt->phys_bb.
 *
 * arena_{read,write}_bytes apply the initial offset themselves, but this
 * query bypasses them, so the final, raw namespace offset has to be built
 * here: arena-relative offset first, then the initial-offset shift. The
 * byte offset is divided by 512 (>> 9) to obtain the sector passed to
 * is_bad_pmem(), and the length checked is the arena's internal_lbasize.
 */
static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
		u32 postmap)
{
	u64 arena_off = to_namespace_offset(arena, postmap);
	u64 raw_off = adjust_initial_offset(arena->nd_btt, arena_off);
	sector_t first_sector = raw_off >> 9;

	return is_bad_pmem(btt->phys_bb, first_sector,
			arena->internal_lbasize);
}
static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
sector_t sector, struct page *page, unsigned int off,
unsigned int len)
@@ -1100,7 +1184,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
while (len) {
u32 cur_len;
int e_flag;
retry:
lane = nd_region_acquire_lane(btt->nd_region);
ret = lba_to_arena(btt, sector, &premap, &arena);
@@ -1113,6 +1199,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
goto out_lane;
}
if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
arena->freelist[lane].has_err = 1;
if (mutex_is_locked(&arena->err_lock)
|| arena->freelist[lane].has_err) {
nd_region_release_lane(btt->nd_region, lane);
ret = arena_clear_freelist_error(arena, lane);
if (ret)
return ret;
/* OK to acquire a different lane/free block */
goto retry;
}
new_postmap = arena->freelist[lane].block;
/* Wait if the new block is being read from */
@@ -1138,7 +1239,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
}
lock_map(arena, premap);
ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
NVDIMM_IO_ATOMIC);
if (ret)
goto out_map;
@@ -1146,6 +1247,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
ret = -EIO;
goto out_map;
}
if (e_flag)
set_e_flag(old_postmap);
log.lba = cpu_to_le32(premap);
log.old_map = cpu_to_le32(old_postmap);
@@ -1156,13 +1259,20 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
if (ret)
goto out_map;
ret = btt_map_write(arena, premap, new_postmap, 0, 0, 0);
ret = btt_map_write(arena, premap, new_postmap, 0, 0,
NVDIMM_IO_ATOMIC);
if (ret)
goto out_map;
unlock_map(arena, premap);
nd_region_release_lane(btt->nd_region, lane);
if (e_flag) {
ret = arena_clear_freelist_error(arena, lane);
if (ret)
return ret;
}
len -= cur_len;
off += cur_len;
sector += btt->sector_size >> SECTOR_SHIFT;
@@ -1211,11 +1321,13 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
BUG_ON(len > PAGE_SIZE);
/* Make sure len is in multiples of sector size. */
/* XXX is this right? */
BUG_ON(len < btt->sector_size);
BUG_ON(len % btt->sector_size);
if (len > PAGE_SIZE || len < btt->sector_size ||
len % btt->sector_size) {
dev_err_ratelimited(&btt->nd_btt->dev,
"unaligned bio segment (len: %d)\n", len);
bio->bi_status = BLK_STS_IOERR;
break;
}
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector);
@@ -1345,6 +1457,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
{
int ret;
struct btt *btt;
struct nd_namespace_io *nsio;
struct device *dev = &nd_btt->dev;
btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
@@ -1358,6 +1471,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
INIT_LIST_HEAD(&btt->arena_list);
mutex_init(&btt->init_lock);
btt->nd_region = nd_region;
nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
btt->phys_bb = &nsio->bb;
ret = discover_arenas(btt);
if (ret) {
@@ -1431,6 +1546,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
}
btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
if (!btt_sb)
return -ENOMEM;
/*
* If this returns < 0, that is ok as it just means there wasn't