Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm and dax updates from Dan Williams:
 "Save for a few late fixes, all of these commits have shipped in -next
  releases since before the merge window opened, and 0day has given a
  build success notification.

  The ext4 touches came from Jan, and the xfs touches have Darrick's
  reviewed-by. An xfstest for the MAP_SYNC feature has been through
  a few rounds of review and is on track to be merged.

   - Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable
     'userspace flush' of persistent memory updates via filesystem-dax
     mappings. It arranges for any filesystem metadata updates that may
     be required to satisfy a write fault to also be flushed ("on disk")
     before the kernel returns to userspace from the fault handler.
     Effectively every write-fault that dirties metadata completes an
     fsync() before returning from the fault handler. The new
     MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag
     is validated as supported by the filesystem's ->mmap() file
     operation.

   - Add support for the standard ACPI 6.2 label access methods that
     replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods.
     This enables interoperability with environments that only implement
     the standardized methods.

   - Add support for the ACPI 6.2 NVDIMM media error injection methods.

   - Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for
     latch last shutdown status, firmware update, SMART error injection,
     and SMART alarm threshold control.

   - Clean up physical address information disclosures to be root-only.

   - Fix revalidation of the DIMM "locked label area" status to support
     dynamic unlock of the label area.

   - Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA
     (system-physical-address) command and error injection commands.

  Acknowledgements that came after the commits were pushed to -next:

   - 957ac8c421 ("dax: fix PMD faults on zero-length files"):
       Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

   - a39e596baa ("xfs: support for synchronous DAX faults") and
     7b565c9f96 ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()")
        Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>"

* tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits)
  acpi, nfit: add 'Enable Latch System Shutdown Status' command support
  dax: fix general protection fault in dax_alloc_inode
  dax: fix PMD faults on zero-length files
  dax: stop requiring a live device for dax_flush()
  brd: remove dax support
  dax: quiet bdev_dax_supported()
  fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core
  tools/testing/nvdimm: unit test clear-error commands
  acpi, nfit: validate commands against the device type
  tools/testing/nvdimm: stricter bounds checking for error injection commands
  xfs: support for synchronous DAX faults
  xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()
  ext4: Support for synchronous DAX faults
  ext4: Simplify error handling in ext4_dax_huge_fault()
  dax: Implement dax_finish_sync_fault()
  dax, iomap: Add support for synchronous faults
  mm: Define MAP_SYNC and VM_SYNC flags
  dax: Allow tuning whether dax_insert_mapping_entry() dirties entry
  dax: Allow dax_iomap_fault() to return pfn
  dax: Fix comment describing dax_iomap_fault()
  ...
This commit is contained in:
Linus Torvalds
2017-11-17 09:51:57 -08:00
48 changed files with 1407 additions and 562 deletions

View File

@@ -70,6 +70,7 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
libnvdimm-y += $(NVDIMM_SRC)/region.o
libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
libnvdimm-y += $(NVDIMM_SRC)/label.o
libnvdimm-y += $(NVDIMM_SRC)/badrange.o
libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o

View File

@@ -168,8 +168,12 @@ struct nfit_test {
spinlock_t lock;
} ars_state;
struct device *dimm_dev[NUM_DCR];
struct badrange badrange;
struct work_struct work;
};
static struct workqueue_struct *nfit_wq;
static struct nfit_test *to_nfit_test(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -234,48 +238,68 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
return rc;
}
#define NFIT_TEST_ARS_RECORDS 4
#define NFIT_TEST_CLEAR_ERR_UNIT 256
static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
unsigned int buf_len)
{
int ars_recs;
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
/* for testing, only store up to n records that fit within 4k */
ars_recs = SZ_4K / sizeof(struct nd_ars_record);
nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+ NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
+ ars_recs * sizeof(struct nd_ars_record);
nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT;
return 0;
}
/*
* Initialize the ars_state to return an ars_result 1 second in the future with
* a 4K error range in the middle of the requested address range.
*/
static void post_ars_status(struct ars_state *ars_state, u64 addr, u64 len)
static void post_ars_status(struct ars_state *ars_state,
struct badrange *badrange, u64 addr, u64 len)
{
struct nd_cmd_ars_status *ars_status;
struct nd_ars_record *ars_record;
struct badrange_entry *be;
u64 end = addr + len - 1;
int i = 0;
ars_state->deadline = jiffies + 1*HZ;
ars_status = ars_state->ars_status;
ars_status->status = 0;
ars_status->out_length = sizeof(struct nd_cmd_ars_status)
+ sizeof(struct nd_ars_record);
ars_status->address = addr;
ars_status->length = len;
ars_status->type = ND_ARS_PERSISTENT;
ars_status->num_records = 1;
ars_record = &ars_status->records[0];
ars_record->handle = 0;
ars_record->err_address = addr + len / 2;
ars_record->length = SZ_4K;
spin_lock(&badrange->lock);
list_for_each_entry(be, &badrange->list, list) {
u64 be_end = be->start + be->length - 1;
u64 rstart, rend;
/* skip entries outside the range */
if (be_end < addr || be->start > end)
continue;
rstart = (be->start < addr) ? addr : be->start;
rend = (be_end < end) ? be_end : end;
ars_record = &ars_status->records[i];
ars_record->handle = 0;
ars_record->err_address = rstart;
ars_record->length = rend - rstart + 1;
i++;
}
spin_unlock(&badrange->lock);
ars_status->num_records = i;
ars_status->out_length = sizeof(struct nd_cmd_ars_status)
+ i * sizeof(struct nd_ars_record);
}
static int nfit_test_cmd_ars_start(struct ars_state *ars_state,
static int nfit_test_cmd_ars_start(struct nfit_test *t,
struct ars_state *ars_state,
struct nd_cmd_ars_start *ars_start, unsigned int buf_len,
int *cmd_rc)
{
@@ -289,7 +313,7 @@ static int nfit_test_cmd_ars_start(struct ars_state *ars_state,
} else {
ars_start->status = 0;
ars_start->scrub_time = 1;
post_ars_status(ars_state, ars_start->address,
post_ars_status(ars_state, &t->badrange, ars_start->address,
ars_start->length);
*cmd_rc = 0;
}
@@ -320,7 +344,8 @@ static int nfit_test_cmd_ars_status(struct ars_state *ars_state,
return 0;
}
static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err,
static int nfit_test_cmd_clear_error(struct nfit_test *t,
struct nd_cmd_clear_error *clear_err,
unsigned int buf_len, int *cmd_rc)
{
const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1;
@@ -330,18 +355,91 @@ static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err,
if ((clear_err->address & mask) || (clear_err->length & mask))
return -EINVAL;
/*
* Report 'all clear' success for all commands even though a new
* scrub will find errors again. This is enough to have the
* error removed from the 'badblocks' tracking in the pmem
* driver.
*/
badrange_forget(&t->badrange, clear_err->address, clear_err->length);
clear_err->status = 0;
clear_err->cleared = clear_err->length;
*cmd_rc = 0;
return 0;
}
struct region_search_spa {
u64 addr;
struct nd_region *region;
};
/*
 * Tell whether @dev is named like a region device, i.e. its kobject name
 * starts with "region".
 *
 * Returns 1 when the prefix matches and 0 otherwise.
 */
static int is_region_device(struct device *dev)
{
	static const char prefix[] = "region";

	/* sizeof() counts the trailing NUL, so compare one byte fewer */
	if (strncmp(dev->kobj.name, prefix, sizeof(prefix) - 1) != 0)
		return 0;

	return 1;
}
/*
 * Per-child callback used while scanning the bus for the region that owns
 * a system physical address.
 *
 * @dev:  child device under inspection
 * @data: struct region_search_spa carrying the address being looked up;
 *        its ->region member is set when this child covers that address
 *
 * Returns 1 when @dev is a region whose
 * [ndr_start, ndr_start + ndr_size) window contains the address, and 0
 * for non-region devices or regions that do not contain it.
 */
static int nfit_test_search_region_spa(struct device *dev, void *data)
{
	struct region_search_spa *search = data;
	struct nd_region *candidate;
	resource_size_t limit;

	if (!is_region_device(dev))
		return 0;

	candidate = to_nd_region(dev);
	limit = candidate->ndr_start + candidate->ndr_size;

	/* address falls below or at/after the end of this region */
	if (search->addr < candidate->ndr_start || search->addr >= limit)
		return 0;

	search->region = candidate;
	return 1;
}
static int nfit_test_search_spa(struct nvdimm_bus *bus,
struct nd_cmd_translate_spa *spa)
{
int ret;
struct nd_region *nd_region = NULL;
struct nvdimm *nvdimm = NULL;
struct nd_mapping *nd_mapping = NULL;
struct region_search_spa ctx = {
.addr = spa->spa,
.region = NULL,
};
u64 dpa;
ret = device_for_each_child(&bus->dev, &ctx,
nfit_test_search_region_spa);
if (!ret)
return -ENODEV;
nd_region = ctx.region;
dpa = ctx.addr - nd_region->ndr_start;
/*
* last dimm is selected for test
*/
nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
nvdimm = nd_mapping->nvdimm;
spa->devices[0].nfit_device_handle = handle[nvdimm->id];
spa->num_nvdimms = 1;
spa->devices[0].dpa = dpa;
return 0;
}
static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
struct nd_cmd_translate_spa *spa, unsigned int buf_len)
{
if (buf_len < spa->translate_length)
return -EINVAL;
if (nfit_test_search_spa(bus, spa) < 0 || !spa->num_nvdimms)
spa->status = 2;
return 0;
}
static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len)
{
static const struct nd_smart_payload smart_data = {
@@ -378,6 +476,93 @@ static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t,
return 0;
}
static void uc_error_notify(struct work_struct *work)
{
struct nfit_test *t = container_of(work, typeof(*t), work);
__acpi_nfit_notify(&t->pdev.dev, t, NFIT_NOTIFY_UC_MEMORY_ERROR);
}
/*
 * Mock handler for the ARS error-inject command: record the requested
 * range in the test instance's badrange list.
 *
 * @t:       test instance whose badrange list receives the range
 * @err_inj: in/out command payload
 * @buf_len: number of bytes available behind @err_inj
 *
 * Returns 0 on success with err_inj->status cleared. On failure returns a
 * negative errno (-EINVAL for a wrongly sized buffer or a zero-length
 * range, or whatever badrange_add() reported) and, provided the buffer is
 * large enough to hold it, sets err_inj->status to
 * NFIT_ARS_INJECT_INVALID.
 */
static int nfit_test_cmd_ars_error_inject(struct nfit_test *t,
		struct nd_cmd_ars_err_inj *err_inj, unsigned int buf_len)
{
	int rc;

	/*
	 * A buffer shorter than the payload has no room for ->status, so
	 * reject it without writing through @err_inj.
	 */
	if (buf_len < sizeof(*err_inj))
		return -EINVAL;

	if (buf_len != sizeof(*err_inj)) {
		rc = -EINVAL;
		goto err;
	}

	/* the length is unsigned, so "empty" is the only invalid value */
	if (err_inj->err_inj_spa_range_length == 0) {
		rc = -EINVAL;
		goto err;
	}

	rc = badrange_add(&t->badrange, err_inj->err_inj_spa_range_base,
			err_inj->err_inj_spa_range_length);
	if (rc < 0)
		goto err;

	/* optionally kick the uncorrectable-error notification work */
	if (err_inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
		queue_work(nfit_wq, &t->work);

	err_inj->status = 0;
	return 0;

err:
	err_inj->status = NFIT_ARS_INJECT_INVALID;
	return rc;
}
/*
 * Mock handler for the ARS error-inject-clear command: drop the requested
 * range from the test instance's badrange list.
 *
 * @t:       test instance whose badrange list is updated
 * @err_clr: in/out command payload
 * @buf_len: number of bytes available behind @err_clr
 *
 * Returns 0 on success with err_clr->status cleared. Returns -EINVAL for a
 * wrongly sized buffer or a zero-length range; in that case
 * err_clr->status is set to NFIT_ARS_INJECT_INVALID provided the buffer is
 * large enough to hold it.
 */
static int nfit_test_cmd_ars_inject_clear(struct nfit_test *t,
		struct nd_cmd_ars_err_inj_clr *err_clr, unsigned int buf_len)
{
	int rc;

	/*
	 * A buffer shorter than the payload has no room for ->status, so
	 * reject it without writing through @err_clr.
	 */
	if (buf_len < sizeof(*err_clr))
		return -EINVAL;

	if (buf_len != sizeof(*err_clr)) {
		rc = -EINVAL;
		goto err;
	}

	/* the length is unsigned, so "empty" is the only invalid value */
	if (err_clr->err_inj_clr_spa_range_length == 0) {
		rc = -EINVAL;
		goto err;
	}

	badrange_forget(&t->badrange, err_clr->err_inj_clr_spa_range_base,
			err_clr->err_inj_clr_spa_range_length);

	err_clr->status = 0;
	return 0;

err:
	err_clr->status = NFIT_ARS_INJECT_INVALID;
	return rc;
}
static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
struct nd_cmd_ars_err_inj_stat *err_stat,
unsigned int buf_len)
{
struct badrange_entry *be;
int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
int i = 0;
err_stat->status = 0;
spin_lock(&t->badrange.lock);
list_for_each_entry(be, &t->badrange.list, list) {
err_stat->record[i].err_inj_stat_spa_range_base = be->start;
err_stat->record[i].err_inj_stat_spa_range_length = be->length;
i++;
if (i > max)
break;
}
spin_unlock(&t->badrange.lock);
err_stat->inj_err_rec_count = i;
return 0;
}
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -449,6 +634,38 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
}
} else {
struct ars_state *ars_state = &t->ars_state;
struct nd_cmd_pkg *call_pkg = buf;
if (!nd_desc)
return -ENOTTY;
if (cmd == ND_CMD_CALL) {
func = call_pkg->nd_command;
buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
buf = (void *) call_pkg->nd_payload;
switch (func) {
case NFIT_CMD_TRANSLATE_SPA:
rc = nfit_test_cmd_translate_spa(
acpi_desc->nvdimm_bus, buf, buf_len);
return rc;
case NFIT_CMD_ARS_INJECT_SET:
rc = nfit_test_cmd_ars_error_inject(t, buf,
buf_len);
return rc;
case NFIT_CMD_ARS_INJECT_CLEAR:
rc = nfit_test_cmd_ars_inject_clear(t, buf,
buf_len);
return rc;
case NFIT_CMD_ARS_INJECT_GET:
rc = nfit_test_cmd_ars_inject_status(t, buf,
buf_len);
return rc;
default:
return -ENOTTY;
}
}
if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
return -ENOTTY;
@@ -458,15 +675,15 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nfit_test_cmd_ars_cap(buf, buf_len);
break;
case ND_CMD_ARS_START:
rc = nfit_test_cmd_ars_start(ars_state, buf, buf_len,
cmd_rc);
rc = nfit_test_cmd_ars_start(t, ars_state, buf,
buf_len, cmd_rc);
break;
case ND_CMD_ARS_STATUS:
rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len,
cmd_rc);
break;
case ND_CMD_CLEAR_ERROR:
rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc);
rc = nfit_test_cmd_clear_error(t, buf, buf_len, cmd_rc);
break;
default:
return -ENOTTY;
@@ -566,10 +783,9 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
static int ars_state_init(struct device *dev, struct ars_state *ars_state)
{
/* for testing, only store up to n records that fit within 4k */
ars_state->ars_status = devm_kzalloc(dev,
sizeof(struct nd_cmd_ars_status)
+ sizeof(struct nd_ars_record) * NFIT_TEST_ARS_RECORDS,
GFP_KERNEL);
sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL);
if (!ars_state->ars_status)
return -ENOMEM;
spin_lock_init(&ars_state->lock);
@@ -1419,7 +1635,8 @@ static void nfit_test0_setup(struct nfit_test *t)
+ i * sizeof(u64);
}
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
SPA0_SIZE);
acpi_desc = &t->acpi_desc;
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
@@ -1430,7 +1647,12 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -1520,7 +1742,8 @@ static void nfit_test1_setup(struct nfit_test *t)
dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE);
post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
SPA2_SIZE);
acpi_desc = &t->acpi_desc;
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
@@ -1589,6 +1812,7 @@ static int nfit_ctl_test(struct device *dev)
unsigned long mask, cmd_size, offset;
union {
struct nd_cmd_get_config_size cfg_size;
struct nd_cmd_clear_error clear_err;
struct nd_cmd_ars_status ars_stat;
struct nd_cmd_ars_cap ars_cap;
char buf[sizeof(struct nd_cmd_ars_status)
@@ -1613,10 +1837,15 @@ static int nfit_ctl_test(struct device *dev)
.cmd_mask = 1UL << ND_CMD_ARS_CAP
| 1UL << ND_CMD_ARS_START
| 1UL << ND_CMD_ARS_STATUS
| 1UL << ND_CMD_CLEAR_ERROR,
| 1UL << ND_CMD_CLEAR_ERROR
| 1UL << ND_CMD_CALL,
.module = THIS_MODULE,
.provider_name = "ACPI.NFIT",
.ndctl = acpi_nfit_ctl,
.bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
| 1UL << NFIT_CMD_ARS_INJECT_SET
| 1UL << NFIT_CMD_ARS_INJECT_CLEAR
| 1UL << NFIT_CMD_ARS_INJECT_GET,
},
.dev = &adev->dev,
};
@@ -1767,6 +1996,23 @@ static int nfit_ctl_test(struct device *dev)
return -EIO;
}
/* test clear error */
cmd_size = sizeof(cmds.clear_err);
cmds.clear_err = (struct nd_cmd_clear_error) {
.length = 512,
.cleared = 512,
};
rc = setup_result(cmds.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
cmds.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
return -EIO;
}
return 0;
}
@@ -1915,6 +2161,10 @@ static __init int nfit_test_init(void)
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
nfit_wq = create_singlethread_workqueue("nfit");
if (!nfit_wq)
return -ENOMEM;
nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
if (IS_ERR(nfit_test_dimm)) {
rc = PTR_ERR(nfit_test_dimm);
@@ -1931,6 +2181,7 @@ static __init int nfit_test_init(void)
goto err_register;
}
INIT_LIST_HEAD(&nfit_test->resources);
badrange_init(&nfit_test->badrange);
switch (i) {
case 0:
nfit_test->num_pm = NUM_PM;
@@ -1966,6 +2217,7 @@ static __init int nfit_test_init(void)
goto err_register;
instances[i] = nfit_test;
INIT_WORK(&nfit_test->work, uc_error_notify);
}
rc = platform_driver_register(&nfit_test_driver);
@@ -1974,6 +2226,7 @@ static __init int nfit_test_init(void)
return 0;
err_register:
destroy_workqueue(nfit_wq);
for (i = 0; i < NUM_NFITS; i++)
if (instances[i])
platform_device_unregister(&instances[i]->pdev);
@@ -1989,6 +2242,8 @@ static __exit void nfit_test_exit(void)
{
int i;
flush_workqueue(nfit_wq);
destroy_workqueue(nfit_wq);
for (i = 0; i < NUM_NFITS; i++)
platform_device_unregister(&instances[i]->pdev);
platform_driver_unregister(&nfit_test_driver);

View File

@@ -32,6 +32,58 @@ struct nfit_test_resource {
void *buf;
};
/* Status value for a Translate SPA request whose address could not be resolved. */
#define ND_TRANSLATE_SPA_STATUS_INVALID_SPA 2
/* Status value the error-inject set/clear handlers store when a request is rejected. */
#define NFIT_ARS_INJECT_INVALID 2
/* Bit positions within nd_cmd_ars_err_inj.err_inj_options. */
enum err_inj_options {
/* when this bit is set, the inject handler queues the error-notification work */
ND_ARS_ERR_INJ_OPT_NOTIFY = 0,
};
/*
 * nfit commands
 *
 * Function numbers carried in nd_cmd_pkg.nd_command for ND_CMD_CALL
 * requests; the test control handler dispatches on these values.
 */
enum nfit_cmd_num {
/* translate a system physical address to a DIMM handle and DPA */
NFIT_CMD_TRANSLATE_SPA = 5,
/* add a range to the injected-error list */
NFIT_CMD_ARS_INJECT_SET = 7,
/* remove a range from the injected-error list */
NFIT_CMD_ARS_INJECT_CLEAR = 8,
/* report the ranges currently on the injected-error list */
NFIT_CMD_ARS_INJECT_GET = 9,
};
/*
 * Payload of the Translate SPA command (NFIT_CMD_TRANSLATE_SPA).
 * The structure is packed; devices[] is a variable-length tail.
 */
struct nd_cmd_translate_spa {
/* system physical address to look up */
__u64 spa;
/* result code; the test handler stores 2 here when the lookup fails */
__u32 status;
__u8 flags;
__u8 _reserved[3];
/* the test handler rejects requests whose buffer is shorter than this */
__u64 translate_length;
/* number of valid entries in devices[]; the test handler reports 1 */
__u32 num_nvdimms;
struct nd_nvdimm_device {
/* NFIT device handle of the DIMM backing the address */
__u32 nfit_device_handle;
__u32 _reserved;
/* DIMM physical address; the test handler reports the offset into the region */
__u64 dpa;
} __packed devices[0];
} __packed;
/*
 * Payload of the ARS error-inject command (NFIT_CMD_ARS_INJECT_SET).
 * The structure is packed, so status follows the single options byte
 * without padding.
 */
struct nd_cmd_ars_err_inj {
/* start of the range to mark bad */
__u64 err_inj_spa_range_base;
/* length of the range; the test handler rejects zero */
__u64 err_inj_spa_range_length;
/* bit mask indexed by enum err_inj_options */
__u8 err_inj_options;
/* 0 on success, NFIT_ARS_INJECT_INVALID when the test handler rejects the request */
__u32 status;
} __packed;
/* Payload of the ARS error-inject-clear command (NFIT_CMD_ARS_INJECT_CLEAR). */
struct nd_cmd_ars_err_inj_clr {
/* start of the range to forget */
__u64 err_inj_clr_spa_range_base;
/* length of the range; the test handler rejects zero */
__u64 err_inj_clr_spa_range_length;
/* 0 on success, NFIT_ARS_INJECT_INVALID when the test handler rejects the request */
__u32 status;
} __packed;
/*
 * Payload of the ARS error-inject-status command (NFIT_CMD_ARS_INJECT_GET).
 * record[] is a variable-length tail holding one entry per injected range.
 */
struct nd_cmd_ars_err_inj_stat {
/* the test handler always stores 0 here */
__u32 status;
/* number of entries written to record[] */
__u32 inj_err_rec_count;
struct nd_error_stat_query_record {
/* start of an injected bad range */
__u64 err_inj_stat_spa_range_base;
/* length of that range */
__u64 err_inj_stat_spa_range_length;
} __packed record[0];
} __packed;
union acpi_object;
typedef void *acpi_handle;