nd_blk: change aperture mapping from WC to WB

This should result in a pretty sizeable performance gain for reads.  For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB).  This was
done on a random lab machine.

PMEM reads from a write combining mapping:
	# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
	100000+0 records in
	100000+0 records out
	409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s

PMEM reads from a write-back mapping:
	# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
	1000000+0 records in
	1000000+0 records out
	4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s

To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:

http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read.  This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM.  We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.

In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range().  This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
Ross Zwisler
2015-08-27 13:14:20 -06:00
committed by Dan Williams
parent e2e05394e4
commit 67a3e8fe90
11 changed files with 88 additions and 36 deletions

View File

@@ -1017,7 +1017,7 @@ static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
if (mmio->num_lines)
offset = to_interleave_offset(offset, mmio);
return readq(mmio->base + offset);
return readq(mmio->addr.base + offset);
}
static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
@@ -1042,11 +1042,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
if (mmio->num_lines)
offset = to_interleave_offset(offset, mmio);
writeq(cmd, mmio->base + offset);
writeq(cmd, mmio->addr.base + offset);
wmb_blk(nfit_blk);
if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
readq(mmio->base + offset);
readq(mmio->addr.base + offset);
}
static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1078,11 +1078,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
}
if (rw)
memcpy_to_pmem(mmio->aperture + offset,
memcpy_to_pmem(mmio->addr.aperture + offset,
iobuf + copied, c);
else
else {
if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
mmio_flush_range((void __force *)
mmio->addr.aperture + offset, c);
memcpy_from_pmem(iobuf + copied,
mmio->aperture + offset, c);
mmio->addr.aperture + offset, c);
}
copied += c;
len -= c;
@@ -1129,7 +1134,10 @@ static void nfit_spa_mapping_release(struct kref *kref)
WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
iounmap(spa_map->iomem);
if (spa_map->type == SPA_MAP_APERTURE)
memunmap((void __force *)spa_map->addr.aperture);
else
iounmap(spa_map->addr.base);
release_mem_region(spa->address, spa->length);
list_del(&spa_map->list);
kfree(spa_map);
@@ -1175,7 +1183,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
spa_map = find_spa_mapping(acpi_desc, spa);
if (spa_map) {
kref_get(&spa_map->kref);
return spa_map->iomem;
return spa_map->addr.base;
}
spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
@@ -1191,20 +1199,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
if (!res)
goto err_mem;
if (type == SPA_MAP_APERTURE) {
/*
* TODO: memremap_pmem() support, but that requires cache
* flushing when the aperture is moved.
*/
spa_map->iomem = ioremap_wc(start, n);
} else
spa_map->iomem = ioremap_nocache(start, n);
spa_map->type = type;
if (type == SPA_MAP_APERTURE)
spa_map->addr.aperture = (void __pmem *)memremap(start, n,
ARCH_MEMREMAP_PMEM);
else
spa_map->addr.base = ioremap_nocache(start, n);
if (!spa_map->iomem)
if (!spa_map->addr.base)
goto err_map;
list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
return spa_map->iomem;
return spa_map->addr.base;
err_map:
release_mem_region(start, n);
@@ -1267,7 +1274,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
nfit_blk->dimm_flags = flags.flags;
else if (rc == -ENOTTY) {
/* fall back to a conservative default */
nfit_blk->dimm_flags = ND_BLK_DCR_LATCH;
nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
rc = 0;
} else
rc = -ENXIO;
@@ -1307,9 +1314,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
/* map block aperture memory */
nfit_blk->bdw_offset = nfit_mem->bdw->offset;
mmio = &nfit_blk->mmio[BDW];
mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
SPA_MAP_APERTURE);
if (!mmio->base) {
if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
nvdimm_name(nvdimm));
return -ENOMEM;
@@ -1330,9 +1337,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
mmio = &nfit_blk->mmio[DCR];
mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
SPA_MAP_CONTROL);
if (!mmio->base) {
if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
nvdimm_name(nvdimm));
return -ENOMEM;
@@ -1399,7 +1406,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
for (i = 0; i < 2; i++) {
struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
if (mmio->base)
if (mmio->addr.base)
nfit_spa_unmap(acpi_desc, mmio->spa);
}
nd_blk_region_set_provider_data(ndbr, NULL);