lightnvm: pblk: add padding distribution sysfs attribute

When pblk receives a sync, all data up to that point in the write buffer
must be committed to persistent storage, and as flash memory comes with a
minimal write size there is a significant cost involved both in terms
of time for completing the sync and in terms of write amplification
caused by the sectors padded to fill up to the minimal write size.

In order to get a better understanding of the costs involved for syncs,
add a sysfs attribute to pblk: padding_dist, showing a normalized
distribution of sectors padded. In order to facilitate measurements of
specific workloads during the lifetime of the pblk instance, the
distribution can be reset by writing 0 to the attribute.

Do this by introducing counters for each possible padding:
{0..(minimal write size - 1)} and calculate the normalized distribution
when showing the attribute.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Javier González <javier@cnexlabs.com>
Rearranged total_buckets statement in pblk_sysfs_get_padding_dist
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Tento commit je obsažen v:
Hans Holmberg
2018-03-30 00:04:55 +02:00
odevzdal Jens Axboe
rodič ff12581ec7
revize 5d149bfabe
4 změnil soubory, kde provedl 113 přidání a 13 odebrání

Zobrazit soubor

@@ -340,15 +340,62 @@ static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
}
/*
 * Return @bucket as an integer percentage of @total, rounded down.
 *
 * The whole computation is done in 64 bits. Storing bucket * 100 in an
 * int truncates large counters, and div_u64() only accepts a 32-bit
 * divisor, so a 64-bit @total would be silently truncated as well.
 *
 * Returns 0 when @total is 0 rather than dividing by zero.
 */
static long long bucket_percentage(unsigned long long bucket,
				   unsigned long long total)
{
	if (!total)
		return 0;

	return div64_u64(bucket * 100, total);
}
/*
 * Format the padding distribution into @page as space-separated
 * "<bucket>:<percent>% " entries followed by a newline, and return the
 * number of characters produced.
 *
 * Bucket 0 is derived as the flush count since the last reset minus the
 * sum of all pad_dist[] counters; pad_dist[i] is reported as bucket i + 1.
 * When no flush has been counted since the last reset, every bucket is
 * printed as "<bucket>:0 " (without a percent sign).
 */
static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
{
	int nr_padded = pblk->min_write_pgs - 1;
	unsigned long long flushes;
	unsigned long long padded_sum = 0;
	unsigned long long pct;
	int len = 0;
	int idx;

	flushes = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;

	if (flushes == 0) {
		/* Nothing to normalize against: emit an all-zero distribution. */
		idx = 0;
		while (idx <= nr_padded) {
			len += snprintf(page + len, PAGE_SIZE - len,
					"%d:0 ", idx);
			idx++;
		}
		goto out;
	}

	idx = 0;
	while (idx < nr_padded) {
		padded_sum += atomic64_read(&pblk->pad_dist[idx]);
		idx++;
	}

	/* Bucket 0: whatever part of the flush count is not in pad_dist[]. */
	len += snprintf(page + len, PAGE_SIZE - len, "0:%lld%% ",
			bucket_percentage(flushes - padded_sum, flushes));

	for (idx = 1; idx <= nr_padded; idx++) {
		pct = bucket_percentage(atomic64_read(&pblk->pad_dist[idx - 1]),
					flushes);
		len += snprintf(page + len, PAGE_SIZE - len, "%d:%lld%% ",
				idx, pct);
	}

out:
	len += snprintf(page + len, PAGE_SIZE - len, "\n");

	return len;
}
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
return snprintf(page, PAGE_SIZE,
"%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
"%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
atomic_long_read(&pblk->inflight_writes),
atomic_long_read(&pblk->inflight_reads),
atomic_long_read(&pblk->req_writes),
atomic_long_read(&pblk->nr_flush),
(u64)atomic64_read(&pblk->nr_flush),
atomic_long_read(&pblk->padded_writes),
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
@@ -426,6 +473,32 @@ static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
}
/*
 * Reset the padding distribution statistics.
 *
 * @page must hold a newline-terminated unsigned integer within its first
 * @len bytes, and the only accepted value is 0. On success all pad_dist[]
 * counters are zeroed, the current flush count is stored in nr_flush_rst,
 * and @len is returned. Any other input returns -EINVAL.
 */
static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
					   const char *page, size_t len)
{
	size_t c_len;
	/* kstrtouint() stores through an unsigned int pointer. */
	unsigned int reset_value;
	int buckets = pblk->min_write_pgs - 1;
	int i;

	/* Reject input that has no newline within the first len bytes. */
	c_len = strcspn(page, "\n");
	if (c_len >= len)
		return -EINVAL;

	if (kstrtouint(page, 0, &reset_value))
		return -EINVAL;

	/* Writing anything other than 0 is not a valid reset request. */
	if (reset_value != 0)
		return -EINVAL;

	for (i = 0; i < buckets; i++)
		atomic64_set(&pblk->pad_dist[i], 0);

	/*
	 * Record the current flush count as the new baseline; the show
	 * side subtracts nr_flush_rst from nr_flush to get its total.
	 */
	pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);

	return len;
}
static struct attribute sys_write_luns = {
.name = "write_luns",
.mode = 0444,
@@ -486,6 +559,11 @@ static struct attribute sys_write_amp_trip = {
.mode = 0644,
};
/*
 * sysfs attribute "padding_dist". Mode 0644 makes it readable and writable;
 * reads go to pblk_sysfs_get_padding_dist() and writes to
 * pblk_sysfs_set_padding_dist() via the name checks in the show/store
 * handlers.
 */
static struct attribute sys_padding_dist = {
.name = "padding_dist",
.mode = 0644,
};
#ifdef CONFIG_NVM_DEBUG
static struct attribute sys_stats_debug_attr = {
.name = "stats",
@@ -506,6 +584,7 @@ static struct attribute *pblk_attrs[] = {
&sys_lines_info_attr,
&sys_write_amp_mileage,
&sys_write_amp_trip,
&sys_padding_dist,
#ifdef CONFIG_NVM_DEBUG
&sys_stats_debug_attr,
#endif
@@ -539,6 +618,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_get_write_amp_mileage(pblk, buf);
else if (strcmp(attr->name, "write_amp_trip") == 0)
return pblk_sysfs_get_write_amp_trip(pblk, buf);
else if (strcmp(attr->name, "padding_dist") == 0)
return pblk_sysfs_get_padding_dist(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
@@ -557,6 +638,8 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_set_sec_per_write(pblk, buf, len);
else if (strcmp(attr->name, "write_amp_trip") == 0)
return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
else if (strcmp(attr->name, "padding_dist") == 0)
return pblk_sysfs_set_padding_dist(pblk, buf, len);
return 0;
}