dm kcopyd: introduce configurable throttling

This patch allows the administrator to reduce the rate at which kcopyd
issues I/O.

Each module that uses kcopyd acquires a throttle parameter that can be
set in /sys/module/*/parameters.

We maintain a history of kcopyd usage by each module in the variables
io_period and total_period in struct dm_kcopyd_throttle. The actual
kcopyd activity is calculated as a percentage of time equal to
"(100 * io_period / total_period)".  This is compared with the user-defined
throttle percentage threshold and if it is exceeded, we sleep.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
This commit is contained in:
Mikulas Patocka
2013-03-01 22:45:49 +00:00
committed by Alasdair G Kergon
parent a26062416e
commit df5d2e9089
5 changed files with 156 additions and 5 deletions

View File

@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/dm-kcopyd.h>
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
struct workqueue_struct *kcopyd_wq;
struct work_struct kcopyd_work;
struct dm_kcopyd_throttle *throttle;
/*
* We maintain three lists of jobs:
*
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
static struct page_list zero_page_list;
static DEFINE_SPINLOCK(throttle_spinlock);
/*
* IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
* When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
* by 2.
*/
#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ
/*
* Sleep this number of milliseconds.
*
* The value was decided experimentally.
* Smaller values seem to cause an increased copy rate above the limit.
* The reason for this is unknown but possibly due to jiffies rounding errors
* or read/write cache inside the disk.
*/
#define SLEEP_MSEC 100
/*
* Maximum number of sleep events. There is a theoretical livelock if more
* kcopyd clients do work simultaneously which this limit avoids.
*/
#define MAX_SLEEPS 10
static void io_job_start(struct dm_kcopyd_throttle *t)
{
unsigned throttle, now, difference;
int slept = 0, skew;
if (unlikely(!t))
return;
try_again:
spin_lock_irq(&throttle_spinlock);
throttle = ACCESS_ONCE(t->throttle);
if (likely(throttle >= 100))
goto skip_limit;
now = jiffies;
difference = now - t->last_jiffies;
t->last_jiffies = now;
if (t->num_io_jobs)
t->io_period += difference;
t->total_period += difference;
/*
* Maintain sane values if we got a temporary overflow.
*/
if (unlikely(t->io_period > t->total_period))
t->io_period = t->total_period;
if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
t->total_period >>= shift;
t->io_period >>= shift;
}
skew = t->io_period - throttle * t->total_period / 100;
if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
slept++;
spin_unlock_irq(&throttle_spinlock);
msleep(SLEEP_MSEC);
goto try_again;
}
skip_limit:
t->num_io_jobs++;
spin_unlock_irq(&throttle_spinlock);
}
static void io_job_finish(struct dm_kcopyd_throttle *t)
{
unsigned long flags;
if (unlikely(!t))
return;
spin_lock_irqsave(&throttle_spinlock, flags);
t->num_io_jobs--;
if (likely(ACCESS_ONCE(t->throttle) >= 100))
goto skip_limit;
if (!t->num_io_jobs) {
unsigned now, difference;
now = jiffies;
difference = now - t->last_jiffies;
t->last_jiffies = now;
t->io_period += difference;
t->total_period += difference;
/*
* Maintain sane values if we got a temporary overflow.
*/
if (unlikely(t->io_period > t->total_period))
t->io_period = t->total_period;
}
skip_limit:
spin_unlock_irqrestore(&throttle_spinlock, flags);
}
static void wake(struct dm_kcopyd_client *kc)
{
queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
struct kcopyd_job *job = (struct kcopyd_job *) context;
struct dm_kcopyd_client *kc = job->kc;
io_job_finish(kc->throttle);
if (error) {
if (job->rw & WRITE)
job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
.client = job->kc->io_client,
};
io_job_start(job->kc->throttle);
if (job->rw == READ)
r = dm_io(&io_req, 1, &job->source, NULL);
else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
/*-----------------------------------------------------------------
* Client setup
*---------------------------------------------------------------*/
struct dm_kcopyd_client *dm_kcopyd_client_create(void)
struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
{
int r = -ENOMEM;
struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
INIT_LIST_HEAD(&kc->complete_jobs);
INIT_LIST_HEAD(&kc->io_jobs);
INIT_LIST_HEAD(&kc->pages_jobs);
kc->throttle = throttle;
kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
if (!kc->job_pool)