workqueue: fix possible livelock with concurrent mod_delayed_work()
When mod_delayed_work() is executed concurrently, there is a potential
livelock scenario due to pool->lock contention.
Let's say both CPU#0 and CPU#4 call mod_delayed_work() on the same
work item with 0 delay on a bounded workqueue. This work item has
run on CPU#4 previously. CPU#0 wins the work item PENDING bit race
and proceeds to queueing. As this work has previously run on CPU#4,
it tries to acquire the corresponding pool->lock to check if it is
still running there. In the meantime, CPU#4 loops in
try_to_grab_pending() for the workitem to be linked with a pwq so
that it can steal it from pwq->pool->worklist. CPU#4 essentially
acquires and releases the pool->lock in a busy loop, and CPU#0 may
never get this lock.
--------------------------------      --------------------
CPU#0                                 CPU#4
--------------------------------      --------------------
blk_run_queue_async()
mod_delayed_work_on()                 queue_unplugged()
--> try_to_grab_pending() returns     blk_run_queue_async()
    0 indicating PENDING bit is set
    now.
__queue_delayed_work()                mod_delayed_work_on()
__queue_work()                        try_to_grab_pending()
                                      {
--> waiting for the CPU#4's             acquire pool->lock()
    pool->lock                          release pool->lock()
                                      }
Change-Id: I9aeab111f55a19478a9d045c8e3576bce3b7a7c5
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Elliot Berman <eberman@codeaurora.org>
This commit is contained in:
committed by
Elliot Berman
parent
243513a690
commit
82a4f9ff8b
@@ -50,6 +50,8 @@
|
|||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/sched/isolation.h>
|
#include <linux/sched/isolation.h>
|
||||||
#include <linux/nmi.h>
|
#include <linux/nmi.h>
|
||||||
|
#include <linux/bug.h>
|
||||||
|
#include <linux/delay.h>
|
||||||
|
|
||||||
#include "workqueue_internal.h"
|
#include "workqueue_internal.h"
|
||||||
|
|
||||||
@@ -1305,6 +1307,12 @@ fail:
|
|||||||
if (work_is_canceling(work))
|
if (work_is_canceling(work))
|
||||||
return -ENOENT;
|
return -ENOENT;
|
||||||
cpu_relax();
|
cpu_relax();
|
||||||
|
/*
|
||||||
|
* The queueing is in progress in another context. If we keep
|
||||||
|
* taking the pool->lock in a busy loop, the other context may
|
||||||
|
* never get the lock. Give 1 usec delay to avoid this contention.
|
||||||
|
*/
|
||||||
|
udelay(1);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user