IPoIB: Fix deadlock on RTNL between bcast join comp and ipoib_stop()
Taking rtnl_lock in ipoib_mcast_join_complete() causes a deadlock with
ipoib_stop(). We avoid it by scheduling the piece of code that takes
the lock on ipoib_workqueue instead of executing it directly. This
works because we only flush the ipoib_workqueue with the RTNL not held.
The deadlock happens because ipoib_stop() calls ipoib_ib_dev_down()
which calls ipoib_mcast_dev_flush(), which calls ipoib_mcast_free(),
which calls ipoib_mcast_leave(). The latter calls
ib_sa_free_multicast(), and this waits until the multicast completion
handler finishes. This handler is ipoib_mcast_join_complete(), which
waits for the rtnl_lock(), which was already taken by ipoib_stop().
This bug was introduced in commit a77a57a1
("IPoIB: Fix deadlock on
RTNL in ipoib_stop()").
Signed-off-by: Yossi Etigin <yosefe@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:

committed by
Roland Dreier

parent
1941246dd9
commit
e8224e4b80
@@ -366,6 +366,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ipoib_mcast_carrier_on_task(struct work_struct *work)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
|
||||
carrier_on_task);
|
||||
|
||||
/*
|
||||
* Take rtnl_lock to avoid racing with ipoib_stop() and
|
||||
* turning the carrier back on while a device is being
|
||||
* removed.
|
||||
*/
|
||||
rtnl_lock();
|
||||
netif_carrier_on(priv->dev);
|
||||
rtnl_unlock();
|
||||
}
|
||||
|
||||
static int ipoib_mcast_join_complete(int status,
|
||||
struct ib_sa_multicast *multicast)
|
||||
{
|
||||
@@ -392,16 +407,12 @@ static int ipoib_mcast_join_complete(int status,
|
||||
&priv->mcast_task, 0);
|
||||
mutex_unlock(&mcast_mutex);
|
||||
|
||||
if (mcast == priv->broadcast) {
|
||||
/*
|
||||
* Take RTNL lock here to avoid racing with
|
||||
* ipoib_stop() and turning the carrier back
|
||||
* on while a device is being removed.
|
||||
*/
|
||||
rtnl_lock();
|
||||
netif_carrier_on(dev);
|
||||
rtnl_unlock();
|
||||
}
|
||||
/*
|
||||
* Defer carrier on work to ipoib_workqueue to avoid a
|
||||
* deadlock on rtnl_lock here.
|
||||
*/
|
||||
if (mcast == priv->broadcast)
|
||||
queue_work(ipoib_workqueue, &priv->carrier_on_task);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user