net/mlx4_core: Enable device recovery flow with SRIOV

In SRIOV, both the PF and the VF may attempt device recovery whenever they
assume that the device is not functioning.  When the PF driver resets the
device, the VF should detect this and attempt to reinitialize itself.

The VF must be able to reset itself under all circumstances, even
if the PF is not responsive.

The VF shall reset itself in the following cases:

1. Commands are not processed within reasonable time over the communication channel.
This is done considering device state and the correct return code based on
the command as was done in the native mode, done in the next patch.

2. The VF driver receives an internal error event reported by the PF on the
communication channel. This occurs when the PF driver resets the device or
when VF is out of sync with the PF.

Add 'VF reset' capability, which allows the VF to reinitialize itself even when the
PF is not responsive.

As PF and VF may run their reset flow simulantanisly, there are several cases
that are handled:
- Prevent freeing VF resources upon FLR, when PF is in its unloading stage.
- Prevent PF getting VF commands before it has finished initializing its resources.
- Upon VF startup, check that comm-channel is online before sending
  commands to the PF and getting timed-out.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Yishai Hadas
2015-01-25 16:59:42 +02:00
committed by David S. Miller
parent 2ba5fbd62b
commit 55ad359225
8 changed files with 290 additions and 68 deletions

View File

@@ -42,6 +42,7 @@
#include <linux/mlx4/device.h>
#include <linux/semaphore.h>
#include <rdma/ib_smi.h>
#include <linux/delay.h>
#include <asm/io.h>
@@ -729,7 +730,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
EXPORT_SYMBOL_GPL(__mlx4_cmd);
static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
@@ -1945,8 +1946,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
break;
case MLX4_COMM_CMD_VHCR_POST:
if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
(slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
(slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) {
mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n",
slave, cmd, slave_state[slave].last_cmd);
goto reset_slave;
}
mutex_lock(&priv->cmd.slave_cmd_mutex);
if (mlx4_master_process_vhcr(dev, slave, NULL)) {
@@ -1980,7 +1984,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
reset_slave:
/* cleanup any slave resources */
mlx4_delete_all_resources_for_slave(dev, slave);
if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)
mlx4_delete_all_resources_for_slave(dev, slave);
if (cmd != MLX4_COMM_CMD_RESET) {
mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
slave, cmd);
/* Turn on internal error letting slave reset itself immeditaly,
* otherwise it might take till timeout on command is passed
*/
reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
}
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
if (!slave_state[slave].is_slave_going_down)
slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
@@ -2056,17 +2071,28 @@ void mlx4_master_comm_channel(struct work_struct *work)
static int sync_toggles(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int wr_toggle;
int rd_toggle;
u32 wr_toggle;
u32 rd_toggle;
unsigned long end;
wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
end = jiffies + msecs_to_jiffies(5000);
wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write));
if (wr_toggle == 0xffffffff)
end = jiffies + msecs_to_jiffies(30000);
else
end = jiffies + msecs_to_jiffies(5000);
while (time_before(jiffies, end)) {
rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
if (rd_toggle == wr_toggle) {
priv->cmd.comm_toggle = rd_toggle;
rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
/* PCI might be offline */
msleep(100);
wr_toggle = swab32(readl(&priv->mfunc.comm->
slave_write));
continue;
}
if (rd_toggle >> 31 == wr_toggle >> 31) {
priv->cmd.comm_toggle = rd_toggle >> 31;
return 0;
}
@@ -2172,13 +2198,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
if (mlx4_init_resource_tracker(dev))
goto err_thread;
err = mlx4_ARM_COMM_CHANNEL(dev);
if (err) {
mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
err);
goto err_resource;
}
} else {
err = sync_toggles(dev);
if (err) {
@@ -2188,8 +2207,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
}
return 0;
err_resource:
mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL);
err_thread:
flush_workqueue(priv->mfunc.master.comm_wq);
destroy_workqueue(priv->mfunc.master.comm_wq);
@@ -2266,6 +2283,27 @@ err:
return -ENOMEM;
}
void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int slave;
u32 slave_read;
/* Report an internal error event to all
* communication channels.
*/
for (slave = 0; slave < dev->num_slaves; slave++) {
slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read));
slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
__raw_writel((__force u32)cpu_to_be32(slave_read),
&priv->mfunc.comm[slave].slave_read);
/* Make sure that our comm channel write doesn't
* get mixed in with writes from another CPU.
*/
mmiowb();
}
}
void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2281,6 +2319,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
kfree(priv->mfunc.master.slave_state);
kfree(priv->mfunc.master.vf_admin);
kfree(priv->mfunc.master.vf_oper);
dev->num_slaves = 0;
}
iounmap(priv->mfunc.comm);