mlx4: Adjust QP1 multiplexing for RoCE/SRIOV
This requires the following modifications: 1. Fix build_mlx4_header to properly fill in the ETH fields 2. Adjust mux and demux QP1 flow to support RoCE. This commit still assumes only one GID per slave for RoCE. The commit enabling multiple GIDs is a subsequent commit, and is done separately because of its complexity. Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

父節點
36f6fdb749
當前提交
6ee51a4e86
@@ -315,7 +315,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
|
||||
}
|
||||
|
||||
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
|
||||
struct ib_mad *mad)
|
||||
struct ib_mad *mad)
|
||||
{
|
||||
u32 pv_cm_id;
|
||||
struct id_map_entry *id;
|
||||
@@ -323,6 +323,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
|
||||
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
|
||||
union ib_gid gid;
|
||||
|
||||
if (!slave)
|
||||
return 0;
|
||||
|
||||
gid = gid_from_req_msg(ibdev, mad);
|
||||
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
|
||||
if (*slave < 0) {
|
||||
@@ -341,7 +344,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
*slave = id->slave_id;
|
||||
if (slave)
|
||||
*slave = id->slave_id;
|
||||
set_remote_comm_id(mad, id->sl_cm_id);
|
||||
|
||||
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
|
||||
|
@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
|
||||
int ret = 0;
|
||||
u16 tun_pkey_ix;
|
||||
u16 cached_pkey;
|
||||
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
|
||||
|
||||
if (dest_qpt > IB_QPT_GSI)
|
||||
return -EINVAL;
|
||||
@@ -509,6 +510,12 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
|
||||
* The driver will set the force loopback bit in post_send */
|
||||
memset(&attr, 0, sizeof attr);
|
||||
attr.port_num = port;
|
||||
if (is_eth) {
|
||||
ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw);
|
||||
if (ret)
|
||||
return ret;
|
||||
attr.ah_flags = IB_AH_GRH;
|
||||
}
|
||||
ah = ib_create_ah(tun_ctx->pd, &attr);
|
||||
if (IS_ERR(ah))
|
||||
return -ENOMEM;
|
||||
@@ -580,6 +587,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
|
||||
int err;
|
||||
int slave;
|
||||
u8 *slave_id;
|
||||
int is_eth = 0;
|
||||
|
||||
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
|
||||
is_eth = 0;
|
||||
else
|
||||
is_eth = 1;
|
||||
|
||||
if (is_eth) {
|
||||
if (!(wc->wc_flags & IB_WC_GRH)) {
|
||||
mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
|
||||
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
|
||||
mlx4_ib_warn(ibdev, "failed matching grh\n");
|
||||
return -ENOENT;
|
||||
}
|
||||
if (slave >= dev->dev->caps.sqp_demux) {
|
||||
mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
|
||||
slave, dev->dev->caps.sqp_demux);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
|
||||
return 0;
|
||||
|
||||
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
|
||||
if (err)
|
||||
pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
|
||||
slave, err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Initially assume that this mad is for us */
|
||||
slave = mlx4_master_func_num(dev->dev);
|
||||
@@ -1260,12 +1302,8 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
|
||||
memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
|
||||
ah.ibah.device = ctx->ib_dev;
|
||||
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
|
||||
if ((ah_attr.ah_flags & IB_AH_GRH) &&
|
||||
(ah_attr.grh.sgid_index != slave)) {
|
||||
mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
|
||||
slave, ah_attr.grh.sgid_index);
|
||||
return;
|
||||
}
|
||||
if (ah_attr.ah_flags & IB_AH_GRH)
|
||||
ah_attr.grh.sgid_index = slave;
|
||||
|
||||
mlx4_ib_send_to_wire(dev, slave, ctx->port,
|
||||
is_proxy_qp0(dev, wc->src_qp, slave) ?
|
||||
|
@@ -1842,9 +1842,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
||||
{
|
||||
struct ib_device *ib_dev = sqp->qp.ibqp.device;
|
||||
struct mlx4_wqe_mlx_seg *mlx = wqe;
|
||||
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
|
||||
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
|
||||
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
|
||||
struct net_device *ndev;
|
||||
union ib_gid sgid;
|
||||
u16 pkey;
|
||||
int send_size;
|
||||
@@ -1868,12 +1868,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
||||
/* When multi-function is enabled, the ib_core gid
|
||||
* indexes don't necessarily match the hw ones, so
|
||||
* we must use our own cache */
|
||||
sgid.global.subnet_prefix =
|
||||
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
|
||||
subnet_prefix;
|
||||
sgid.global.interface_id =
|
||||
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
|
||||
guid_cache[ah->av.ib.gid_index];
|
||||
err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
|
||||
be32_to_cpu(ah->av.ib.port_pd) >> 24,
|
||||
ah->av.ib.gid_index, &sgid.raw[0]);
|
||||
if (err)
|
||||
return err;
|
||||
} else {
|
||||
err = ib_get_cached_gid(ib_dev,
|
||||
be32_to_cpu(ah->av.ib.port_pd) >> 24,
|
||||
@@ -1902,6 +1901,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
||||
sqp->ud_header.grh.flow_label =
|
||||
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
|
||||
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
|
||||
if (is_eth)
|
||||
memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
|
||||
else {
|
||||
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
|
||||
/* When multi-function is enabled, the ib_core gid
|
||||
* indexes don't necessarily match the hw ones, so
|
||||
@@ -1917,6 +1919,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
||||
be32_to_cpu(ah->av.ib.port_pd) >> 24,
|
||||
ah->av.ib.gid_index,
|
||||
&sqp->ud_header.grh.source_gid);
|
||||
}
|
||||
memcpy(sqp->ud_header.grh.destination_gid.raw,
|
||||
ah->av.ib.dgid, 16);
|
||||
}
|
||||
@@ -1948,17 +1951,19 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
||||
}
|
||||
|
||||
if (is_eth) {
|
||||
u8 *smac;
|
||||
u8 smac[6];
|
||||
struct in6_addr in6;
|
||||
|
||||
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
|
||||
|
||||
mlx->sched_prio = cpu_to_be16(pcp);
|
||||
|
||||
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
|
||||
/* FIXME: cache smac value? */
|
||||
ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
|
||||
if (!ndev)
|
||||
return -ENODEV;
|
||||
smac = ndev->dev_addr;
|
||||
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
|
||||
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
|
||||
memcpy(&in6, sgid.raw, sizeof(in6));
|
||||
rdma_get_ll_mac(&in6, smac);
|
||||
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
|
||||
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
|
||||
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
|
||||
|
Reference in New Issue
Block a user