IB/mlx4: Add support for IBoE
Add support for IBoE to mlx4_ib. The bulk of the code is handling the new address vector fields; mlx4 needs the MAC address of a remote node to include it in a WQE (for datagrams) or in the QP context (for connected QPs). Address resolution is done by assuming all unicast GIDs are either link-local IPv6 addresses. Multicast group attach/detach needs to update the NIC's multicast filters; but since attaching a QP to a multicast group can be done before the QP is bound to a port, for IBoE we need to keep track of all multicast groups that a QP is attached too before it transitions from INIT to RTR (since it does not have a port in the INIT state). Signed-off-by: Eli Cohen <eli@mellanox.co.il> [ Many things cleaned up and otherwise monkeyed with; hope I didn't introduce too many bugs. - Roland ] Signed-off-by: Roland Dreier <rolandd@cisco.com>
Tento commit je obsažen v:
@@ -30,66 +30,153 @@
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <rdma/ib_addr.h>
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include "mlx4_ib.h"
|
||||
|
||||
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
|
||||
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
|
||||
u8 *mac, int *is_mcast, u8 port)
|
||||
{
|
||||
struct in6_addr in6;
|
||||
|
||||
*is_mcast = 0;
|
||||
|
||||
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
|
||||
if (rdma_link_local_addr(&in6))
|
||||
rdma_get_ll_mac(&in6, mac);
|
||||
else if (rdma_is_multicast_addr(&in6)) {
|
||||
rdma_get_mcast_mac(&in6, mac);
|
||||
*is_mcast = 1;
|
||||
} else
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
|
||||
struct mlx4_ib_ah *ah)
|
||||
{
|
||||
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
|
||||
struct mlx4_ib_ah *ah;
|
||||
|
||||
ah = kmalloc(sizeof *ah, GFP_ATOMIC);
|
||||
ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
|
||||
ah->av.ib.g_slid = ah_attr->src_path_bits;
|
||||
if (ah_attr->ah_flags & IB_AH_GRH) {
|
||||
ah->av.ib.g_slid |= 0x80;
|
||||
ah->av.ib.gid_index = ah_attr->grh.sgid_index;
|
||||
ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
|
||||
ah->av.ib.sl_tclass_flowlabel |=
|
||||
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
|
||||
ah_attr->grh.flow_label);
|
||||
memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
|
||||
}
|
||||
|
||||
ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
|
||||
if (ah_attr->static_rate) {
|
||||
ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
|
||||
while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
|
||||
!(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
|
||||
--ah->av.ib.stat_rate;
|
||||
}
|
||||
ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
|
||||
|
||||
return &ah->ibah;
|
||||
}
|
||||
|
||||
static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
|
||||
struct mlx4_ib_ah *ah)
|
||||
{
|
||||
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
|
||||
struct mlx4_dev *dev = ibdev->dev;
|
||||
u8 mac[6];
|
||||
int err;
|
||||
int is_mcast;
|
||||
|
||||
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
memcpy(ah->av.eth.mac, mac, 6);
|
||||
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
|
||||
ah->av.eth.gid_index = ah_attr->grh.sgid_index;
|
||||
if (ah_attr->static_rate) {
|
||||
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
|
||||
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
|
||||
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
|
||||
--ah->av.eth.stat_rate;
|
||||
}
|
||||
|
||||
/*
|
||||
* HW requires multicast LID so we just choose one.
|
||||
*/
|
||||
if (is_mcast)
|
||||
ah->av.ib.dlid = cpu_to_be16(0xc000);
|
||||
|
||||
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
|
||||
ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
|
||||
|
||||
return &ah->ibah;
|
||||
}
|
||||
|
||||
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
|
||||
{
|
||||
struct mlx4_ib_ah *ah;
|
||||
struct ib_ah *ret;
|
||||
|
||||
ah = kzalloc(sizeof *ah, GFP_ATOMIC);
|
||||
if (!ah)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
memset(&ah->av, 0, sizeof ah->av);
|
||||
if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
|
||||
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
|
||||
ret = ERR_PTR(-EINVAL);
|
||||
} else {
|
||||
/*
|
||||
* TBD: need to handle the case when we get
|
||||
* called in an atomic context and there we
|
||||
* might sleep. We don't expect this
|
||||
* currently since we're working with link
|
||||
* local addresses which we can translate
|
||||
* without going to sleep.
|
||||
*/
|
||||
ret = create_iboe_ah(pd, ah_attr, ah);
|
||||
}
|
||||
|
||||
ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
|
||||
ah->av.g_slid = ah_attr->src_path_bits;
|
||||
ah->av.dlid = cpu_to_be16(ah_attr->dlid);
|
||||
if (ah_attr->static_rate) {
|
||||
ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
|
||||
while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
|
||||
!(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
|
||||
--ah->av.stat_rate;
|
||||
}
|
||||
ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
|
||||
if (ah_attr->ah_flags & IB_AH_GRH) {
|
||||
ah->av.g_slid |= 0x80;
|
||||
ah->av.gid_index = ah_attr->grh.sgid_index;
|
||||
ah->av.hop_limit = ah_attr->grh.hop_limit;
|
||||
ah->av.sl_tclass_flowlabel |=
|
||||
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
|
||||
ah_attr->grh.flow_label);
|
||||
memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
|
||||
}
|
||||
if (IS_ERR(ret))
|
||||
kfree(ah);
|
||||
|
||||
return &ah->ibah;
|
||||
return ret;
|
||||
} else
|
||||
return create_ib_ah(pd, ah_attr, ah); /* never fails */
|
||||
}
|
||||
|
||||
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
|
||||
{
|
||||
struct mlx4_ib_ah *ah = to_mah(ibah);
|
||||
enum rdma_link_layer ll;
|
||||
|
||||
memset(ah_attr, 0, sizeof *ah_attr);
|
||||
ah_attr->dlid = be16_to_cpu(ah->av.dlid);
|
||||
ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
|
||||
ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
|
||||
if (ah->av.stat_rate)
|
||||
ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
|
||||
ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
|
||||
ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
|
||||
ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
|
||||
ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
|
||||
ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
|
||||
if (ah->av.ib.stat_rate)
|
||||
ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
|
||||
ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
|
||||
|
||||
if (mlx4_ib_ah_grh_present(ah)) {
|
||||
ah_attr->ah_flags = IB_AH_GRH;
|
||||
|
||||
ah_attr->grh.traffic_class =
|
||||
be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
|
||||
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
|
||||
ah_attr->grh.flow_label =
|
||||
be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
|
||||
ah_attr->grh.hop_limit = ah->av.hop_limit;
|
||||
ah_attr->grh.sgid_index = ah->av.gid_index;
|
||||
memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
|
||||
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
|
||||
ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
|
||||
ah_attr->grh.sgid_index = ah->av.ib.gid_index;
|
||||
memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Odkázat v novém úkolu
Zablokovat Uživatele