Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a slightly more active cycle than normal with ongoing
  core changes and quite a lot of collected driver updates.

   - Various driver fixes for bnxt_re, cxgb4, hns, mlx5, pvrdma, rxe

   - A new data transfer mode for HFI1 giving higher performance

   - Significant functional and bug fix update to the mlx5
     On-Demand-Paging MR feature

   - A chip hang reset recovery system for hns

   - Change mm->pinned_vm to an atomic64

   - Update bnxt_re to support a new 57500 chip

   - A sane netlink 'rdma link add' method for creating rxe devices and
     fixing the various unregistration race conditions in rxe's
     unregister flow

   - Allow lookup up objects by an ID over netlink

   - Various reworking of the core to driver interface:
       - drivers should not assume umem SGLs are in PAGE_SIZE chunks
       - ucontext is accessed via udata not other means
       - start to make the core code responsible for object memory
         allocation
       - drivers should convert struct device to struct ib_device via a
         helper
       - drivers have more tools to avoid use after unregister problems"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (280 commits)
  net/mlx5: ODP support for XRC transport is not enabled by default in FW
  IB/hfi1: Close race condition on user context disable and close
  RDMA/umem: Revert broken 'off by one' fix
  RDMA/umem: minor bug fix in error handling path
  RDMA/hns: Use GFP_ATOMIC in hns_roce_v2_modify_qp
  cxgb4: kfree mhp after the debug print
  IB/rdmavt: Fix concurrency panics in QP post_send and modify to error
  IB/rdmavt: Fix loopback send with invalidate ordering
  IB/iser: Fix dma_nents type definition
  IB/mlx5: Set correct write permissions for implicit ODP MR
  bnxt_re: Clean cq for kernel consumers only
  RDMA/uverbs: Don't do double free of allocated PD
  RDMA: Handle ucontext allocations by IB/core
  RDMA/core: Fix a WARN() message
  bnxt_re: fix the regression due to changes in alloc_pbl
  IB/mlx4: Increase the timeout for CM cache
  IB/core: Abort page fault handler silently during owning process exit
  IB/mlx5: Validate correct PD before prefetch MR
  IB/mlx5: Protect against prefetch of invalid MR
  RDMA/uverbs: Store PR pointer before it is overwritten
  ...
This commit is contained in:
Linus Torvalds
2019-03-09 15:53:03 -08:00
264 changed files with 16724 additions and 5028 deletions

View File

@@ -1,5 +1,6 @@
config INFINIBAND_BNXT_RE
tristate "Broadcom Netxtreme HCA support"
depends on 64BIT
depends on ETHERNET && NETDEVICES && PCI && INET && DCB
select NET_VENDOR_BROADCOM
select BNXT

View File

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt
ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
bnxt_re-y := main.o ib_verbs.o \
qplib_res.o qplib_rcfw.o \

View File

@@ -124,6 +124,7 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx chip_ctx;
struct bnxt_en_dev *en_dev;
struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
int num_msix;

View File

@@ -48,6 +48,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>
#include "bnxt_ulp.h"
@@ -563,41 +564,29 @@ fail:
}
/* Protection Domains */
int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
void bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
int rc;
bnxt_re_destroy_fence_mr(pd);
if (pd->qplib_pd.id) {
rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
&rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
if (rc)
dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
}
kfree(pd);
return 0;
if (pd->qplib_pd.id)
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
}
struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *ucontext,
struct ib_udata *udata)
int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext,
struct ib_udata *udata)
{
struct ib_device *ibdev = ibpd->device;
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_re_ucontext *ucntx = container_of(ucontext,
struct bnxt_re_ucontext,
ib_uctx);
struct bnxt_re_pd *pd;
struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
int rc;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
pd->rdev = rdev;
if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
@@ -637,13 +626,12 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
if (bnxt_re_create_fence_mr(pd))
dev_warn(rdev_to_dev(rdev),
"Failed to create Fence-MR\n");
return &pd->ib_pd;
return 0;
dbfail:
(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
&pd->qplib_pd);
fail:
kfree(pd);
return ERR_PTR(rc);
return rc;
}
/* Address Handles */
@@ -663,17 +651,36 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
return 0;
}
static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
{
u8 nw_type;
switch (ntype) {
case RDMA_NETWORK_IPV4:
nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
break;
case RDMA_NETWORK_IPV6:
nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
break;
default:
nw_type = CMDQ_CREATE_AH_TYPE_V1;
break;
}
return nw_type;
}
struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_ah *ah;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int rc;
struct bnxt_re_dev *rdev = pd->rdev;
const struct ib_gid_attr *sgid_attr;
struct bnxt_re_ah *ah;
u8 nw_type;
int rc;
if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
@@ -700,28 +707,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.flow_label = grh->flow_label;
ah->qplib_ah.hop_limit = grh->hop_limit;
ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
if (udata &&
!rdma_is_multicast_addr((struct in6_addr *)
grh->dgid.raw) &&
!rdma_link_local_addr((struct in6_addr *)
grh->dgid.raw)) {
const struct ib_gid_attr *sgid_attr;
sgid_attr = grh->sgid_attr;
/* Get network header type for this GID */
nw_type = rdma_gid_attr_network_type(sgid_attr);
switch (nw_type) {
case RDMA_NETWORK_IPV4:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
break;
case RDMA_NETWORK_IPV6:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
break;
default:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1;
break;
}
}
sgid_attr = grh->sgid_attr;
/* Get network header type for this GID */
nw_type = rdma_gid_attr_network_type(sgid_attr);
ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type);
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
@@ -733,12 +723,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
/* Write AVID to shared page. */
if (udata) {
struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
struct bnxt_re_ucontext *uctx;
struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
unsigned long flag;
u32 *wrptr;
uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
spin_lock_irqsave(&uctx->sh_lock, flag);
wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
*wrptr = ah->qplib_ah.id;
@@ -804,8 +793,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_re_dev *rdev = qp->rdev;
int rc;
unsigned int flags;
int rc;
bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
@@ -814,9 +803,12 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
return rc;
}
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_clean_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
if (rdma_is_kernel_res(&qp->ib_qp.res)) {
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_clean_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
}
bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
@@ -882,21 +874,23 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
struct bnxt_re_qp_req ureq;
struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
struct ib_umem *umem;
int bytes = 0;
struct ib_ucontext *context = pd->ib_pd.uobject->context;
struct bnxt_re_ucontext *cntx = container_of(context,
struct bnxt_re_ucontext,
ib_uctx);
int bytes = 0, psn_sz;
struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC)
bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search));
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
bytes += (qplib_qp->sq.max_wqe * psn_sz);
}
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(context, ureq.qpsva, bytes,
IB_ACCESS_LOCAL_WRITE, 1);
umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem))
return PTR_ERR(umem);
@@ -908,7 +902,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
if (!qp->qplib_qp.srq) {
bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(context, ureq.qprva, bytes,
umem = ib_umem_get(udata, ureq.qprva, bytes,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem))
goto rqfail;
@@ -1066,12 +1060,17 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
qp->qplib_qp.pd = &pd->qplib_pd;
qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
if (qp_init_attr->qp_type == IB_QPT_GSI &&
bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
qp->qplib_qp.type = CMDQ_CREATE_QP_TYPE_GSI;
if (qp->qplib_qp.type == IB_QPT_MAX) {
dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
qp->qplib_qp.type);
rc = -EINVAL;
goto fail;
}
qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
IB_SIGNAL_ALL_WR) ? true : false);
@@ -1132,7 +1131,8 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
if (qp_init_attr->qp_type == IB_QPT_GSI) {
if (qp_init_attr->qp_type == IB_QPT_GSI &&
!(bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))) {
/* Allocate 1 more than what's provided */
entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
@@ -1361,17 +1361,15 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
struct ib_umem *umem;
int bytes = 0;
struct ib_ucontext *context = pd->ib_pd.uobject->context;
struct bnxt_re_ucontext *cntx = container_of(context,
struct bnxt_re_ucontext,
ib_uctx);
struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(context, ureq.srqva, bytes,
IB_ACCESS_LOCAL_WRITE, 1);
umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem))
return PTR_ERR(umem);
@@ -1646,6 +1644,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
__from_ib_access_flags(qp_attr->qp_access_flags);
/* LOCAL_WRITE access must be set to allow RC receive */
qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
/* Temp: Set all params on QP as of now */
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
}
if (qp_attr_mask & IB_QP_PKEY_INDEX) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
@@ -2093,7 +2094,8 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
static int is_ud_qp(struct bnxt_re_qp *qp)
{
return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD;
return (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD ||
qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI);
}
static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
@@ -2397,7 +2399,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
if (ib_qp->qp_type == IB_QPT_GSI) {
if (qp->qplib_qp.type == CMDQ_CREATE_QP1_TYPE_GSI) {
rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe,
payload_sz);
if (rc)
@@ -2527,7 +2529,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
wqe.wr_id = wr->wr_id;
wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
if (ib_qp->qp_type == IB_QPT_GSI)
if (ib_qp->qp_type == IB_QPT_GSI &&
qp->qplib_qp.type != CMDQ_CREATE_QP_TYPE_GSI)
rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe,
payload_sz);
if (!rc)
@@ -2622,7 +2625,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
goto fail;
}
cq->umem = ib_umem_get(context, req.cq_va,
cq->umem = ib_umem_get(udata, req.cq_va,
entries * sizeof(struct cq_base),
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(cq->umem)) {
@@ -3122,19 +3125,33 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
}
}
static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
u8 nw_type;
wc->opcode = IB_WC_RECV;
wc->status = __rc_to_ib_wc_status(cqe->status);
if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
if (cqe->flags & CQ_RES_UD_FLAGS_IMM)
wc->wc_flags |= IB_WC_WITH_IMM;
if (cqe->flags & CQ_RES_RC_FLAGS_INV)
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
(CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
/* report only on GSI QP for Thor */
if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI) {
wc->wc_flags |= IB_WC_GRH;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= IB_WC_WITH_SMAC;
if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) {
wc->vlan_id = (cqe->cfa_meta & 0xFFF);
if (wc->vlan_id < 0x1000)
wc->wc_flags |= IB_WC_WITH_VLAN;
}
nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >>
CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT;
wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type);
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
}
static int send_phantom_wqe(struct bnxt_re_qp *qp)
@@ -3226,7 +3243,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
switch (cqe->opcode) {
case CQ_BASE_CQE_TYPE_REQ:
if (qp->qplib_qp.id ==
if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
qp->rdev->qp1_sqp->qplib_qp.id) {
/* Handle this completion with
* the stored completion
@@ -3261,7 +3278,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
bnxt_re_process_res_rc_wc(wc, cqe);
break;
case CQ_BASE_CQE_TYPE_RES_UD:
if (qp->qplib_qp.id ==
if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
qp->rdev->qp1_sqp->qplib_qp.id) {
/* Handle this completion with
* the stored completion
@@ -3274,7 +3291,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
break;
}
}
bnxt_re_process_res_ud_wc(wc, cqe);
bnxt_re_process_res_ud_wc(qp, wc, cqe);
break;
default:
dev_err(rdev_to_dev(cq->rdev),
@@ -3301,10 +3318,10 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
spin_lock_irqsave(&cq->cq_lock, flags);
/* Trigger on the very next completion */
if (ib_cqn_flags & IB_CQ_NEXT_COMP)
type = DBR_DBR_TYPE_CQ_ARMALL;
type = DBC_DBC_TYPE_CQ_ARMALL;
/* Trigger on the next solicited completion */
else if (ib_cqn_flags & IB_CQ_SOLICITED)
type = DBR_DBR_TYPE_CQ_ARMSE;
type = DBC_DBC_TYPE_CQ_ARMSE;
/* Poll to see if there are missed events */
if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
@@ -3537,19 +3554,14 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
u64 *pbl_tbl = pbl_tbl_orig;
u64 paddr;
u64 page_mask = (1ULL << page_shift) - 1;
int i, pages;
struct scatterlist *sg;
int entry;
struct sg_dma_page_iter sg_iter;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
pages = sg_dma_len(sg) >> PAGE_SHIFT;
for (i = 0; i < pages; i++) {
paddr = sg_dma_address(sg) + (i << PAGE_SHIFT);
if (pbl_tbl == pbl_tbl_orig)
*pbl_tbl++ = paddr & ~page_mask;
else if ((paddr & page_mask) == 0)
*pbl_tbl++ = paddr;
}
for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
paddr = sg_page_iter_dma_address(&sg_iter);
if (pbl_tbl == pbl_tbl_orig)
*pbl_tbl++ = paddr & ~page_mask;
else if ((paddr & page_mask) == 0)
*pbl_tbl++ = paddr;
}
return pbl_tbl - pbl_tbl_orig;
}
@@ -3589,8 +3601,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
/* The fixed portion of the rkey is the same as the lkey */
mr->ib_mr.rkey = mr->qplib_mr.rkey;
umem = ib_umem_get(ib_pd->uobject->context, start, length,
mr_access_flags, 0);
umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
if (IS_ERR(umem)) {
dev_err(rdev_to_dev(rdev), "Failed to get umem");
rc = -EFAULT;
@@ -3613,7 +3624,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
goto free_umem;
}
page_shift = umem->page_shift;
page_shift = PAGE_SHIFT;
if (!bnxt_re_page_size_ok(page_shift)) {
dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
@@ -3660,13 +3671,15 @@ free_mr:
return ERR_PTR(rc);
}
struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
{
struct ib_device *ibdev = ctx->device;
struct bnxt_re_ucontext *uctx =
container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_re_uctx_resp resp;
struct bnxt_re_ucontext *uctx;
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
struct bnxt_re_uctx_resp resp;
u32 chip_met_rev_num = 0;
int rc;
dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
@@ -3675,13 +3688,9 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
BNXT_RE_ABI_VERSION);
return ERR_PTR(-EPERM);
return -EPERM;
}
uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
if (!uctx)
return ERR_PTR(-ENOMEM);
uctx->rdev = rdev;
uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
@@ -3691,37 +3700,45 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
}
spin_lock_init(&uctx->sh_lock);
resp.dev_id = rdev->en_dev->pdev->devfn; /*Temp, Use idr_alloc instead*/
resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX;
chip_met_rev_num = rdev->chip_ctx.chip_num;
chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) <<
BNXT_RE_CHIP_ID0_CHIP_REV_SFT;
chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) <<
BNXT_RE_CHIP_ID0_CHIP_MET_SFT;
resp.chip_id0 = chip_met_rev_num;
/* Future extension of chip info */
resp.chip_id1 = 0;
/*Temp, Use idr_alloc instead */
resp.dev_id = rdev->en_dev->pdev->devfn;
resp.max_qp = rdev->qplib_ctx.qpc_count;
resp.pg_size = PAGE_SIZE;
resp.cqe_sz = sizeof(struct cq_base);
resp.max_cqd = dev_attr->max_cq_wqes;
resp.rsvd = 0;
rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to copy user context");
rc = -EFAULT;
goto cfail;
}
return &uctx->ib_uctx;
return 0;
cfail:
free_page((unsigned long)uctx->shpg);
uctx->shpg = NULL;
fail:
kfree(uctx);
return ERR_PTR(rc);
return rc;
}
int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
{
struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
struct bnxt_re_ucontext,
ib_uctx);
struct bnxt_re_dev *rdev = uctx->rdev;
int rc = 0;
if (uctx->shpg)
free_page((unsigned long)uctx->shpg);
@@ -3730,17 +3747,10 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
/* Free DPI only if this is the first PD allocated by the
* application and mark the context dpi as NULL
*/
rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl,
&uctx->dpi);
if (rc)
dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!");
/* Don't fail, continue*/
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl, &uctx->dpi);
uctx->dpi.dbr = NULL;
}
kfree(uctx);
return 0;
}
/* Helper function to mmap the virtual memory from user app */

View File

@@ -56,8 +56,8 @@ struct bnxt_re_fence_data {
};
struct bnxt_re_pd {
struct ib_pd ib_pd;
struct bnxt_re_dev *rdev;
struct ib_pd ib_pd;
struct bnxt_qplib_pd qplib_pd;
struct bnxt_re_fence_data fence;
};
@@ -135,8 +135,8 @@ struct bnxt_re_mw {
};
struct bnxt_re_ucontext {
struct ib_ucontext ib_uctx;
struct bnxt_re_dev *rdev;
struct ib_ucontext ib_uctx;
struct bnxt_qplib_dpi dpi;
void *shpg;
spinlock_t sh_lock; /* protect shpg */
@@ -163,10 +163,9 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
int index, union ib_gid *gid);
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
u8 port_num);
struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata);
int bnxt_re_dealloc_pd(struct ib_pd *pd);
int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata);
void bnxt_re_dealloc_pd(struct ib_pd *pd);
struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
@@ -216,9 +215,8 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw);
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata);
int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);

View File

@@ -80,6 +80,29 @@ static DEFINE_MUTEX(bnxt_re_dev_lock);
static struct workqueue_struct *bnxt_re_wq;
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
{
rdev->rcfw.res = NULL;
rdev->qplib_res.cctx = NULL;
}
static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev;
struct bnxt *bp;
en_dev = rdev->en_dev;
bp = netdev_priv(en_dev->net);
rdev->chip_ctx.chip_num = bp->chip_num;
/* rest members to follow eventually */
rdev->qplib_res.cctx = &rdev->chip_ctx;
rdev->rcfw.res = &rdev->qplib_res;
return 0;
}
/* SR-IOV helper functions */
static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
@@ -278,6 +301,7 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
&bnxt_re_ulp_ops, rdev);
rdev->qplib_res.pdev = rdev->en_dev->pdev;
return rc;
}
@@ -345,7 +369,8 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
fw_msg->timeout = timeout;
}
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
u16 fw_ring_id, int type)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_ring_free_input req = {0};
@@ -359,7 +384,7 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
memset(&fw_msg, 0, sizeof(fw_msg));
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@@ -396,7 +421,7 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
/* Association of ring index with doorbell index and MSIX number */
req.logical_id = cpu_to_le16(map_index);
req.length = cpu_to_le32(ring_mask + 1);
req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
req.ring_type = type;
req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
@@ -538,7 +563,8 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
@@ -547,7 +573,8 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
struct bnxt_re_dev *rdev =
rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
}
@@ -610,6 +637,8 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.query_srq = bnxt_re_query_srq,
.reg_user_mr = bnxt_re_reg_user_mr,
.req_notify_cq = bnxt_re_req_notify_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
};
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
@@ -662,7 +691,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
return ib_register_device(ibdev, "bnxt_re%d", NULL);
return ib_register_device(ibdev, "bnxt_re%d");
}
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
@@ -686,7 +715,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
struct bnxt_re_dev *rdev;
/* Allocate bnxt_re_dev instance here */
rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
rdev = ib_alloc_device(bnxt_re_dev, ibdev);
if (!rdev) {
dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
ROCE_DRV_MODULE_NAME);
@@ -858,6 +887,12 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
return 0;
}
static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
{
return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
0x10000 : rdev->msix_entries[indx].db_offset;
}
static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
@@ -871,18 +906,18 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
int num_vec_enabled = 0;
int rc = 0, i;
u32 db_offt;
bnxt_qplib_init_res(&rdev->qplib_res);
for (i = 1; i < rdev->num_msix ; i++) {
db_offt = bnxt_re_get_nqdb_offset(rdev, i);
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
i - 1, rdev->msix_entries[i].vector,
rdev->msix_entries[i].db_offset,
&bnxt_re_cqn_handler,
db_offt, &bnxt_re_cqn_handler,
&bnxt_re_srqn_handler);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to enable NQ with rc = 0x%x", rc);
@@ -894,16 +929,18 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
fail:
for (i = num_vec_enabled; i >= 0; i--)
bnxt_qplib_disable_nq(&rdev->nq[i]);
return rc;
}
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
u8 type;
int i;
for (i = 0; i < rdev->num_msix - 1; i++) {
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
rdev->nq[i].res = NULL;
bnxt_qplib_free_nq(&rdev->nq[i]);
}
}
@@ -925,8 +962,11 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
int rc = 0, i;
int num_vec_created = 0;
dma_addr_t *pg_map;
int rc = 0, i;
int pages;
u8 type;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
@@ -947,6 +987,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
goto dealloc_res;
for (i = 0; i < rdev->num_msix - 1; i++) {
rdev->nq[i].res = &rdev->qplib_res;
rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
BNXT_RE_MAX_SRQC_COUNT + 2;
rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]);
@@ -955,13 +996,13 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
i, rc);
goto free_nq;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count,
HWRM_RING_ALLOC_CMPL,
BNXT_QPLIB_NQE_MAX_CNT - 1,
rdev->msix_entries[i + 1].ring_idx,
&rdev->nq[i].ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr;
pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count;
rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
BNXT_QPLIB_NQE_MAX_CNT - 1,
rdev->msix_entries[i + 1].ring_idx,
&rdev->nq[i].ring_id);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to allocate NQ fw id with rc = 0x%x",
@@ -974,7 +1015,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
return 0;
free_nq:
for (i = num_vec_created; i >= 0; i--) {
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
bnxt_qplib_free_nq(&rdev->nq[i]);
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
@@ -1228,6 +1270,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
u8 type;
int rc;
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
@@ -1251,7 +1294,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
@@ -1260,6 +1304,8 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
dev_warn(rdev_to_dev(rdev),
"Failed to free MSI-X vectors: %#x", rc);
}
bnxt_re_destroy_chip_ctx(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
rc = bnxt_re_unregister_netdev(rdev);
if (rc)
@@ -1280,9 +1326,12 @@ static void bnxt_re_worker(struct work_struct *work)
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
{
int rc;
dma_addr_t *pg_map;
u32 db_offt, ridx;
int pages, vid;
bool locked;
u8 type;
int rc;
/* Acquire rtnl lock through out this function */
rtnl_lock();
@@ -1297,6 +1346,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
rc = bnxt_re_setup_chip_ctx(rdev);
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to get chip context\n");
return -EINVAL;
}
/* Check whether VF or PF */
bnxt_re_get_sriov_func_type(rdev);
@@ -1320,21 +1375,22 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
goto fail;
}
rc = bnxt_re_net_ring_alloc
(rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
&rdev->rcfw.creq_ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr;
pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count;
ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
BNXT_QPLIB_CREQE_MAX_CNT - 1,
ridx, &rdev->rcfw.creq_ring_id);
if (rc) {
pr_err("Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
rc = bnxt_qplib_enable_rcfw_channel
(rdev->en_dev->pdev, &rdev->rcfw,
rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
rdev->is_virtfn, &bnxt_re_aeq_handler);
db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector;
rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
vid, db_offt, rdev->is_virtfn,
&bnxt_re_aeq_handler);
if (rc) {
pr_err("Failed to enable RCFW channel: %#x\n", rc);
goto free_ring;
@@ -1347,7 +1403,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
if (!rdev->is_virtfn)
bnxt_re_set_resource_limits(rdev);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0,
bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx));
if (rc) {
pr_err("Failed to allocate QPLIB context: %#x\n", rc);
goto disable_rcfw;
@@ -1418,7 +1475,8 @@ free_ctx:
disable_rcfw:
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
free_ring:
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
free_rcfw:
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
fail:

View File

@@ -44,6 +44,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/prefetch.h>
#include <linux/if_ether.h>
#include "roce_hsi.h"
@@ -244,6 +245,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
u16 type;
int budget = nq->budget;
uintptr_t q_handle;
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
/* Service the NQ until empty */
raw_cons = hwq->cons;
@@ -290,7 +292,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
<< 32;
bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle,
DBR_DBR_TYPE_SRQ_ARMENA);
DBC_DBC_TYPE_SRQ_ARMENA);
if (!nq->srqn_handler(nq,
(struct bnxt_qplib_srq *)q_handle,
nqsrqe->event))
@@ -312,7 +314,9 @@ static void bnxt_qplib_service_nq(unsigned long data)
}
if (hwq->cons != raw_cons) {
hwq->cons = raw_cons;
NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements);
bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons,
hwq->max_elements, nq->ring_id,
gen_p5);
}
}
@@ -336,9 +340,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
tasklet_disable(&nq->worker);
/* Mask h/w interrupt */
NQ_DB(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons,
nq->hwq.max_elements, nq->ring_id, gen_p5);
/* Sync with last running IRQ handler */
synchronize_irq(nq->vector);
if (kill)
@@ -373,6 +379,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
int msix_vector, bool need_init)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
int rc;
if (nq->requested)
@@ -399,7 +406,8 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
nq->vector, nq_indx);
}
nq->requested = true;
NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons,
nq->hwq.max_elements, nq->ring_id, gen_p5);
return rc;
}
@@ -433,7 +441,8 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = -ENOMEM;
goto fail;
}
nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4);
/* Unconditionally map 8 bytes to support 57500 series */
nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 8);
if (!nq->bar_reg_iomem) {
rc = -ENOMEM;
goto fail;
@@ -462,15 +471,17 @@ void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
{
u8 hwq_type;
nq->pdev = pdev;
if (!nq->hwq.max_elements ||
nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
hwq_type = bnxt_qplib_get_hwq_type(nq->res);
if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0,
&nq->hwq.max_elements,
BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
PAGE_SIZE, HWQ_TYPE_L2_CMPL))
PAGE_SIZE, hwq_type))
return -ENOMEM;
nq->budget = 8;
@@ -481,21 +492,19 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type)
{
struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
struct dbr_dbr db_msg = { 0 };
void __iomem *db;
u32 sw_prod = 0;
u32 sw_prod;
u64 val = 0;
/* Ring DB */
sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? srq->threshold :
HWQ_CMP(srq_hwq->prod, srq_hwq);
db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) &
DBR_DBR_XID_MASK) | arm_type);
db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ?
srq->dbr_base : srq->dpi->dbr;
wmb(); /* barrier before db ring */
__iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64));
sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ?
srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq);
db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base :
srq->dpi->dbr;
val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
val <<= 32;
val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
writeq(val, db);
}
int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
@@ -590,7 +599,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
srq->id = le32_to_cpu(resp.xid);
srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
if (srq->threshold)
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARMENA);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA);
srq->arm_req = false;
return 0;
@@ -614,7 +623,7 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
srq_hwq->max_elements - sw_cons + sw_prod;
if (count > srq->threshold) {
srq->arm_req = false;
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
} else {
/* Deferred arming */
srq->arm_req = true;
@@ -702,10 +711,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
srq_hwq->max_elements - sw_cons + sw_prod;
spin_unlock(&srq_hwq->lock);
/* Ring DB */
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ);
if (srq->arm_req == true && count > srq->threshold) {
srq->arm_req = false;
bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
}
done:
return rc;
@@ -853,18 +862,19 @@ exit:
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
struct cmdq_create_qp req;
struct creq_create_qp_resp resp;
struct bnxt_qplib_pbl *pbl;
struct sq_psn_search **psn_search_ptr;
unsigned long int psn_search, poff = 0;
struct sq_psn_search **psn_search_ptr;
struct bnxt_qplib_q *sq = &qp->sq;
struct bnxt_qplib_q *rq = &qp->rq;
int i, rc, req_size, psn_sz = 0;
struct sq_send **hw_sq_send_ptr;
struct creq_create_qp_resp resp;
struct bnxt_qplib_hwq *xrrq;
int i, rc, req_size, psn_sz;
u16 cmd_flags = 0, max_ssge;
u32 sw_prod, qp_flags = 0;
struct cmdq_create_qp req;
struct bnxt_qplib_pbl *pbl;
u32 qp_flags = 0;
u16 max_rsge;
RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
@@ -874,8 +884,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
req.qp_handle = cpu_to_le64(qp->qp_handle);
/* SQ */
psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ?
sizeof(struct sq_psn_search) : 0;
if (qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
}
sq->hwq.max_elements = sq->max_wqe;
rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist,
sq->nmap, &sq->hwq.max_elements,
@@ -905,10 +918,16 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
poff = (psn_search & ~PAGE_MASK) /
BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
}
for (i = 0; i < sq->hwq.max_elements; i++)
for (i = 0; i < sq->hwq.max_elements; i++) {
sq->swq[i].psn_search =
&psn_search_ptr[get_psne_pg(i + poff)]
[get_psne_idx(i + poff)];
/*psns_ext will be used only for P5 chips. */
sq->swq[i].psn_ext =
(struct sq_psn_search_ext *)
&psn_search_ptr[get_psne_pg(i + poff)]
[get_psne_idx(i + poff)];
}
}
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
@@ -929,14 +948,6 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
/* initialize all SQ WQEs to LOCAL_INVALID (sq prep for hw fetch) */
hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
for (sw_prod = 0; sw_prod < sq->hwq.max_elements; sw_prod++) {
hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
[get_sqe_idx(sw_prod)];
hw_sq_send_hdr->wqe_type = SQ_BASE_WQE_TYPE_LOCAL_INVALID;
}
if (qp->scq)
req.scq_cid = cpu_to_le32(qp->scq->id);
@@ -1007,8 +1018,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
req.sq_fwo_sq_sge = cpu_to_le16(
((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
<< CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge;
req.rq_fwo_rq_sge = cpu_to_le16(
((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK)
((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK)
<< CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
/* ORRQ and IRRQ */
if (psn_sz) {
@@ -1053,6 +1065,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
qp->cctx = res->cctx;
INIT_LIST_HEAD(&qp->sq_flush);
INIT_LIST_HEAD(&qp->rq_flush);
rcfw->qp_tbl[qp->id].qp_id = qp->id;
@@ -1494,19 +1507,16 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *sq = &qp->sq;
struct dbr_dbr db_msg = { 0 };
u32 sw_prod;
u64 val = 0;
val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
DBC_DBC_TYPE_SQ);
val <<= 32;
sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid =
cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
DBR_DBR_TYPE_SQ);
val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
/* Flush all the WQE writes to HW */
wmb();
__iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, qp->dpi->dbr);
}
int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
@@ -1617,7 +1627,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
((offsetof(typeof(*sqe), data) + 15) >> 4);
sqe->inv_key_or_imm_data = cpu_to_le32(
wqe->send.inv_key);
if (qp->type == CMDQ_CREATE_QP_TYPE_UD) {
if (qp->type == CMDQ_CREATE_QP_TYPE_UD ||
qp->type == CMDQ_CREATE_QP_TYPE_GSI) {
sqe->q_key = cpu_to_le32(wqe->send.q_key);
sqe->dst_qp = cpu_to_le32(
wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
@@ -1741,14 +1752,26 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
swq->next_psn = sq->psn & BTH_PSN_MASK;
if (swq->psn_search) {
swq->psn_search->opcode_start_psn = cpu_to_le32(
((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
SQ_PSN_SEARCH_START_PSN_MASK) |
((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
SQ_PSN_SEARCH_OPCODE_MASK));
swq->psn_search->flags_next_psn = cpu_to_le32(
((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
SQ_PSN_SEARCH_NEXT_PSN_MASK));
u32 opcd_spsn;
u32 flg_npsn;
opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
SQ_PSN_SEARCH_START_PSN_MASK);
opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
SQ_PSN_SEARCH_OPCODE_MASK);
flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
SQ_PSN_SEARCH_NEXT_PSN_MASK);
if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
swq->psn_ext->opcode_start_psn =
cpu_to_le32(opcd_spsn);
swq->psn_ext->flags_next_psn =
cpu_to_le32(flg_npsn);
} else {
swq->psn_search->opcode_start_psn =
cpu_to_le32(opcd_spsn);
swq->psn_search->flags_next_psn =
cpu_to_le32(flg_npsn);
}
}
queue_err:
if (sch_handler) {
@@ -1785,19 +1808,16 @@ done:
void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
struct dbr_dbr db_msg = { 0 };
u32 sw_prod;
u64 val = 0;
val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
DBC_DBC_TYPE_RQ);
val <<= 32;
sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid =
cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
DBR_DBR_TYPE_RQ);
val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
/* Flush the writes to HW Rx WQE before the ringing Rx DB */
wmb();
__iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, qp->dpi->dbr);
}
int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
@@ -1881,32 +1901,28 @@ done:
/* Spinlock must be held */
static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq)
{
struct dbr_dbr db_msg = { 0 };
u64 val = 0;
db_msg.type_xid =
cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
DBR_DBR_TYPE_CQ_ARMENA);
val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
DBC_DBC_TYPE_CQ_ARMENA;
val <<= 32;
/* Flush memory writes before enabling the CQ */
wmb();
__iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, cq->dbr_base);
}
static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
{
struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
struct dbr_dbr db_msg = { 0 };
u32 sw_cons;
u64 val = 0;
/* Ring DB */
val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
val <<= 32;
sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq);
db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) &
DBR_DBR_INDEX_MASK);
db_msg.type_xid =
cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
arm_type);
val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
/* flush memory writes before arming the CQ */
wmb();
__iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
writeq(val, cq->dpi->dbr);
}
int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
@@ -2053,6 +2069,7 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
opcode = CQ_BASE_CQE_TYPE_RES_RC;
break;
case CMDQ_CREATE_QP_TYPE_UD:
case CMDQ_CREATE_QP_TYPE_GSI:
opcode = CQ_BASE_CQE_TYPE_RES_UD;
break;
}
@@ -2125,7 +2142,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
sq->send_phantom = true;
/* TODO: Only ARM if the previous SQE is ARMALL */
bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL);
rc = -EAGAIN;
goto out;
@@ -2410,12 +2427,14 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
cqe->length = le32_to_cpu(hwcqe->length);
cqe->length = (u32)le16_to_cpu(hwcqe->length);
cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata);
cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
cqe->flags = le16_to_cpu(hwcqe->flags);
cqe->status = hwcqe->status;
cqe->qp_handle = (u64)(unsigned long)qp;
memcpy(cqe->smac, hwcqe->src_mac, 6);
/*FIXME: Endianness fix needed for smace */
memcpy(cqe->smac, hwcqe->src_mac, ETH_ALEN);
wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id)
& CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK;
cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) |
@@ -2794,7 +2813,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
}
if (cq->hwq.cons != raw_cons) {
cq->hwq.cons = raw_cons;
bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ);
}
exit:
return num_cqes - budget;

View File

@@ -106,6 +106,7 @@ struct bnxt_qplib_swq {
u32 start_psn;
u32 next_psn;
struct sq_psn_search *psn_search;
struct sq_psn_search_ext *psn_ext;
};
struct bnxt_qplib_swqe {
@@ -254,6 +255,7 @@ struct bnxt_qplib_q {
struct bnxt_qplib_qp {
struct bnxt_qplib_pd *pd;
struct bnxt_qplib_dpi *dpi;
struct bnxt_qplib_chip_ctx *cctx;
u64 qp_handle;
#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
u32 id;
@@ -347,6 +349,7 @@ struct bnxt_qplib_cqe {
u8 type;
u8 opcode;
u32 length;
u16 cfa_meta;
u64 wr_id;
union {
__be32 immdata;
@@ -432,13 +435,47 @@ struct bnxt_qplib_cq {
#define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \
NQ_DB_IDX_VALID | \
NQ_DB_IRQ_DIS)
#define NQ_DB_REARM(db, raw_cons, cp_bit) \
writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
#define NQ_DB(db, raw_cons, cp_bit) \
writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index,
u32 xid, bool arm)
{
u64 val;
val = xid & DBC_DBC_XID_MASK;
val |= DBC_DBC_PATH_ROCE;
val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
val <<= 32;
val |= index & DBC_DBC_INDEX_MASK;
writeq(val, db);
}
static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_nq_db64(db, index, xid, true);
else
writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db);
}
static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_nq_db64(db, index, xid, false);
else
writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db);
}
struct bnxt_qplib_nq {
struct pci_dev *pdev;
struct bnxt_qplib_res *res;
int vector;
cpumask_t mask;
@@ -448,7 +485,7 @@ struct bnxt_qplib_nq {
struct bnxt_qplib_hwq hwq;
u16 bar_reg;
u16 bar_reg_off;
u32 bar_reg_off;
u16 ring_id;
void __iomem *bar_reg_iomem;

View File

@@ -359,11 +359,12 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
static void bnxt_qplib_service_creq(unsigned long data)
{
struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
struct bnxt_qplib_hwq *creq = &rcfw->creq;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct creq_base *creqe, **creq_ptr;
u32 sw_cons, raw_cons;
unsigned long flags;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
/* Service the CREQ until budget is over */
spin_lock_irqsave(&creq->lock, flags);
@@ -407,8 +408,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
if (creq->cons != raw_cons) {
creq->cons = raw_cons;
CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
creq->max_elements);
bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
raw_cons, creq->max_elements,
rcfw->creq_ring_id, gen_p5);
}
spin_unlock_irqrestore(&creq->lock, flags);
}
@@ -480,11 +482,13 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
RCFW_DBR_BASE_PAGE_SHIFT);
/*
* VFs need not setup the HW context area, PF
* Gen P5 devices doesn't require this allocation
* as the L2 driver does the same for RoCE also.
* Also, VFs need not setup the HW context area, PF
* shall setup this area for VF. Skipping the
* HW programming
*/
if (is_virtfn)
if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
goto skip_ctx_setup;
level = ctx->qpc_tbl.level;
@@ -560,12 +564,15 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx,
int qp_tbl_sz)
{
u8 hwq_type;
rcfw->pdev = pdev;
rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
hwq_type = bnxt_qplib_get_hwq_type(rcfw->res);
if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0,
&rcfw->creq.max_elements,
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_L2_CMPL)) {
BNXT_QPLIB_CREQE_UNITS,
0, PAGE_SIZE, hwq_type)) {
dev_err(&rcfw->pdev->dev,
"HW channel CREQ allocation failed\n");
goto fail;
@@ -607,10 +614,13 @@ fail:
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
tasklet_disable(&rcfw->worker);
/* Mask h/w interrupts */
CREQ_DB(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
rcfw->creq.max_elements);
bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
rcfw->creq.max_elements, rcfw->creq_ring_id,
gen_p5);
/* Sync with last running IRQ-handler */
synchronize_irq(rcfw->vector);
if (kill)
@@ -647,6 +657,7 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
bool need_init)
{
bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
int rc;
if (rcfw->requested)
@@ -663,8 +674,9 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
if (rc)
return rc;
rcfw->requested = true;
CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
rcfw->creq.max_elements);
bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
rcfw->creq.cons, rcfw->creq.max_elements,
rcfw->creq_ring_id, gen_p5);
return 0;
}
@@ -684,8 +696,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
/* General */
rcfw->seq_num = 0;
set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth *
sizeof(unsigned long));
bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long);
rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
if (!rcfw->cmdq_bitmap)
return -ENOMEM;
@@ -718,8 +729,9 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
dev_err(&rcfw->pdev->dev,
"CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg);
/* Unconditionally map 8 bytes to support 57500 series */
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
4);
8);
if (!rcfw->creq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
rcfw->creq_bar_reg);

View File

@@ -157,10 +157,46 @@ static inline u32 get_creq_idx(u32 val)
#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \
CREQ_DB_IDX_VALID | \
CREQ_DB_IRQ_DIS)
#define CREQ_DB_REARM(db, raw_cons, cp_bit) \
writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
#define CREQ_DB(db, raw_cons, cp_bit) \
writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index,
u32 xid, bool arm)
{
u64 val = 0;
val = xid & DBC_DBC_XID_MASK;
val |= DBC_DBC_PATH_ROCE;
val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
val <<= 32;
val |= index & DBC_DBC_INDEX_MASK;
writeq(val, db);
}
static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_creq_db64(db, index, xid, true);
else
writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK),
db);
}
static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons,
u32 max_elements, u32 xid,
bool gen_p5)
{
u32 index = raw_cons & (max_elements - 1);
if (gen_p5)
bnxt_qplib_ring_creq_db64(db, index, xid, true);
else
writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK),
db);
}
#define CREQ_ENTRY_POLL_BUDGET 0x100
@@ -187,6 +223,7 @@ struct bnxt_qplib_qp_node {
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
struct bnxt_qplib_res *res;
int vector;
struct tasklet_struct worker;
bool requested;

View File

@@ -85,7 +85,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
struct scatterlist *sghead, u32 pages, u32 pg_size)
{
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
bool is_umem = false;
int i;
@@ -116,13 +116,11 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
} else {
i = 0;
is_umem = true;
for_each_sg(sghead, sg, pages, i) {
pbl->pg_map_arr[i] = sg_dma_address(sg);
pbl->pg_arr[i] = sg_virt(sg);
if (!pbl->pg_arr[i])
goto fail;
for_each_sg_dma_page (sghead, &sg_iter, pages, 0) {
pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
pbl->pg_arr[i] = NULL;
pbl->pg_count++;
i++;
}
}
@@ -330,13 +328,13 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev,
*/
int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx,
bool virt_fn)
bool virt_fn, bool is_p5)
{
int i, j, k, rc = 0;
int fnz_idx = -1;
__le64 **pbl_ptr;
if (virt_fn)
if (virt_fn || is_p5)
goto stats_alloc;
/* QPC Tables */
@@ -762,7 +760,11 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
{
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
stats->size = sizeof(struct ctx_hw_stats);
/* 128 byte aligned context memory is required only for 57500.
* However making this unconditional, it does not harm previous
* generation.
*/
stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {

View File

@@ -180,12 +180,20 @@ struct bnxt_qplib_ctx {
u64 hwrm_intf_ver;
};
struct bnxt_qplib_chip_ctx {
u16 chip_num;
u8 chip_rev;
u8 chip_metal;
};
#define CHIP_NUM_57500 0x1750
struct bnxt_qplib_res {
struct pci_dev *pdev;
struct bnxt_qplib_chip_ctx *cctx;
struct net_device *netdev;
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
struct bnxt_qplib_sgid_tbl sgid_tbl;
struct bnxt_qplib_pkey_tbl pkey_tbl;
@@ -193,6 +201,24 @@ struct bnxt_qplib_res {
bool prio;
};
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
{
return (cctx->chip_num == CHIP_NUM_57500);
}
static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
{
return bnxt_qplib_is_chip_gen_p5(res->cctx) ?
HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL;
}
static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
{
return bnxt_qplib_is_chip_gen_p5(cctx) ?
RING_ALLOC_REQ_RING_TYPE_NQ :
RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
}
#define to_bnxt_qplib(ptr, type, member) \
container_of(ptr, type, member)
@@ -226,5 +252,5 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx);
int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
struct bnxt_qplib_ctx *ctx,
bool virt_fn);
bool virt_fn, bool is_p5);
#endif /* __BNXT_QPLIB_RES_H__ */

View File

@@ -119,7 +119,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
* reporting the max number
*/
attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
attr->max_qp_sges = sb->max_sge;
attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ?
6 : sb->max_sge;
attr->max_cq = le32_to_cpu(sb->max_cq);
attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
attr->max_cq_sges = attr->max_qp_sges;

View File

@@ -49,11 +49,11 @@ struct cmpl_doorbell {
#define CMPL_DOORBELL_IDX_SFT 0
#define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL
#define CMPL_DOORBELL_RESERVED_SFT 24
#define CMPL_DOORBELL_IDX_VALID 0x4000000UL
#define CMPL_DOORBELL_IDX_VALID 0x4000000UL
#define CMPL_DOORBELL_MASK 0x8000000UL
#define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
#define CMPL_DOORBELL_KEY_SFT 28
#define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
#define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
};
/* Status Door Bell Format (4 bytes) */
@@ -71,46 +71,56 @@ struct status_doorbell {
/* RoCE Host Structures */
/* Doorbell Structures */
/* 64b Doorbell Format (8 bytes) */
struct dbr_dbr {
__le32 index;
#define DBR_DBR_INDEX_MASK 0xfffffUL
#define DBR_DBR_INDEX_SFT 0
#define DBR_DBR_RESERVED12_MASK 0xfff00000UL
#define DBR_DBR_RESERVED12_SFT 20
__le32 type_xid;
#define DBR_DBR_XID_MASK 0xfffffUL
#define DBR_DBR_XID_SFT 0
#define DBR_DBR_RESERVED8_MASK 0xff00000UL
#define DBR_DBR_RESERVED8_SFT 20
#define DBR_DBR_TYPE_MASK 0xf0000000UL
#define DBR_DBR_TYPE_SFT 28
#define DBR_DBR_TYPE_SQ (0x0UL << 28)
#define DBR_DBR_TYPE_RQ (0x1UL << 28)
#define DBR_DBR_TYPE_SRQ (0x2UL << 28)
#define DBR_DBR_TYPE_SRQ_ARM (0x3UL << 28)
#define DBR_DBR_TYPE_CQ (0x4UL << 28)
#define DBR_DBR_TYPE_CQ_ARMSE (0x5UL << 28)
#define DBR_DBR_TYPE_CQ_ARMALL (0x6UL << 28)
#define DBR_DBR_TYPE_CQ_ARMENA (0x7UL << 28)
#define DBR_DBR_TYPE_SRQ_ARMENA (0x8UL << 28)
#define DBR_DBR_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
#define DBR_DBR_TYPE_NULL (0xfUL << 28)
/* dbc_dbc (size:64b/8B) */
struct dbc_dbc {
__le32 index;
#define DBC_DBC_INDEX_MASK 0xffffffUL
#define DBC_DBC_INDEX_SFT 0
__le32 type_path_xid;
#define DBC_DBC_XID_MASK 0xfffffUL
#define DBC_DBC_XID_SFT 0
#define DBC_DBC_PATH_MASK 0x3000000UL
#define DBC_DBC_PATH_SFT 24
#define DBC_DBC_PATH_ROCE (0x0UL << 24)
#define DBC_DBC_PATH_L2 (0x1UL << 24)
#define DBC_DBC_PATH_ENGINE (0x2UL << 24)
#define DBC_DBC_PATH_LAST DBC_DBC_PATH_ENGINE
#define DBC_DBC_DEBUG_TRACE 0x8000000UL
#define DBC_DBC_TYPE_MASK 0xf0000000UL
#define DBC_DBC_TYPE_SFT 28
#define DBC_DBC_TYPE_SQ (0x0UL << 28)
#define DBC_DBC_TYPE_RQ (0x1UL << 28)
#define DBC_DBC_TYPE_SRQ (0x2UL << 28)
#define DBC_DBC_TYPE_SRQ_ARM (0x3UL << 28)
#define DBC_DBC_TYPE_CQ (0x4UL << 28)
#define DBC_DBC_TYPE_CQ_ARMSE (0x5UL << 28)
#define DBC_DBC_TYPE_CQ_ARMALL (0x6UL << 28)
#define DBC_DBC_TYPE_CQ_ARMENA (0x7UL << 28)
#define DBC_DBC_TYPE_SRQ_ARMENA (0x8UL << 28)
#define DBC_DBC_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
#define DBC_DBC_TYPE_NQ (0xaUL << 28)
#define DBC_DBC_TYPE_NQ_ARM (0xbUL << 28)
#define DBC_DBC_TYPE_NULL (0xfUL << 28)
#define DBC_DBC_TYPE_LAST DBC_DBC_TYPE_NULL
};
/* 32b Doorbell Format (4 bytes) */
struct dbr_dbr32 {
__le32 type_abs_incr_xid;
#define DBR_DBR32_XID_MASK 0xfffffUL
#define DBR_DBR32_XID_SFT 0
#define DBR_DBR32_RESERVED4_MASK 0xf00000UL
#define DBR_DBR32_RESERVED4_SFT 20
#define DBR_DBR32_INCR_MASK 0xf000000UL
#define DBR_DBR32_INCR_SFT 24
#define DBR_DBR32_ABS 0x10000000UL
#define DBR_DBR32_TYPE_MASK 0xe0000000UL
#define DBR_DBR32_TYPE_SFT 29
#define DBR_DBR32_TYPE_SQ (0x0UL << 29)
/* dbc_dbc32 (size:32b/4B) */
struct dbc_dbc32 {
__le32 type_abs_incr_xid;
#define DBC_DBC32_XID_MASK 0xfffffUL
#define DBC_DBC32_XID_SFT 0
#define DBC_DBC32_PATH_MASK 0xc00000UL
#define DBC_DBC32_PATH_SFT 22
#define DBC_DBC32_PATH_ROCE (0x0UL << 22)
#define DBC_DBC32_PATH_L2 (0x1UL << 22)
#define DBC_DBC32_PATH_LAST DBC_DBC32_PATH_L2
#define DBC_DBC32_INCR_MASK 0xf000000UL
#define DBC_DBC32_INCR_SFT 24
#define DBC_DBC32_ABS 0x10000000UL
#define DBC_DBC32_TYPE_MASK 0xe0000000UL
#define DBC_DBC32_TYPE_SFT 29
#define DBC_DBC32_TYPE_SQ (0x0UL << 29)
#define DBC_DBC32_TYPE_LAST DBC_DBC32_TYPE_SQ
};
/* SQ WQE Structures */
@@ -149,7 +159,24 @@ struct sq_psn_search {
#define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
#define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
#define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
#define SQ_PSN_SEARCH_FLAGS_SFT 24
#define SQ_PSN_SEARCH_FLAGS_SFT 24
};
/* sq_psn_search_ext (size:128b/16B) */
struct sq_psn_search_ext {
__le32 opcode_start_psn;
#define SQ_PSN_SEARCH_EXT_START_PSN_MASK 0xffffffUL
#define SQ_PSN_SEARCH_EXT_START_PSN_SFT 0
#define SQ_PSN_SEARCH_EXT_OPCODE_MASK 0xff000000UL
#define SQ_PSN_SEARCH_EXT_OPCODE_SFT 24
__le32 flags_next_psn;
#define SQ_PSN_SEARCH_EXT_NEXT_PSN_MASK 0xffffffUL
#define SQ_PSN_SEARCH_EXT_NEXT_PSN_SFT 0
#define SQ_PSN_SEARCH_EXT_FLAGS_MASK 0xff000000UL
#define SQ_PSN_SEARCH_EXT_FLAGS_SFT 24
__le16 start_slot_idx;
__le16 reserved16;
__le32 reserved32;
};
/* Send SQ WQE (40 bytes) */
@@ -505,22 +532,24 @@ struct cq_res_rc {
/* Responder UD CQE (32 bytes) */
struct cq_res_ud {
__le32 length;
__le16 length;
#define CQ_RES_UD_LENGTH_MASK 0x3fffUL
#define CQ_RES_UD_LENGTH_SFT 0
#define CQ_RES_UD_RESERVED18_MASK 0xffffc000UL
#define CQ_RES_UD_RESERVED18_SFT 14
__le16 cfa_metadata;
#define CQ_RES_UD_CFA_METADATA_VID_MASK 0xfffUL
#define CQ_RES_UD_CFA_METADATA_VID_SFT 0
#define CQ_RES_UD_CFA_METADATA_DE 0x1000UL
#define CQ_RES_UD_CFA_METADATA_PRI_MASK 0xe000UL
#define CQ_RES_UD_CFA_METADATA_PRI_SFT 13
__le32 imm_data;
__le64 qp_handle;
__le16 src_mac[3];
__le16 src_qp_low;
u8 cqe_type_toggle;
#define CQ_RES_UD_TOGGLE 0x1UL
#define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
#define CQ_RES_UD_CQE_TYPE_SFT 1
#define CQ_RES_UD_TOGGLE 0x1UL
#define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
#define CQ_RES_UD_CQE_TYPE_SFT 1
#define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
#define CQ_RES_UD_RESERVED3_MASK 0xe0UL
#define CQ_RES_UD_RESERVED3_SFT 5
u8 status;
#define CQ_RES_UD_STATUS_OK 0x0UL
#define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
@@ -536,18 +565,30 @@ struct cq_res_ud {
#define CQ_RES_UD_FLAGS_SRQ_SRQ (0x1UL << 0)
#define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
#define CQ_RES_UD_FLAGS_IMM 0x2UL
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0xcUL
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 2
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 2)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 2)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 2)
#define CQ_RES_UD_FLAGS_UNUSED_MASK 0xcUL
#define CQ_RES_UD_FLAGS_UNUSED_SFT 2
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0x30UL
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 4
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
#define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST \
CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
#define CQ_RES_UD_FLAGS_META_FORMAT_MASK 0x3c0UL
#define CQ_RES_UD_FLAGS_META_FORMAT_SFT 6
#define CQ_RES_UD_FLAGS_META_FORMAT_NONE (0x0UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
#define CQ_RES_UD_FLAGS_META_FORMAT_LAST \
CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET
#define CQ_RES_UD_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
#define CQ_RES_UD_FLAGS_EXT_META_FORMAT_SFT 10
__le32 src_qp_high_srq_or_rq_wr_id;
#define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
#define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
#define CQ_RES_UD_RESERVED4_MASK 0xf00000UL
#define CQ_RES_UD_RESERVED4_SFT 20
#define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
#define CQ_RES_UD_SRC_QP_HIGH_SFT 24
};
@@ -983,6 +1024,7 @@ struct cmdq_create_qp {
#define CMDQ_CREATE_QP_TYPE_RC 0x2UL
#define CMDQ_CREATE_QP_TYPE_UD 0x4UL
#define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
#define CMDQ_CREATE_QP_TYPE_GSI 0x7UL
u8 sq_pg_size_sq_lvl;
#define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
#define CMDQ_CREATE_QP_SQ_LVL_SFT 0
@@ -2719,6 +2761,8 @@ struct creq_query_func_resp_sb {
__le16 max_srq;
__le32 max_gid;
__le32 tqm_alloc_reqs[12];
__le32 max_dpi;
__le32 reserved_32;
};
/* Set resources command response (16 bytes) */

View File

@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3
ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3
obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o

View File

@@ -146,7 +146,7 @@ static void open_rnic_dev(struct t3cdev *tdev)
pr_debug("%s t3cdev %p\n", __func__, tdev);
pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
rnicp = ib_alloc_device(iwch_dev, ibdev);
if (!rnicp) {
pr_err("Cannot allocate ib device\n");
return;

View File

@@ -53,6 +53,7 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>
#include "cxio_hal.h"
#include "iwch.h"
@@ -61,7 +62,7 @@
#include <rdma/cxgb3-abi.h>
#include "common.h"
static int iwch_dealloc_ucontext(struct ib_ucontext *context)
static void iwch_dealloc_ucontext(struct ib_ucontext *context)
{
struct iwch_dev *rhp = to_iwch_dev(context->device);
struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
@@ -71,24 +72,20 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context)
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
return 0;
}
static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
struct ib_udata *udata)
{
struct iwch_ucontext *context;
struct ib_device *ibdev = ucontext->device;
struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
struct iwch_dev *rhp = to_iwch_dev(ibdev);
pr_debug("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
cxio_init_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
return &context->ibucontext;
return 0;
}
static int iwch_destroy_cq(struct ib_cq *ib_cq)
@@ -370,7 +367,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return ret;
}
static int iwch_deallocate_pd(struct ib_pd *pd)
static void iwch_deallocate_pd(struct ib_pd *pd)
{
struct iwch_dev *rhp;
struct iwch_pd *php;
@@ -379,15 +376,13 @@ static int iwch_deallocate_pd(struct ib_pd *pd)
rhp = php->rhp;
pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
kfree(php);
return 0;
}
static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct iwch_pd *php;
struct iwch_pd *php = to_iwch_pd(pd);
struct ib_device *ibdev = pd->device;
u32 pdid;
struct iwch_dev *rhp;
@@ -395,12 +390,8 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
rhp = (struct iwch_dev *) ibdev;
pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
if (!pdid)
return ERR_PTR(-EINVAL);
php = kzalloc(sizeof(*php), GFP_KERNEL);
if (!php) {
cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
return ERR_PTR(-ENOMEM);
}
return -EINVAL;
php->pdid = pdid;
php->rhp = rhp;
if (context) {
@@ -408,11 +399,11 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
iwch_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
return -EFAULT;
}
}
pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
return &php->ibpd;
return 0;
}
static int iwch_dereg_mr(struct ib_mr *ib_mr)
@@ -522,14 +513,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
int i, k, entry;
int shift, n, i;
int err = 0;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
@@ -540,14 +530,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
mhp->umem = ib_umem_get(udata, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
kfree(mhp);
return ERR_PTR(err);
}
shift = mhp->umem->page_shift;
shift = PAGE_SHIFT;
n = mhp->umem->nmap;
@@ -563,19 +553,15 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
(k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
if (i)
@@ -836,7 +822,8 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
* Kernel users need more wq space for fastreg WRs which can take
* 2 WR fragments.
*/
ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL;
ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
ibucontext);
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
@@ -1130,8 +1117,9 @@ static int iwch_query_port(struct ib_device *ibdev,
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct iwch_dev *iwch_dev =
rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
@@ -1140,8 +1128,8 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct iwch_dev *iwch_dev =
rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
struct ethtool_drvinfo info;
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
@@ -1154,8 +1142,9 @@ static DEVICE_ATTR_RO(hca_type);
static ssize_t board_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev);
struct iwch_dev *iwch_dev =
rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device);
@@ -1348,6 +1337,8 @@ static const struct ib_device_ops iwch_dev_ops = {
.reg_user_mr = iwch_reg_user_mr,
.req_notify_cq = iwch_arm_cq,
.resize_cq = iwch_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
};
int iwch_register_device(struct iwch_dev *dev)
@@ -1391,7 +1382,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
return -ENOMEM;
@@ -1409,7 +1400,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d");
if (ret)
kfree(dev->ibdev.iwcm);
return ret;

View File

@@ -1,5 +1,5 @@
ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4
ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb
obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o

View File

@@ -655,7 +655,33 @@ static int send_halfclose(struct c4iw_ep *ep)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
static void read_tcb(struct c4iw_ep *ep)
{
struct sk_buff *skb;
struct cpl_get_tcb *req;
int wrlen = roundup(sizeof(*req), 16);
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
if (WARN_ON(!skb))
return;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
/*
* keep a ref on the ep so the tcb is not unlocked before this
* cpl completes. The ref is released in read_tcb_rpl().
*/
c4iw_get_ep(&ep->com);
if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
c4iw_put_ep(&ep->com);
}
static int send_abort_req(struct c4iw_ep *ep)
{
u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
@@ -670,6 +696,17 @@ static int send_abort(struct c4iw_ep *ep)
return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
{
if (!ep->com.qp || !ep->com.qp->srq) {
send_abort_req(ep);
return 0;
}
set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
read_tcb(ep);
return 0;
}
static int send_connect(struct c4iw_ep *ep)
{
struct cpl_act_open_req *req = NULL;
@@ -1851,14 +1888,11 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
static void complete_cached_srq_buffers(struct c4iw_ep *ep,
__be32 srqidx_status)
static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
{
enum chip_type adapter_type;
u32 srqidx;
adapter_type = ep->com.dev->rdev.lldi.adapter_type;
srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
/*
* If this TCB had a srq buffer cached, then we must complete
@@ -1876,6 +1910,7 @@ static void complete_cached_srq_buffers(struct c4iw_ep *ep,
static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
u32 srqidx;
struct c4iw_ep *ep;
struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
int release = 0;
@@ -1887,7 +1922,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
complete_cached_srq_buffers(ep, rpl->srqidx_status);
if (ep->com.qp && ep->com.qp->srq) {
srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
}
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
@@ -1903,8 +1941,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
}
mutex_unlock(&ep->com.mutex);
if (release)
if (release) {
close_complete_upcall(ep, -ECONNRESET);
release_ep_resources(ep);
}
c4iw_put_ep(&ep->com);
return 0;
}
@@ -2072,7 +2112,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
} else {
pdev = get_real_dev(n->dev);
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, 0);
n, pdev, rt_tos2priority(tos));
if (!ep->l2t)
goto out;
ep->mtu = dst_mtu(dst);
@@ -2161,7 +2201,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
laddr6->sin6_port,
raddr6->sin6_port, 0,
raddr6->sin6_port,
ep->com.cm_id->tos,
raddr6->sin6_scope_id);
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
@@ -2476,7 +2517,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
unsigned short hdrs;
u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
u8 tos;
parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
@@ -2490,6 +2531,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
if (parent_ep->com.cm_id->tos_set)
tos = parent_ep->com.cm_id->tos;
else
tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
&iptype, local_ip, peer_ip, &local_port, &peer_port);
@@ -2509,7 +2555,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
ntohs(peer_port), peer_mss);
dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
local_ip, peer_ip, local_port, peer_port,
PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
tos,
((struct sockaddr_in6 *)
&parent_ep->com.local_addr)->sin6_scope_id);
}
@@ -2740,6 +2786,21 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
{
complete_cached_srq_buffers(ep, ep->srqe_idx);
if (ep->com.cm_id && ep->com.qp) {
struct c4iw_qp_attributes attrs;
attrs.next_state = C4IW_QP_STATE_ERROR;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
peer_abort_upcall(ep);
release_ep_resources(ep);
c4iw_put_ep(&ep->com);
}
static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss6 *req = cplhdr(skb);
@@ -2750,6 +2811,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
int release = 0;
unsigned int tid = GET_TID(req);
u8 status;
u32 srqidx;
u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
@@ -2769,8 +2831,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
goto deref_ep;
}
complete_cached_srq_buffers(ep, req->srqidx_status);
pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
@@ -2819,6 +2879,23 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
stop_ep_timer(ep);
/*FALLTHROUGH*/
case FPDU_MODE:
if (ep->com.qp && ep->com.qp->srq) {
srqidx = ABORT_RSS_SRQIDX_G(
be32_to_cpu(req->srqidx_status));
if (srqidx) {
complete_cached_srq_buffers(ep,
req->srqidx_status);
} else {
/* Hold ep ref until finish_peer_abort() */
c4iw_get_ep(&ep->com);
__state_set(&ep->com, ABORTING);
set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
read_tcb(ep);
break;
}
}
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_ERROR;
ret = c4iw_modify_qp(ep->com.qp->rhp,
@@ -2942,15 +3019,18 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (ep && ep->com.qp) {
pr_warn("TERM received tid %u qpid %u\n",
tid, ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
if (ep) {
if (ep->com.qp) {
pr_warn("TERM received tid %u qpid %u\n", tid,
ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
c4iw_put_ep(&ep->com);
return 0;
}
@@ -3318,7 +3398,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
laddr6->sin6_port,
raddr6->sin6_port, 0,
raddr6->sin6_port, cm_id->tos,
raddr6->sin6_scope_id);
}
if (!ep->dst) {
@@ -3606,7 +3686,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (close) {
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
close_complete_upcall(ep, -ECONNRESET);
ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3717,6 +3796,80 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
return;
}
static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
{
u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
u64 t;
u32 shift = 32;
t = (thi << shift) | (tlo >> shift);
return t;
}
static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
{
u32 v;
u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
if (word & 0x1)
shift += 32;
v = (t >> shift) & mask;
return v;
}
static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
__be64 *tcb = (__be64 *)(rpl + 1);
unsigned int tid = GET_TID(rpl);
struct c4iw_ep *ep;
u64 t_flags_64;
u32 rx_pdu_out;
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
/* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
* determine if there's a rx PDU feedback event pending.
*
* If that bit is set, it means we'll need to re-read the TCB's
* rq_start value. The final value is the one present in a TCB
* with the TF_RX_PDU_OUT bit cleared.
*/
t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
c4iw_put_ep(&ep->com); /* from read_tcb() */
/* If TF_RX_PDU_OUT bit is set, re-read the TCB */
if (rx_pdu_out) {
if (++ep->rx_pdu_out_cnt >= 2) {
WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
goto cleanup;
}
read_tcb(ep);
return 0;
}
ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
TCB_RQ_START_S);
cleanup:
pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
finish_peer_abort(dev, ep);
else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
send_abort_req(ep);
else
WARN_ONCE(1, "unexpected state!");
return 0;
}
static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_fw6_msg *rpl = cplhdr(skb);
@@ -4037,6 +4190,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
[CPL_CLOSE_CON_RPL] = close_con_rpl,
[CPL_RDMA_TERMINATE] = terminate,
[CPL_FW4_ACK] = fw4_ack,
[CPL_GET_TCB_RPL] = read_tcb_rpl,
[CPL_FW6_MSG] = deferred_fw6_msg,
[CPL_RX_PKT] = rx_pkt,
[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
@@ -4268,6 +4422,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
[CPL_RDMA_TERMINATE] = sched,
[CPL_FW4_ACK] = sched,
[CPL_SET_TCB_RPL] = set_tcb_rpl,
[CPL_GET_TCB_RPL] = sched,
[CPL_FW6_MSG] = fw6_msg,
[CPL_RX_PKT] = sched
};

View File

@@ -720,11 +720,8 @@ static const struct file_operations ep_debugfs_fops = {
.read = debugfs_read,
};
static int setup_debugfs(struct c4iw_dev *devp)
static void setup_debugfs(struct c4iw_dev *devp)
{
if (!devp->debugfs_root)
return -1;
debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
(void *)devp, &qp_debugfs_fops, 4096);
@@ -740,7 +737,6 @@ static int setup_debugfs(struct c4iw_dev *devp)
if (c4iw_wr_log)
debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
(void *)devp, &wr_log_debugfs_fops, 4096);
return 0;
}
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
@@ -981,7 +977,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
devp = ib_alloc_device(c4iw_dev, ibdev);
if (!devp) {
pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
@@ -1564,8 +1560,6 @@ static int __init c4iw_init_module(void)
return err;
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
if (!c4iw_debugfs_root)
pr_warn("could not create debugfs entry, continuing\n");
reg_workq = create_singlethread_workqueue("Register_iWARP_device");
if (!reg_workq) {

View File

@@ -589,7 +589,6 @@ struct c4iw_ucontext {
u32 key;
spinlock_t mmap_lock;
struct list_head mmaps;
struct kref kref;
bool is_32b_cqe;
};
@@ -598,18 +597,6 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
return container_of(c, struct c4iw_ucontext, ibucontext);
}
void _c4iw_free_ucontext(struct kref *kref);
static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext)
{
kref_put(&ucontext->kref, _c4iw_free_ucontext);
}
static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext)
{
kref_get(&ucontext->kref);
}
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
@@ -982,6 +969,9 @@ struct c4iw_ep {
int rcv_win;
u32 snd_wscale;
struct c4iw_ep_stats stats;
u32 srqe_idx;
u32 rx_pdu_out_cnt;
struct sk_buff *peer_abort_skb;
};
static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)

View File

@@ -502,10 +502,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
__be64 *pages;
int shift, n, len;
int i, k, entry;
int shift, n, i;
int err = -ENOMEM;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@@ -537,11 +536,11 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
mhp->umem = ib_umem_get(udata, start, length, acc, 0);
if (IS_ERR(mhp->umem))
goto err_free_skb;
shift = mhp->umem->page_shift;
shift = PAGE_SHIFT;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
@@ -556,21 +555,16 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
(k << shift));
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
mhp->attr.pbl_addr + (n << 3), i,
mhp->wr_waitp);
if (err)
goto pbl_done;
n += i;
i = 0;
}
for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
if (i == PAGE_SIZE / sizeof(*pages)) {
err = write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (n << 3), i,
mhp->wr_waitp);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
@@ -684,8 +678,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
mhp->wr_waitp);
kfree_skb(mhp->dereg_skb);
c4iw_put_wr_wait(mhp->wr_waitp);
kfree(mhp);
pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
kfree(mhp);
return 0;
}

View File

@@ -58,51 +58,34 @@ static int fastreg_support = 1;
module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
void _c4iw_free_ucontext(struct kref *kref)
static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct c4iw_ucontext *ucontext;
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
struct c4iw_dev *rhp;
struct c4iw_mm_entry *mm, *tmp;
ucontext = container_of(kref, struct c4iw_ucontext, kref);
pr_debug("context %p\n", context);
rhp = to_c4iw_dev(ucontext->ibucontext.device);
pr_debug("ucontext %p\n", ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
}
static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
struct ib_udata *udata)
{
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
pr_debug("context %p\n", context);
c4iw_put_ucontext(ucontext);
return 0;
}
static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct c4iw_ucontext *context;
struct ib_device *ibdev = ucontext->device;
struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
struct c4iw_alloc_ucontext_resp uresp;
int ret = 0;
struct c4iw_mm_entry *mm = NULL;
pr_debug("ibdev %p\n", ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context) {
ret = -ENOMEM;
goto err;
}
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
@@ -111,7 +94,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm) {
ret = -ENOMEM;
goto err_free;
goto err;
}
uresp.status_page_size = PAGE_SIZE;
@@ -131,13 +114,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
mm->len = PAGE_SIZE;
insert_mmap(context, mm);
}
return &context->ibucontext;
return 0;
err_mm:
kfree(mm);
err_free:
kfree(context);
err:
return ERR_PTR(ret);
return ret;
}
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -209,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return ret;
}
static int c4iw_deallocate_pd(struct ib_pd *pd)
static void c4iw_deallocate_pd(struct ib_pd *pd)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -221,15 +202,13 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
kfree(php);
return 0;
}
static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct c4iw_pd *php;
struct c4iw_pd *php = to_c4iw_pd(pd);
struct ib_device *ibdev = pd->device;
u32 pdid;
struct c4iw_dev *rhp;
@@ -237,12 +216,8 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
return ERR_PTR(-EINVAL);
php = kzalloc(sizeof(*php), GFP_KERNEL);
if (!php) {
c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
return ERR_PTR(-ENOMEM);
}
return -EINVAL;
php->pdid = pdid;
php->rhp = rhp;
if (context) {
@@ -250,7 +225,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
c4iw_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
return -EFAULT;
}
}
mutex_lock(&rhp->rdev.stats.lock);
@@ -259,7 +234,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php);
return &php->ibpd;
return 0;
}
static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -376,8 +351,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
struct c4iw_dev *c4iw_dev =
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
@@ -387,8 +363,8 @@ static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
struct c4iw_dev *c4iw_dev =
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
struct ethtool_drvinfo info;
struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
@@ -401,8 +377,9 @@ static DEVICE_ATTR_RO(hca_type);
static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev);
struct c4iw_dev *c4iw_dev =
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device);
@@ -547,6 +524,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
.destroy_cq = c4iw_destroy_cq,
.destroy_qp = c4iw_destroy_qp,
.destroy_srq = c4iw_destroy_srq,
.fill_res_entry = fill_res_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = c4iw_get_dma_mr,
.get_hw_stats = c4iw_get_mib,
@@ -567,6 +545,8 @@ static const struct ib_device_ops c4iw_dev_ops = {
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
.req_notify_cq = c4iw_arm_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
};
void c4iw_register_device(struct work_struct *work)
@@ -613,7 +593,7 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm) {
ret = -ENOMEM;
goto err_dealloc_ctx;
@@ -627,14 +607,13 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
dev->ibdev.iwcm->get_qp = c4iw_get_qp;
dev->ibdev.res.fill_res_entry = fill_res_entry;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
if (ret)
goto err_kfree_iwcm;
return;

View File

@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
#include <rdma/uverbs_ioctl.h>
#include "iw_cxgb4.h"
@@ -632,7 +633,10 @@ static void build_rdma_write_cmpl(struct t4_sq *sq,
wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
if (wr->next->opcode == IB_WR_SEND)
wcwr->stag_inv = 0;
else
wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
wcwr->r2 = 0;
wcwr->r3 = 0;
@@ -726,7 +730,10 @@ static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
/* SEND_WITH_INV swsqe */
swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
swsqe->opcode = FW_RI_SEND_WITH_INV;
if (wr->next->opcode == IB_WR_SEND)
swsqe->opcode = FW_RI_SEND;
else
swsqe->opcode = FW_RI_SEND_WITH_INV;
swsqe->idx = qhp->wq.sq.pidx;
swsqe->complete = 0;
swsqe->signaled = send_signaled;
@@ -897,8 +904,6 @@ static void free_qp_work(struct work_struct *work)
destroy_qp(&rhp->rdev, &qhp->wq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
if (ucontext)
c4iw_put_ucontext(ucontext);
c4iw_put_wr_wait(qhp->wr_waitp);
kfree(qhp);
}
@@ -1133,9 +1138,9 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
/*
* Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
* the response for small NVMEe-oF READ requests. If the chain is
* exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
* meet the requirements of the fw_ri_write_cmpl_wr work request,
* then build and post the write_cmpl WR. If any of the tests
* exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths
* and lengths meet the requirements of the fw_ri_write_cmpl_wr work
* request, then build and post the write_cmpl WR. If any of the tests
* below are not true, then we continue on with the tradtional WRITE
* and SEND WRs.
*/
@@ -1145,7 +1150,8 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
wr && wr->next && !wr->next->next &&
wr->opcode == IB_WR_RDMA_WRITE &&
wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
wr->next->opcode == IB_WR_SEND_WITH_INV &&
(wr->next->opcode == IB_WR_SEND ||
wr->next->opcode == IB_WR_SEND_WITH_INV) &&
wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
wr->next->num_sge == 1 && num_wrs >= 2) {
post_write_cmpl(qhp, wr);
@@ -2129,7 +2135,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_cq *rchp;
struct c4iw_create_qp_resp uresp;
unsigned int sqsize, rqsize = 0;
struct c4iw_ucontext *ucontext;
struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct c4iw_ucontext, ibucontext);
int ret;
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
@@ -2163,8 +2170,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
if (sqsize < 8)
sqsize = 8;
ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
return ERR_PTR(-ENOMEM);
@@ -2331,7 +2336,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
insert_mmap(ucontext, ma_sync_key_mm);
}
c4iw_get_ucontext(ucontext);
qhp->ucontext = ucontext;
}
if (!attrs->srq) {
@@ -2589,7 +2593,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
/* build fw_ri_res_wr */
wr_len = sizeof(*res_wr) + sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb)
goto err_free_queue;
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
@@ -2711,7 +2715,8 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
rqsize = attrs->attr.max_wr + 1;
rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
srq = kzalloc(sizeof(*srq), GFP_KERNEL);
if (!srq)

View File

@@ -35,6 +35,7 @@
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
#include "t4_tcb.h"
#include "t4fw_ri_api.h"
#define T4_MAX_NUM_PD 65536

View File

@@ -24,6 +24,7 @@ hfi1-y := \
mad.o \
mmu_rb.o \
msix.o \
opfn.o \
pcie.o \
pio.o \
pio_copy.o \

View File

@@ -4253,6 +4253,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
hfi1_access_sw_tid_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
access_sw_send_schedule),
[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
@@ -5222,6 +5224,17 @@ int is_bx(struct hfi1_devdata *dd)
return (chip_rev_minor & 0xF0) == 0x10;
}
/* return true is kernel urg disabled for rcd */
bool is_urg_masked(struct hfi1_ctxtdata *rcd)
{
u64 mask;
u32 is = IS_RCVURGENT_START + rcd->ctxt;
u8 bit = is % 64;
mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
return !(mask & BIT_ULL(bit));
}
/*
* Append string s to buffer buf. Arguments curp and len are the current
* position and remaining length, respectively.

View File

@@ -1,7 +1,7 @@
#ifndef _CHIP_H
#define _CHIP_H
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -804,6 +804,7 @@ void clear_linkup_counters(struct hfi1_devdata *dd);
u32 hdrqempty(struct hfi1_ctxtdata *rcd);
int is_ax(struct hfi1_devdata *dd);
int is_bx(struct hfi1_devdata *dd);
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
const char *opa_lstate_name(u32 lstate);
@@ -926,6 +927,7 @@ enum {
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
C_SW_KMEM_WAIT,
C_SW_TID_WAIT,
C_SW_SEND_SCHED,
C_SDMA_DESC_FETCHED_CNT,
C_SDMA_INT_CNT,

View File

@@ -340,6 +340,10 @@ struct diag_pkt {
#define HFI1_PSM_IOC_BASE_SEQ 0x0
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define HFI1_KDETH_BTH_SEQ_SHIFT 11
#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));

View File

@@ -1167,6 +1167,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
char link[10];
struct hfi1_devdata *dd = dd_from_dev(ibd);
struct hfi1_pportdata *ppd;
struct dentry *root;
int unit = dd->unit;
int i, j;
@@ -1174,31 +1175,29 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
return;
snprintf(name, sizeof(name), "%s_%d", class_name(), unit);
snprintf(link, sizeof(link), "%d", unit);
ibd->hfi1_ibdev_dbg = debugfs_create_dir(name, hfi1_dbg_root);
if (!ibd->hfi1_ibdev_dbg) {
pr_warn("create of %s failed\n", name);
return;
}
root = debugfs_create_dir(name, hfi1_dbg_root);
ibd->hfi1_ibdev_dbg = root;
ibd->hfi1_ibdev_link =
debugfs_create_symlink(link, hfi1_dbg_root, name);
if (!ibd->hfi1_ibdev_link) {
pr_warn("create of %s symlink failed\n", name);
return;
}
DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(tx_opcode_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
debugfs_create_file("opcode_stats", 0444, root, ibd,
&_opcode_stats_file_ops);
debugfs_create_file("tx_opcode_stats", 0444, root, ibd,
&_tx_opcode_stats_file_ops);
debugfs_create_file("ctx_stats", 0444, root, ibd, &_ctx_stats_file_ops);
debugfs_create_file("qp_stats", 0444, root, ibd, &_qp_stats_file_ops);
debugfs_create_file("sdes", 0444, root, ibd, &_sdes_file_ops);
debugfs_create_file("rcds", 0444, root, ibd, &_rcds_file_ops);
debugfs_create_file("pios", 0444, root, ibd, &_pios_file_ops);
debugfs_create_file("sdma_cpu_list", 0444, root, ibd,
&_sdma_cpu_list_file_ops);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
ibd->hfi1_ibdev_dbg,
dd,
&cntr_ops[i].ops, S_IRUGO);
debugfs_create_file(cntr_ops[i].name, 0444, root, dd,
&cntr_ops[i].ops);
/* per port files */
for (ppd = dd->pport, j = 0; j < dd->num_pports; j++, ppd++)
for (i = 0; i < ARRAY_SIZE(port_cntr_ops); i++) {
@@ -1206,12 +1205,11 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
sizeof(name),
port_cntr_ops[i].name,
j + 1);
DEBUGFS_FILE_CREATE(name,
ibd->hfi1_ibdev_dbg,
ppd,
&port_cntr_ops[i].ops,
debugfs_create_file(name,
!port_cntr_ops[i].ops.write ?
S_IRUGO : S_IRUGO | S_IWUSR);
S_IRUGO :
S_IRUGO | S_IWUSR,
root, ppd, &port_cntr_ops[i].ops);
}
hfi1_fault_init_debugfs(ibd);
@@ -1341,10 +1339,10 @@ DEBUGFS_FILE_OPS(driver_stats);
void hfi1_dbg_init(void)
{
hfi1_dbg_root = debugfs_create_dir(DRIVER_NAME, NULL);
if (!hfi1_dbg_root)
pr_warn("init of debugfs failed\n");
DEBUGFS_SEQ_FILE_CREATE(driver_stats_names, hfi1_dbg_root, NULL);
DEBUGFS_SEQ_FILE_CREATE(driver_stats, hfi1_dbg_root, NULL);
debugfs_create_file("driver_stats_names", 0444, hfi1_dbg_root, NULL,
&_driver_stats_names_file_ops);
debugfs_create_file("driver_stats", 0444, hfi1_dbg_root, NULL,
&_driver_stats_file_ops);
}
void hfi1_dbg_exit(void)

View File

@@ -49,16 +49,6 @@
struct hfi1_ibdev;
#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
do { \
struct dentry *ent; \
const char *__name = name; \
ent = debugfs_create_file(__name, mode, parent, \
data, ops); \
if (!ent) \
pr_warn("create of %s failed\n", __name); \
} while (0)
#define DEBUGFS_SEQ_FILE_OPS(name) \
static const struct seq_operations _##name##_seq_ops = { \
.start = _##name##_seq_start, \
@@ -89,8 +79,6 @@ static const struct file_operations _##name##_file_ops = { \
.release = seq_release \
}
#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444)
ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
loff_t *ppos);

View File

@@ -1575,25 +1575,32 @@ drop:
return -EINVAL;
}
void handle_eflags(struct hfi1_packet *packet)
static void show_eflags_errs(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
u32 rte = rhf_rcv_type_err(packet->rhf);
dd_dev_err(rcd->dd,
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
rcd->ctxt, packet->rhf,
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
packet->rhf & RHF_DC_ERR ? "dc " : "",
packet->rhf & RHF_TID_ERR ? "tid " : "",
packet->rhf & RHF_LEN_ERR ? "len " : "",
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
rte);
}
void handle_eflags(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
rcv_hdrerr(rcd, rcd->ppd, packet);
if (rhf_err_flags(packet->rhf))
dd_dev_err(rcd->dd,
"receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
rcd->ctxt, packet->rhf,
packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
packet->rhf & RHF_DC_ERR ? "dc " : "",
packet->rhf & RHF_TID_ERR ? "tid " : "",
packet->rhf & RHF_LEN_ERR ? "len " : "",
packet->rhf & RHF_ECC_ERR ? "ecc " : "",
packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
rte);
show_eflags_errs(packet);
}
/*
@@ -1699,11 +1706,14 @@ static int kdeth_process_expected(struct hfi1_packet *packet)
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
dd_dev_err(packet->rcd->dd,
"Unhandled expected packet received. Dropping.\n");
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
return RHF_RCV_CONTINUE;
}
hfi1_kdeth_expected_rcv(packet);
return RHF_RCV_CONTINUE;
}
@@ -1712,11 +1722,17 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
return RHF_RCV_CONTINUE;
}
hfi1_kdeth_eager_rcv(packet);
return RHF_RCV_CONTINUE;
}

View File

@@ -250,6 +250,7 @@ void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
{
struct dentry *parent = ibd->hfi1_ibdev_dbg;
struct dentry *fault_dir;
ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
if (!ibd->fault)
@@ -269,45 +270,31 @@ int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
bitmap_zero(ibd->fault->opcodes,
sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
ibd->fault->dir =
fault_create_debugfs_attr("fault", parent,
&ibd->fault->attr);
if (IS_ERR(ibd->fault->dir)) {
fault_dir =
fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
if (IS_ERR(fault_dir)) {
kfree(ibd->fault);
ibd->fault = NULL;
return -ENOENT;
}
ibd->fault->dir = fault_dir;
DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
&ibd->fault->enable))
goto fail;
if (!debugfs_create_bool("suppress_err", 0600,
ibd->fault->dir,
&ibd->fault->suppress_err))
goto fail;
if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
&ibd->fault->opcode))
goto fail;
if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
ibd->fault, &__fault_opcodes_fops))
goto fail;
if (!debugfs_create_u64("skip_pkts", 0600,
ibd->fault->dir,
&ibd->fault->fault_skip))
goto fail;
if (!debugfs_create_u64("skip_usec", 0600,
ibd->fault->dir,
&ibd->fault->fault_skip_usec))
goto fail;
if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
&ibd->fault->direction))
goto fail;
debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
&_fault_stats_file_ops);
debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
debugfs_create_bool("suppress_err", 0600, fault_dir,
&ibd->fault->suppress_err);
debugfs_create_bool("opcode_mode", 0600, fault_dir,
&ibd->fault->opcode);
debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
&__fault_opcodes_fops);
debugfs_create_u64("skip_pkts", 0600, fault_dir,
&ibd->fault->fault_skip);
debugfs_create_u64("skip_usec", 0600, fault_dir,
&ibd->fault->fault_skip_usec);
debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
return 0;
fail:
hfi1_fault_exit_debugfs(ibd);
return -ENOMEM;
}
bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)

View File

@@ -73,6 +73,7 @@
#include "chip_registers.h"
#include "common.h"
#include "opfn.h"
#include "verbs.h"
#include "pio.h"
#include "chip.h"
@@ -98,6 +99,8 @@
#define NEIGHBOR_TYPE_HFI 0
#define NEIGHBOR_TYPE_SWITCH 1
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
extern unsigned long hfi1_cap_mask;
#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
#define HFI1_CAP_UGET_MASK(mask, cap) \
@@ -195,6 +198,14 @@ struct exp_tid_set {
};
typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
/* queue head for QP TID resource waiters */
u32 enqueue; /* count of tid enqueues */
u32 dequeue; /* count of tid dequeues */
};
struct hfi1_ctxtdata {
/* rcvhdrq base, needs mmap before useful */
void *rcvhdrq;
@@ -288,6 +299,12 @@ struct hfi1_ctxtdata {
/* PSM Specific fields */
/* lock protecting all Expected TID data */
struct mutex exp_mutex;
/* lock protecting all Expected TID data of kernel contexts */
spinlock_t exp_lock;
/* Queue for QP's waiting for HW TID flows */
struct tid_queue flow_queue;
/* Queue for QP's waiting for HW receive array entries */
struct tid_queue rarr_queue;
/* when waiting for rcv or pioavail */
wait_queue_head_t wait;
/* uuid from PSM */
@@ -320,6 +337,9 @@ struct hfi1_ctxtdata {
*/
u8 subctxt_cnt;
/* Bit mask to track free TID RDMA HW flows */
unsigned long flow_mask;
struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
};
/**
@@ -1435,7 +1455,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
u16 ctxt);
struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
@@ -2100,7 +2120,7 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
#endif
HFI1_PKT_USER_SC_INTEGRITY;
else
else if (ctxt_type != SC_KERNEL)
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
/* turn on send-side job key checks if !A0 */

View File

@@ -73,7 +73,6 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
@@ -216,12 +215,12 @@ static void hfi1_rcd_free(struct kref *kref)
struct hfi1_ctxtdata *rcd =
container_of(kref, struct hfi1_ctxtdata, kref);
hfi1_free_ctxtdata(rcd->dd, rcd);
spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
rcd->dd->rcd[rcd->ctxt] = NULL;
spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
hfi1_free_ctxtdata(rcd->dd, rcd);
kfree(rcd);
}
@@ -244,10 +243,13 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
* @rcd: pointer to an initialized rcd data structure
*
* Use this to get a reference after the init.
*
* Return : reflect kref_get_unless_zero(), which returns non-zero on
* increment, otherwise 0.
*/
void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
{
kref_get(&rcd->kref);
return kref_get_unless_zero(&rcd->kref);
}
/**
@@ -327,7 +329,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd[ctxt]) {
rcd = dd->rcd[ctxt];
hfi1_rcd_get(rcd);
if (!hfi1_rcd_get(rcd))
rcd = NULL;
}
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@@ -372,6 +375,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
mutex_init(&rcd->exp_mutex);
spin_lock_init(&rcd->exp_lock);
INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@@ -474,6 +480,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
/* Initialize TID flow generations for the context */
hfi1_kern_init_ctxt_generations(rcd);
}
*context = rcd;
@@ -773,6 +782,8 @@ static void enable_chip(struct hfi1_devdata *dd)
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
if (HFI1_CAP_IS_KSET(TID_RDMA))
rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
hfi1_rcvctrl(dd, rcvmask, rcd);
sc_enable(rcd->sc);
hfi1_rcd_put(rcd);
@@ -928,6 +939,8 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
if (!lastfail)
lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
@@ -1498,6 +1511,13 @@ static int __init hfi1_mod_init(void)
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
ret = opfn_init();
if (ret < 0) {
pr_err("Failed to allocate opfn_wq");
goto bail_dev;
}
hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
@@ -1528,6 +1548,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
opfn_exit();
node_affinity_destroy_all();
hfi1_dbg_exit();
@@ -1582,7 +1603,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
if (rcd) {
hfi1_clear_tids(rcd);
hfi1_free_ctxt_rcv_groups(rcd);
hfi1_free_ctxt(rcd);
}
}

View File

@@ -6,6 +6,9 @@
#include "iowait.h"
#include "trace_iowait.h"
/* 1 priority == 16 starve_cnt */
#define IOWAIT_PRIORITY_STARVE_SHIFT 4
void iowait_set_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_set(wait, flag);
@@ -44,7 +47,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
void (*sdma_drained)(struct iowait *wait),
void (*init_priority)(struct iowait *wait))
{
int i;
@@ -58,6 +62,7 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
wait->init_priority = init_priority;
wait->flags = 0;
for (i = 0; i < IOWAIT_SES; i++) {
wait->wait[i].iow = wait;
@@ -92,3 +97,30 @@ int iowait_set_work_flag(struct iowait_work *w)
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
return IOWAIT_TID_SE;
}
/**
* iowait_priority_update_top - update the top priority entry
* @w: the iowait struct
* @top: a pointer to the top priority entry
* @idx: the index of the current iowait in an array
* @top_idx: the array index for the iowait entry that has the top priority
*
* This function is called to compare the priority of a given
* iowait with the given top priority entry. The top index will
* be returned.
*/
uint iowait_priority_update_top(struct iowait *w,
struct iowait *top,
uint idx, uint top_idx)
{
u8 cnt, tcnt;
/* Convert priority into starve_cnt and compare the total.*/
cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
top->starved_cnt;
if (cnt > tcnt)
return idx;
else
return top_idx;
}

View File

@@ -100,6 +100,7 @@ struct iowait_work {
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
* @init_priority: callback to manipulate priority
* @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
@@ -109,7 +110,7 @@ struct iowait_work {
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
* @flags: wait flags (one per QP)
* @wait: SE array
* @wait: SE array for multiple legs
*
* This is to be embedded in user's state structure
* (QP or PQ).
@@ -120,10 +121,13 @@ struct iowait_work {
* are callbacks for the ULP to implement
* what ever queuing/dequeuing of
* the embedded iowait and its containing struct
* when a resource shortage like SDMA ring space is seen.
* when a resource shortage like SDMA ring space
* or PIO credit space is seen.
*
* Both potentially have locks help
* so sleeping is not allowed.
* so sleeping is not allowed and it is not
* supported to submit txreqs from the wakeup
* call directly because of lock conflicts.
*
* The wait_dma member along with the iow
*
@@ -143,6 +147,7 @@ struct iowait {
);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
void (*init_priority)(struct iowait *wait);
seqlock_t *lock;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@@ -152,6 +157,7 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
u8 priority;
unsigned long flags;
struct iowait_work wait[IOWAIT_SES];
};
@@ -171,7 +177,8 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait));
void (*sdma_drained)(struct iowait *wait),
void (*init_priority)(struct iowait *wait));
/**
* iowait_schedule() - schedule the default send engine work
@@ -185,6 +192,18 @@ static inline bool iowait_schedule(struct iowait *wait,
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
* iowait_tid_schedule - schedule the tid SE
* @wait: the iowait structure
* @wq: the work queue
* @cpu: the cpu
*/
static inline bool iowait_tid_schedule(struct iowait *wait,
struct workqueue_struct *wq, int cpu)
{
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
}
/**
* iowait_sdma_drain() - wait for DMAs to drain
*
@@ -327,6 +346,8 @@ static inline u16 iowait_get_desc(struct iowait_work *w)
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
num_desc = tx->num_desc;
if (tx->flags & SDMA_TXREQ_F_VIP)
w->iow->priority++;
}
return num_desc;
}
@@ -340,6 +361,37 @@ static inline u32 iowait_get_all_desc(struct iowait *w)
return num_desc;
}
static inline void iowait_update_priority(struct iowait_work *w)
{
struct sdma_txreq *tx = NULL;
if (!list_empty(&w->tx_head)) {
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
if (tx->flags & SDMA_TXREQ_F_VIP)
w->iow->priority++;
}
}
static inline void iowait_update_all_priority(struct iowait *w)
{
iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
}
static inline void iowait_init_priority(struct iowait *w)
{
w->priority = 0;
if (w->init_priority)
w->init_priority(w);
}
static inline void iowait_get_priority(struct iowait *w)
{
iowait_init_priority(w);
iowait_update_all_priority(w);
}
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@@ -356,14 +408,18 @@ static inline void iowait_queue(bool pkts_sent, struct iowait *w,
/*
* To play fair, insert the iowait at the tail of the wait queue if it
* has already sent some packets; Otherwise, put it at the head.
* However, if it has priority packets to send, also put it at the
* head.
*/
if (pkts_sent) {
list_add_tail(&w->list, wait_head);
if (pkts_sent)
w->starved_cnt = 0;
} else {
list_add(&w->list, wait_head);
else
w->starved_cnt++;
}
if (w->priority > 0 || !pkts_sent)
list_add(&w->list, wait_head);
else
list_add_tail(&w->list, wait_head);
}
/**
@@ -380,27 +436,10 @@ static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
w->starved_cnt = 0;
}
/**
* iowait_starve_find_max - Find the maximum of the starve count
* @w: the iowait struct
* @max: a variable containing the max starve count
* @idx: the index of the current iowait in an array
* @max_idx: a variable containing the array index for the
* iowait entry that has the max starve count
*
* This function is called to compare the starve count of a
* given iowait with the given max starve count. The max starve
* count and the index will be updated if the iowait's start
* count is larger.
*/
static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
uint idx, uint *max_idx)
{
if (w->starved_cnt > *max) {
*max = w->starved_cnt;
*max_idx = idx;
}
}
/* Update the top priority index */
uint iowait_priority_update_top(struct iowait *w,
struct iowait *top,
uint idx, uint top_idx);
/**
* iowait_packet_queued() - determine if a packet is queued

View File

@@ -0,0 +1,323 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"
#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT)
#define OPFN_CODE(code) BIT((code) - 1)
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
struct hfi1_opfn_type {
bool (*request)(struct rvt_qp *qp, u64 *data);
bool (*response)(struct rvt_qp *qp, u64 *data);
bool (*reply)(struct rvt_qp *qp, u64 data);
void (*error)(struct rvt_qp *qp);
};
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
[STL_VERBS_EXTD_TID_RDMA] = {
.request = tid_rdma_conn_req,
.response = tid_rdma_conn_resp,
.reply = tid_rdma_conn_reply,
.error = tid_rdma_conn_error,
},
};
static struct workqueue_struct *opfn_wq;
static void opfn_schedule_conn_request(struct rvt_qp *qp);
static bool hfi1_opfn_extended(u32 bth1)
{
return !!(bth1 & IB_BTHE_E);
}
static void opfn_conn_request(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_atomic_wr wr;
u16 mask, capcode;
struct hfi1_opfn_type *extd;
u64 data;
unsigned long flags;
int ret = 0;
trace_hfi1_opfn_state_conn_request(qp);
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* Exit if the extended bit is not set, or if nothing is requested, or
* if we have completed all requests, or if a previous request is in
* progress
*/
if (!priv->opfn.extended || !priv->opfn.requested ||
priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
goto done;
mask = priv->opfn.requested & ~priv->opfn.completed;
capcode = ilog2(mask & ~(mask - 1)) + 1;
if (capcode >= STL_VERBS_EXTD_MAX) {
priv->opfn.completed |= OPFN_CODE(capcode);
goto done;
}
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->request || !extd->request(qp, &data)) {
/*
* Either there is no handler for this capability or the request
* packet could not be generated. Either way, mark it as done so
* we don't keep attempting to complete it.
*/
priv->opfn.completed |= OPFN_CODE(capcode);
goto done;
}
trace_hfi1_opfn_data_conn_request(qp, capcode, data);
data = (data & ~0xf) | capcode;
memset(&wr, 0, sizeof(wr));
wr.wr.opcode = IB_WR_OPFN;
wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
wr.compare_add = data;
priv->opfn.curr = capcode; /* A new request is now in progress */
/* Drop opfn.lock before calling ib_post_send() */
spin_unlock_irqrestore(&priv->opfn.lock, flags);
ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
if (ret)
goto err;
trace_hfi1_opfn_state_conn_request(qp);
return;
err:
trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ",
(u64)ret);
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* In case of an unexpected error return from ib_post_send
* clear opfn.curr and reschedule to try again
*/
priv->opfn.curr = STL_VERBS_EXTD_NONE;
opfn_schedule_conn_request(qp);
done:
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_send_conn_request(struct work_struct *work)
{
struct hfi1_opfn_data *od;
struct hfi1_qp_priv *qpriv;
od = container_of(work, struct hfi1_opfn_data, opfn_work);
qpriv = container_of(od, struct hfi1_qp_priv, opfn);
opfn_conn_request(qpriv->owner);
}
/*
* When QP s_lock is held in the caller, the OPFN request must be scheduled
* to a different workqueue to avoid double locking QP s_lock in call to
* ib_post_send in opfn_conn_request
*/
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
trace_hfi1_opfn_state_sched_conn_request(qp);
queue_work(opfn_wq, &priv->opfn.opfn_work);
}
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_atomic_eth *ateth)
{
struct hfi1_qp_priv *priv = qp->priv;
u64 data = be64_to_cpu(ateth->compare_data);
struct hfi1_opfn_type *extd;
u8 capcode;
unsigned long flags;
trace_hfi1_opfn_state_conn_response(qp);
capcode = data & 0xf;
trace_hfi1_opfn_data_conn_response(qp, capcode, data);
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
return;
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->response) {
e->atomic_data = capcode;
return;
}
spin_lock_irqsave(&priv->opfn.lock, flags);
if (priv->opfn.completed & OPFN_CODE(capcode)) {
/*
* We are receiving a request for a feature that has already
* been negotiated. This may mean that the other side has reset
*/
priv->opfn.completed &= ~OPFN_CODE(capcode);
if (extd->error)
extd->error(qp);
}
if (extd->response(qp, &data))
priv->opfn.completed |= OPFN_CODE(capcode);
e->atomic_data = (data & ~0xf) | capcode;
trace_hfi1_opfn_state_conn_response(qp);
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_opfn_type *extd;
u8 capcode;
unsigned long flags;
trace_hfi1_opfn_state_conn_reply(qp);
capcode = data & 0xf;
trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
return;
spin_lock_irqsave(&priv->opfn.lock, flags);
/*
* Either there is no previous request or the reply is not for the
* current request
*/
if (!priv->opfn.curr || capcode != priv->opfn.curr)
goto done;
extd = &hfi1_opfn_handlers[capcode];
if (!extd || !extd->reply)
goto clear;
if (extd->reply(qp, data))
priv->opfn.completed |= OPFN_CODE(capcode);
clear:
/*
* Clear opfn.curr to indicate that the previous request is no longer in
* progress
*/
priv->opfn.curr = STL_VERBS_EXTD_NONE;
trace_hfi1_opfn_state_conn_reply(qp);
done:
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_conn_error(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_opfn_type *extd = NULL;
unsigned long flags;
u16 capcode;
trace_hfi1_opfn_state_conn_error(qp);
trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
/*
* The QP has gone into the Error state. We have to invalidate all
* negotiated feature, including the one in progress (if any). The RC
* QP handling will clean the WQE for the connection request.
*/
spin_lock_irqsave(&priv->opfn.lock, flags);
while (priv->opfn.completed) {
capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
if (extd->error)
extd->error(qp);
priv->opfn.completed &= ~OPFN_CODE(capcode);
}
priv->opfn.extended = 0;
priv->opfn.requested = 0;
priv->opfn.curr = STL_VERBS_EXTD_NONE;
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
struct ib_qp *ibqp = &qp->ibqp;
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
if (attr_mask & IB_QP_RETRY_CNT)
priv->s_retry = attr->retry_cnt;
spin_lock_irqsave(&priv->opfn.lock, flags);
if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
struct tid_rdma_params *local = &priv->tid_rdma.local;
if (attr_mask & IB_QP_TIMEOUT)
priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
tid_rdma_opfn_init(qp, local);
/*
* We only want to set the OPFN requested bit when the
* QP transitions to RTS.
*/
if (attr_mask & IB_QP_STATE &&
attr->qp_state == IB_QPS_RTS) {
priv->opfn.requested |= OPFN_MASK(TID_RDMA);
/*
* If the QP is transitioning to RTS and the
* opfn.completed for TID RDMA has already been
* set, the QP is being moved *back* into RTS.
* We can now renegotiate the TID RDMA
* parameters.
*/
if (priv->opfn.completed &
OPFN_MASK(TID_RDMA)) {
priv->opfn.completed &=
~OPFN_MASK(TID_RDMA);
/*
* Since the opfn.completed bit was
* already set, it is safe to assume
* that the opfn.extended is also set.
*/
opfn_schedule_conn_request(qp);
}
}
} else {
memset(local, 0, sizeof(*local));
}
}
spin_unlock_irqrestore(&priv->opfn.lock, flags);
}
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
HFI1_CAP_IS_KSET(OPFN)) {
priv->opfn.extended = 1;
if (qp->state == IB_QPS_RTS)
opfn_conn_request(qp);
}
}
int opfn_init(void)
{
opfn_wq = alloc_workqueue("hfi_opfn",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
WQ_MEM_RECLAIM,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
if (!opfn_wq)
return -ENOMEM;
return 0;
}
void opfn_exit(void)
{
if (opfn_wq) {
destroy_workqueue(opfn_wq);
opfn_wq = NULL;
}
}

View File

@@ -0,0 +1,85 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef _HFI1_OPFN_H
#define _HFI1_OPFN_H
/**
* DOC: Omni Path Feature Negotion (OPFN)
*
* OPFN is a discovery protocol for Intel Omni-Path fabric that
* allows two RC QPs to negotiate a common feature that both QPs
* can support. Currently, the only OPA feature that OPFN
* supports is TID RDMA.
*
* Architecture
*
* OPFN involves the communication between two QPs on the HFI
* level on an Omni-Path fabric, and ULPs have no knowledge of
* OPFN at all.
*
* Implementation
*
* OPFN extends the existing IB RC protocol with the following
* changes:
* -- Uses Bit 24 (reserved) of DWORD 1 of Base Transport
* Header (BTH1) to indicate that the RC QP supports OPFN;
* -- Uses a combination of RC COMPARE_SWAP opcode (0x13) and
* the address U64_MAX (0xFFFFFFFFFFFFFFFF) as an OPFN
* request; The 64-bit data carried with the request/response
* contains the parameters for negotiation and will be
* defined in tid_rdma.c file;
* -- Defines IB_WR_RESERVED3 as IB_WR_OPFN.
*
* The OPFN communication will be triggered when an RC QP
* receives a request with Bit 24 of BTH1 set. The responder QP
* will then post send an OPFN request with its local
* parameters, which will be sent to the requester QP once all
* existing requests on the responder QP side have been sent.
* Once the requester QP receives the OPFN request, it will
* keep a copy of the responder QP's parameters, and return a
* response packet with its own local parameters. The responder
* QP receives the response packet and keeps a copy of the requester
* QP's parameters. After this exchange, each side has the parameters
* for both sides and therefore can select the right parameters
* for future transactions
*/
/* STL Verbs Extended */
#define IB_BTHE_E_SHIFT 24
#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX
struct ib_atomic_eth;
enum hfi1_opfn_codes {
STL_VERBS_EXTD_NONE = 0,
STL_VERBS_EXTD_TID_RDMA,
STL_VERBS_EXTD_MAX
};
struct hfi1_opfn_data {
u8 extended;
u16 requested;
u16 completed;
enum hfi1_opfn_codes curr;
/* serialize opfn function calls */
spinlock_t lock;
struct work_struct opfn_work;
};
/* WR opcode for OPFN */
#define IB_WR_OPFN IB_WR_RESERVED3
void opfn_send_conn_request(struct work_struct *work);
void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_atomic_eth *ateth);
void opfn_conn_reply(struct rvt_qp *qp, u64 data);
void opfn_conn_error(struct rvt_qp *qp);
void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask);
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1);
int opfn_init(void);
void opfn_exit(void);
#endif /* _HFI1_OPFN_H */

View File

@@ -1599,8 +1599,7 @@ static void sc_piobufavail(struct send_context *sc)
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
uint i, n = 0, max_idx = 0;
u8 max_starved_cnt = 0;
uint i, n = 0, top_idx = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1619,11 +1618,18 @@ static void sc_piobufavail(struct send_context *sc)
if (n == ARRAY_SIZE(qps))
break;
wait = list_first_entry(list, struct iowait, list);
iowait_get_priority(wait);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx);
if (n) {
priv = qps[top_idx]->priv;
top_idx = iowait_priority_update_top(wait,
&priv->s_iowait,
n, top_idx);
}
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -1638,12 +1644,12 @@ static void sc_piobufavail(struct send_context *sc)
}
write_sequnlock_irqrestore(&sc->waitlock, flags);
/* Wake up the most starved one first */
/* Wake up the top-priority one first */
if (n)
hfi1_qp_wakeup(qps[max_idx],
hfi1_qp_wakeup(qps[top_idx],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
if (i != max_idx)
if (i != top_idx)
hfi1_qp_wakeup(qps[i],
RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}

View File

@@ -132,6 +132,18 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
.qpt_support = BIT(IB_QPT_RC),
},
[IB_WR_OPFN] = {
.length = sizeof(struct ib_atomic_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_USE_RESERVE,
},
[IB_WR_TID_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
.qpt_support = BIT(IB_QPT_RC),
.flags = RVT_OPERATION_IGN_RNR_CNT,
},
};
static void flush_list_head(struct list_head *l)
@@ -285,6 +297,8 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
qp_set_16b(qp);
}
opfn_qp_init(qp, attr, attr_mask);
}
/**
@@ -311,6 +325,8 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
/* fall through */
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
@@ -422,6 +438,11 @@ static void hfi1_qp_schedule(struct rvt_qp *qp)
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
}
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
ret = hfi1_schedule_tid_send(qp);
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -441,8 +462,27 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
struct hfi1_qp_priv *priv = qp->priv;
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
qp->s_flags &= ~RVT_S_BUSY;
/*
* If we are sending a first-leg packet from the second leg,
* we need to clear the busy flag from priv->s_flags to
* avoid a race condition when the qp wakes up before
* the call to hfi1_verbs_send() returns to the second
* leg. In that case, the second leg will terminate without
* being re-scheduled, resulting in failure to send TID RDMA
* WRITE DATA and TID RDMA ACK packets.
*/
if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
RVT_S_BUSY);
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
} else {
priv->s_flags &= ~RVT_S_BUSY;
}
}
static int iowait_sleep(
@@ -479,6 +519,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
iowait_get_priority(&priv->s_iowait);
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
priv->s_iowait.lock = &sde->waitlock;
@@ -528,6 +569,17 @@ static void iowait_sdma_drained(struct iowait *wait)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
static void hfi1_init_priority(struct iowait *w)
{
struct rvt_qp *qp = iowait_to_qp(w);
struct hfi1_qp_priv *priv = qp->priv;
if (qp->s_flags & RVT_S_ACK_PENDING)
w->priority++;
if (priv->s_flags & RVT_S_ACK_PENDING)
w->priority++;
}
/**
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
@@ -685,10 +737,11 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
NULL,
_hfi1_do_tid_send,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
iowait_sdma_drained,
hfi1_init_priority);
return priv;
}
@@ -696,6 +749,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
hfi1_qp_priv_tid_free(rdi, qp);
kfree(priv->s_ahg);
kfree(priv);
}
@@ -729,6 +783,7 @@ void flush_qp_waiters(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_tid_rdma_flush_wait(qp);
}
void stop_send_queue(struct rvt_qp *qp)
@@ -736,12 +791,16 @@ void stop_send_queue(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
iowait_cancel_work(&priv->s_iowait);
if (cancel_work_sync(&priv->tid_rdma.trigger_work))
rvt_put_qp(qp);
}
void quiesce_qp(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
hfi1_del_tid_reap_timer(qp);
hfi1_del_tid_retry_timer(qp);
iowait_sdma_drain(&priv->s_iowait);
qp_pio_drain(qp);
flush_tx_list(qp);
@@ -749,8 +808,13 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
hfi1_qp_kern_exp_rcv_clear_all(qp);
qp->r_adefered = 0;
clear_ahg(qp);
/* Clear any OPFN state */
if (qp->ibqp.qp_type == IB_QPT_RC)
opfn_conn_error(qp);
}
/*
@@ -832,7 +896,8 @@ void notify_error_qp(struct rvt_qp *qp)
if (lock) {
write_seqlock(lock);
if (!list_empty(&priv->s_iowait.list) &&
!(qp->s_flags & RVT_S_BUSY)) {
!(qp->s_flags & RVT_S_BUSY) &&
!(priv->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
@@ -841,7 +906,8 @@ void notify_error_qp(struct rvt_qp *qp)
write_sequnlock(lock);
}
if (!(qp->s_flags & RVT_S_BUSY)) {
if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
rvt_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;

View File

@@ -63,11 +63,17 @@ extern const struct rvt_operation_params hfi1_post_parms[];
* HFI1_S_AHG_VALID - ahg header valid on chip
* HFI1_S_AHG_CLEAR - have send engine clear ahg state
* HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
* HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
* HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
* HFI1_S_WAIT_HALT - halt the first leg send engine
* HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
*/
#define HFI1_S_AHG_VALID 0x80000000
#define HFI1_S_AHG_CLEAR 0x40000000
#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
#define HFI1_S_WAIT_TID_SPACE 0x10000000
#define HFI1_S_WAIT_TID_RESP 0x08000000
#define HFI1_S_WAIT_HALT 0x04000000
#define HFI1_S_MIN_BIT_MASK 0x01000000
/*
@@ -76,6 +82,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
#define HFI1_S_ANY_TID_WAIT_SEND (RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA)
/*
* Send if not busy or waiting for I/O and either

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#ifndef HFI1_RC_H
#define HFI1_RC_H
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
{
unsigned int next;
next = n + 1;
if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
qp->s_tail_ack_queue = next;
qp->s_acked_ack_queue = next;
qp->s_ack_state = OP(ACKNOWLEDGE);
}
static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
struct rvt_qp *qp)
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
u32 len;
len = delta_psn(psn, wqe->psn) * pmtu;
return rvt_restart_sge(ss, wqe, len);
}
struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
u8 *prev_ack, bool *scheduled);
int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
struct hfi1_ctxtdata *rcd);
struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct hfi1_ibport *ibp);
#endif /* HFI1_RC_H */

View File

@@ -250,7 +250,6 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth1, u32 bth2)
{
bth1 |= qp->remote_qpn;
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(bth1);
ohdr->bth[2] = cpu_to_be32(bth2);
@@ -272,13 +271,13 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
*/
static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2,
int middle,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = ps->ibp;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 bth1 = 0;
u32 slid;
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
u8 l4 = OPA_16B_L4_IB_LOCAL;
@@ -360,12 +359,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
*/
static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2,
int middle,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = ps->ibp;
u32 bth1 = 0;
u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
u16 lrh0 = HFI1_LRH_BTH;
u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
@@ -415,7 +414,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
/* We support only two types - 9B and 16B for now */
@@ -425,7 +424,7 @@ static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
};
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -446,18 +445,21 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
priv->s_ahg->ahgidx = 0;
/* Make the appropriate header */
hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps);
hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle,
ps);
}
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
/**
* schedule_send_yield - test for a yield required for QP send engine
* hfi1_schedule_send_yield - test for a yield required for QP
* send engine
* @timeout: Final time for timeout slice for jiffies
* @qp: a pointer to QP
* @ps: a pointer to a structure with commonly lookup values for
* the the send engine progress
* @tid - true if it is the tid leg
*
* This routine checks if the time slice for the QP has expired
* for RC QPs, if so an additional work entry is queued. At this
@@ -465,8 +467,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
* returns true if a yield is required, otherwise, false
* is returned.
*/
static bool schedule_send_yield(struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid)
{
ps->pkts_sent = true;
@@ -474,8 +476,24 @@ static bool schedule_send_yield(struct rvt_qp *qp,
if (!ps->in_thread ||
workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
spin_lock_irqsave(&qp->s_lock, ps->flags);
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
if (!tid) {
qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp);
} else {
struct hfi1_qp_priv *priv = qp->priv;
if (priv->s_flags &
HFI1_S_TID_BUSY_SET) {
qp->s_flags &= ~RVT_S_BUSY;
priv->s_flags &=
~(HFI1_S_TID_BUSY_SET |
RVT_S_BUSY);
} else {
priv->s_flags &= ~RVT_S_BUSY;
}
hfi1_schedule_tid_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, ps->flags);
this_cpu_inc(*ps->ppd->dd->send_schedule);
trace_hfi1_rc_expired_time_slice(qp, true);
@@ -576,6 +594,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
if (priv->s_flags & HFI1_S_TID_BUSY_SET)
qp->s_flags |= RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
@@ -585,7 +605,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
return;
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
if (hfi1_schedule_send_yield(qp, &ps, false))
return;
spin_lock_irqsave(&qp->s_lock, ps.flags);

View File

@@ -1747,10 +1747,9 @@ retry:
*/
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *wait, *nw, *twait;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
u8 max_starved_cnt = 0;
uint i, n = 0, seq, tidx = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1775,13 +1774,20 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
continue;
if (n == ARRAY_SIZE(waits))
break;
iowait_init_priority(wait);
num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
/* Find the most starved wait memeber */
iowait_starve_find_max(wait, &max_starved_cnt,
n, &max_idx);
/* Find the top-priority wait memeber */
if (n) {
twait = waits[tidx];
tidx =
iowait_priority_update_top(wait,
twait,
n,
tidx);
}
list_del_init(&wait->list);
waits[n++] = wait;
}
@@ -1790,12 +1796,12 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
}
} while (read_seqretry(&sde->waitlock, seq));
/* Schedule the most starved one first */
/* Schedule the top-priority entry first */
if (n)
waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
for (i = 0; i < n; i++)
if (i != max_idx)
if (i != tidx)
waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}

View File

@@ -91,6 +91,7 @@ struct sdma_desc {
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
#define SDMA_TXREQ_F_VIP 0x0010
struct sdma_txreq;
typedef void (*callback_t)(struct sdma_txreq *, int);

View File

@@ -498,7 +498,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
@@ -508,7 +508,7 @@ static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@@ -524,7 +524,7 @@ static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
@@ -536,7 +536,7 @@ static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/*
@@ -555,7 +555,7 @@ static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
@@ -567,7 +567,7 @@ static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
@@ -579,7 +579,7 @@ static ssize_t chip_reset_store(struct device *device,
size_t count)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@@ -609,7 +609,7 @@ static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
struct hfi1_temp temp;
int ret;

File diff suppressed because it is too large Load Diff

View File

@@ -6,8 +6,317 @@
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H
#include <linux/circ_buf.h>
#include "common.h"
/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
/*
* Bit definitions for priv->s_flags.
* These bit flags overload the bit flags defined for the QP's s_flags.
* Due to the fact that these bit fields are used only for the QP priv
* s_flags, there are no collisions.
*
* HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
* HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
*/
#define HFI1_S_TID_BUSY_SET BIT(0)
/* BIT(1) reserved for RVT_S_BUSY. */
#define HFI1_R_TID_RSC_TIMER BIT(2)
/* BIT(3) reserved for RVT_S_RESP_PENDING. */
/* BIT(4) reserved for RVT_S_ACK_PENDING. */
#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
/* BIT(16) reserved for RVT_S_SEND_ONE */
#define HFI1_S_TID_RETRY_TIMER BIT(17)
/* BIT(18) reserved for RVT_S_ECN. */
#define HFI1_R_TID_SW_PSN BIT(19)
/* BIT(26) reserved for HFI1_S_WAIT_HALT */
/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
/*
* Unlike regular IB RDMA VERBS, which do not require an entry
* in the s_ack_queue, TID RDMA WRITE requests do because they
* generate responses.
* Therefore, the s_ack_queue needs to be extended by a certain
* amount. The key point is that the queue needs to be extended
* without letting the "user" know so they user doesn't end up
* using these extra entries.
*/
#define HFI1_TID_RDMA_WRITE_CNT 8
struct tid_rdma_params {
struct rcu_head rcu_head;
u32 qp;
u32 max_len;
u16 jkey;
u8 max_read;
u8 max_write;
u8 timeout;
u8 urg;
u8 version;
};
struct tid_rdma_qp_params {
struct work_struct trigger_work;
struct tid_rdma_params local;
struct tid_rdma_params __rcu *remote;
};
/* Track state for each hardware flow */
struct tid_flow_state {
u32 generation;
u32 psn;
u32 r_next_psn; /* next PSN to be received (in TID space) */
u8 index;
u8 last_index;
u8 flags;
};
enum tid_rdma_req_state {
TID_REQUEST_INACTIVE = 0,
TID_REQUEST_INIT,
TID_REQUEST_INIT_RESEND,
TID_REQUEST_ACTIVE,
TID_REQUEST_RESEND,
TID_REQUEST_RESEND_ACTIVE,
TID_REQUEST_QUEUED,
TID_REQUEST_SYNC,
TID_REQUEST_RNR_NAK,
TID_REQUEST_COMPLETE,
};
struct tid_rdma_request {
struct rvt_qp *qp;
struct hfi1_ctxtdata *rcd;
union {
struct rvt_swqe *swqe;
struct rvt_ack_entry *ack;
} e;
struct tid_rdma_flow *flows; /* array of tid flows */
struct rvt_sge_state ss; /* SGE state for TID RDMA requests */
u16 n_flows; /* size of the flow buffer window */
u16 setup_head; /* flow index we are setting up */
u16 clear_tail; /* flow index we are clearing */
u16 flow_idx; /* flow index most recently set up */
u16 acked_tail;
u32 seg_len;
u32 total_len;
u32 r_ack_psn; /* next expected ack PSN */
u32 r_flow_psn; /* IB PSN of next segment start */
u32 r_last_acked; /* IB PSN of last ACK'ed packet */
u32 s_next_psn; /* IB PSN of next segment start for read */
u32 total_segs; /* segments required to complete a request */
u32 cur_seg; /* index of current segment */
u32 comp_seg; /* index of last completed segment */
u32 ack_seg; /* index of last ack'ed segment */
u32 alloc_seg; /* index of next segment to be allocated */
u32 isge; /* index of "current" sge */
u32 ack_pending; /* num acks pending for this request */
enum tid_rdma_req_state state;
};
/*
* When header suppression is used, PSNs associated with a "flow" are
* relevant (and not the PSNs maintained by verbs). Track per-flow
* PSNs here for a TID RDMA segment.
*
*/
struct flow_state {
u32 flags;
u32 resp_ib_psn; /* The IB PSN of the response for this flow */
u32 generation; /* generation of flow */
u32 spsn; /* starting PSN in TID space */
u32 lpsn; /* last PSN in TID space */
u32 r_next_psn; /* next PSN to be received (in TID space) */
/* For tid rdma read */
u32 ib_spsn; /* starting PSN in Verbs space */
u32 ib_lpsn; /* last PSn in Verbs space */
};
struct tid_rdma_pageset {
dma_addr_t addr : 48; /* Only needed for the first page */
u8 idx: 8;
u8 count : 7;
u8 mapped: 1;
};
/**
* kern_tid_node - used for managing TID's in TID groups
*
* @grp_idx: rcd relative index to tid_group
* @map: grp->map captured prior to programming this TID group in HW
* @cnt: Only @cnt of available group entries are actually programmed
*/
struct kern_tid_node {
struct tid_group *grp;
u8 map;
u8 cnt;
};
/* Overall info for a TID RDMA segment */
struct tid_rdma_flow {
/*
* While a TID RDMA segment is being transferred, it uses a QP number
* from the "KDETH section of QP numbers" (which is different from the
* QP number that originated the request). Bits 11-15 of these QP
* numbers identify the "TID flow" for the segment.
*/
struct flow_state flow_state;
struct tid_rdma_request *req;
u32 tid_qpn;
u32 tid_offset;
u32 length;
u32 sent;
u8 tnode_cnt;
u8 tidcnt;
u8 tid_idx;
u8 idx;
u8 npagesets;
u8 npkts;
u8 pkt;
u8 resync_npkts;
struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
u32 tid_entry[TID_RDMA_MAX_PAGES];
};
enum tid_rnr_nak_state {
TID_RNR_NAK_INIT = 0,
TID_RNR_NAK_SEND,
TID_RNR_NAK_SENT,
};
bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
void tid_rdma_conn_error(struct rvt_qp *qp);
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
struct rvt_sge_state *ss, bool *last);
int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
/**
* trdma_clean_swqe - clean flows for swqe if large send queue
* @qp: the qp
* @wqe: the send wqe
*/
static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
if (!wqe->priv)
return;
__trdma_clean_swqe(qp, wqe);
}
void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
struct ib_qp_init_attr *init_attr);
void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);
int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
struct cntr_entry;
u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data);
u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u32 *bth0,
u32 *bth1, u32 *bth2, u32 *len, bool *last);
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
struct hfi1_pportdata *ppd,
struct hfi1_packet *packet);
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
struct rvt_swqe *wqe)
{
if (wqe->priv &&
(wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
setup_tid_rdma_wqe(qp, wqe);
}
u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
void hfi1_compute_tid_rdma_flow_wt(void);
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u32 *bth1,
u32 bth2, u32 *len,
struct rvt_sge_state **ss);
void hfi1_del_tid_reap_timer(struct rvt_qp *qp);
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet);
bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
struct ib_other_headers *ohdr, u16 iflow,
u32 *bth1, u32 *bth2);
void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);
void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
void hfi1_del_tid_retry_timer(struct rvt_qp *qp);
u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
u32 *bth2, u16 fidx);
void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet);
struct hfi1_pkt_state;
int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void _hfi1_do_tid_send(struct work_struct *work);
bool hfi1_schedule_tid_send(struct rvt_qp *qp);
bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
#endif /* HFI1_TID_RDMA_H */

View File

@@ -46,6 +46,7 @@
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@@ -128,6 +129,15 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_READ_RSP_PRN "verbs_qp 0x%x"
#define TID_WRITE_REQ_PRN "original_qp 0x%x"
#define TID_WRITE_RSP_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_WRITE_DATA_PRN "verbs_qp 0x%x"
#define TID_ACK_PRN "tid_flow_psn 0x%x verbs_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_RESYNC_PRN "verbs_qp 0x%x"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@@ -322,6 +332,99 @@ const char *parse_everbs_hdrs(
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break;
case OP(TID_RDMA, WRITE_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_WRITE_REQ_PRN,
le32_to_cpu(eh->tid_rdma.w_req.kdeth0),
le32_to_cpu(eh->tid_rdma.w_req.kdeth1),
ib_u64_get(&eh->tid_rdma.w_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.w_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.w_req.reth.length),
be32_to_cpu(eh->tid_rdma.w_req.verbs_qp));
break;
case OP(TID_RDMA, WRITE_RESP):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_WRITE_RSP_PRN,
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth0),
le32_to_cpu(eh->tid_rdma.w_rsp.kdeth1),
be32_to_cpu(eh->tid_rdma.w_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.w_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.w_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.w_rsp.verbs_qp));
break;
case OP(TID_RDMA, WRITE_DATA_LAST):
case OP(TID_RDMA, WRITE_DATA):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " TID_WRITE_DATA_PRN,
le32_to_cpu(eh->tid_rdma.w_data.kdeth0),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, SH),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, TID),
KDETH_GET(eh->tid_rdma.w_data.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.w_data.kdeth1),
KDETH_GET(eh->tid_rdma.w_data.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.w_data.verbs_qp));
break;
case OP(TID_RDMA, READ_REQ):
trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
TID_READ_REQ_PRN,
le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
be32_to_cpu(eh->tid_rdma.r_req.reth.length),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
break;
case OP(TID_RDMA, READ_RESP):
trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
TID_READ_RSP_PRN,
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
break;
case OP(TID_RDMA, ACK):
trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
TID_ACK_PRN,
le32_to_cpu(eh->tid_rdma.ack.kdeth0),
le32_to_cpu(eh->tid_rdma.ack.kdeth1),
be32_to_cpu(eh->tid_rdma.ack.aeth) >> 24,
parse_syndrome(/* aeth */
be32_to_cpu(eh->tid_rdma.ack.aeth)
>> 24),
(be32_to_cpu(eh->tid_rdma.ack.aeth) &
IB_MSN_MASK),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_psn),
be32_to_cpu(eh->tid_rdma.ack.verbs_psn),
be32_to_cpu(eh->tid_rdma.ack.tid_flow_qp),
be32_to_cpu(eh->tid_rdma.ack.verbs_qp));
break;
case OP(TID_RDMA, RESYNC):
trace_seq_printf(p, TID_RDMA_KDETH " " TID_RESYNC_PRN,
le32_to_cpu(eh->tid_rdma.resync.kdeth0),
le32_to_cpu(eh->tid_rdma.resync.kdeth1),
be32_to_cpu(eh->tid_rdma.resync.verbs_qp));
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
@@ -394,6 +497,21 @@ const char *print_u32_array(
return ret;
}
u8 hfi1_trace_get_tid_ctrl(u32 ent)
{
return EXP_TID_GET(ent, CTRL);
}
u16 hfi1_trace_get_tid_len(u32 ent)
{
return EXP_TID_GET(ent, LEN);
}
u16 hfi1_trace_get_tid_idx(u32 ent)
{
return EXP_TID_GET(ent, IDX);
}
__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);

View File

@@ -63,3 +63,4 @@ __print_symbolic(etype, \
#include "trace_tx.h"
#include "trace_mmu.h"
#include "trace_iowait.h"
#include "trace_tid.h"

View File

@@ -79,6 +79,14 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
ib_opcode_name(TID_RDMA_WRITE_REQ), \
ib_opcode_name(TID_RDMA_WRITE_RESP), \
ib_opcode_name(TID_RDMA_WRITE_DATA), \
ib_opcode_name(TID_RDMA_WRITE_DATA_LAST), \
ib_opcode_name(TID_RDMA_READ_REQ), \
ib_opcode_name(TID_RDMA_READ_RESP), \
ib_opcode_name(TID_RDMA_RESYNC), \
ib_opcode_name(TID_RDMA_ACK), \
ib_opcode_name(UC_SEND_FIRST), \
ib_opcode_name(UC_SEND_MIDDLE), \
ib_opcode_name(UC_SEND_LAST), \

View File

@@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_ARGS(qp, psn)
);
DEFINE_EVENT(/* event */
hfi1_rc_template, hfi1_rc_completion,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
DECLARE_EVENT_CLASS(/* rc_ack */
hfi1_rc_ack_template,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
__field(u32, qpn)
__field(u32, aeth)
__field(u32, psn)
__field(u8, opcode)
__field(u32, spsn)
__field(u32, lpsn)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->qpn = qp->ibqp.qp_num;
__entry->aeth = aeth;
__entry->psn = psn;
__entry->opcode = wqe->wr.opcode;
__entry->spsn = wqe->psn;
__entry->lpsn = wqe->lpsn;
),
TP_printk(/* print */
"[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
__get_str(dev),
__entry->qpn,
__entry->aeth,
__entry->psn,
__entry->opcode,
__entry->spsn,
__entry->lpsn
)
);
DEFINE_EVENT(/* do_rc_ack */
hfi1_rc_ack_template, hfi1_rc_ack_do,
TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
struct rvt_swqe *wqe),
TP_ARGS(qp, aeth, psn, wqe)
);
#endif /* __HFI1_TRACE_RC_H */
#undef TRACE_INCLUDE_PATH

View File

@@ -1,5 +1,5 @@
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -128,111 +128,6 @@ TRACE_EVENT(hfi1_receive_interrupt,
)
);
DECLARE_EVENT_CLASS(
hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
u32 npages, unsigned long va, unsigned long pa,
dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(u32, rarr)
__field(u32, npages)
__field(unsigned long, va)
__field(unsigned long, pa)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->va = va;
__entry->pa = pa;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->pa,
__entry->va,
__entry->dma
)
);
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
DEFINE_EVENT(
hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
unsigned long va, unsigned long pa, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
TRACE_EVENT(
hfi1_put_tid,
TP_PROTO(struct hfi1_devdata *dd,
u32 index, u32 type, unsigned long pa, u16 order),
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
__field(unsigned long, pa);
__field(u32, index);
__field(u32, type);
__field(u16, order);
),
TP_fast_assign(
DD_DEV_ASSIGN(dd);
__entry->pa = pa;
__entry->index = index;
__entry->type = type;
__entry->order = order;
),
TP_printk("[%s] type %s pa %lx index %u order %u",
__get_str(dev),
show_tidtype(__entry->type),
__entry->pa,
__entry->index,
__entry->order
)
);
TRACE_EVENT(hfi1_exp_tid_inval,
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
u32 npages, dma_addr_t dma),
TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
TP_STRUCT__entry(
__field(unsigned int, ctxt)
__field(u16, subctxt)
__field(unsigned long, va)
__field(u32, rarr)
__field(u32, npages)
__field(dma_addr_t, dma)
),
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
__entry->va = va;
__entry->rarr = rarr;
__entry->npages = npages;
__entry->dma = dma;
),
TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
__entry->ctxt,
__entry->subctxt,
__entry->rarr,
__entry->npages,
__entry->va,
__entry->dma
)
);
TRACE_EVENT(hfi1_mmu_invalidate,
TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
unsigned long start, unsigned long end),

File diff suppressed because it is too large Load Diff

View File

@@ -114,19 +114,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
__field(u32, qpn)
__field(u32, flags)
__field(u32, s_flags)
__field(u32, ps_flags)
__field(unsigned long, iow_flags)
),
TP_fast_assign(
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
__entry->flags = flags;
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->ps_flags =
((struct hfi1_qp_priv *)qp->priv)->s_flags;
__entry->iow_flags =
((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
),
TP_printk(
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
"[%s] qpn 0x%x flags 0x%x s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx",
__get_str(dev),
__entry->qpn,
__entry->flags,
__entry->s_flags
__entry->s_flags,
__entry->ps_flags,
__entry->iow_flags
)
);
@@ -838,6 +846,12 @@ DEFINE_EVENT(
TP_ARGS(qp, flag)
);
DEFINE_EVENT(/* event */
hfi1_do_send_template, hfi1_rc_do_tid_send,
TP_PROTO(struct rvt_qp *qp, bool flag),
TP_ARGS(qp, flag)
);
DEFINE_EVENT(
hfi1_do_send_template, hfi1_rc_expired_time_slice,
TP_PROTO(struct rvt_qp *qp, bool flag),

View File

@@ -271,7 +271,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ps->s_txreq->ss = &qp->s_sge;
ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps);
qp->remote_qpn, mask_psn(qp->s_psn++),
middle, ps);
return 1;
done_free_tx:

View File

@@ -222,31 +222,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
ssge.num_sge = swqe->wr.num_sge;
sge = &ssge.sge;
while (length) {
u32 len = sge->length;
u32 len = rvt_get_sge_length(sge, length);
if (len > length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--ssge.num_sge)
*sge = *ssge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
rvt_update_sge(&ssge, len, false);
length -= len;
}
rvt_put_ss(&qp->r_sge);

View File

@@ -48,7 +48,6 @@
*/
#include "hfi.h"
#include "exp_rcv.h"
struct tid_pageset {

View File

@@ -91,9 +91,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
/* Convert to number of pages */
size = DIV_ROUND_UP(size, PAGE_SIZE);
down_read(&mm->mmap_sem);
pinned = mm->pinned_vm;
up_read(&mm->mmap_sem);
pinned = atomic64_read(&mm->pinned_vm);
/* First, check the absolute limit against all pinned pages. */
if (pinned + npages >= ulimit && !can_lock)
@@ -111,9 +109,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
if (ret < 0)
return ret;
down_write(&mm->mmap_sem);
mm->pinned_vm += ret;
up_write(&mm->mmap_sem);
atomic64_add(ret, &mm->pinned_vm);
return ret;
}
@@ -130,8 +126,6 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
}
if (mm) { /* during close after signal, mm can be NULL */
down_write(&mm->mmap_sem);
mm->pinned_vm -= npages;
up_write(&mm->mmap_sem);
atomic64_sub(npages, &mm->pinned_vm);
}
}

View File

@@ -144,8 +144,10 @@ static int defer_packet_queue(
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list))
if (list_empty(&pq->busy.list)) {
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
@@ -191,7 +193,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
activate_packet_queue, NULL, NULL);
pq->reqidx = 0;
pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
@@ -1126,7 +1128,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
0xffffffull),
psn = val & mask;
if (expct)
psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
else
psn = psn + frags;
return psn & mask;

View File

@@ -161,10 +161,12 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
*/
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
@@ -200,6 +202,14 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
[IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@@ -243,6 +253,17 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
/* TID RDMA has separate handlers for different opcodes.*/
[IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
[IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
[IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
[IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
[IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
[IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
[IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
@@ -308,7 +329,7 @@ static inline opcode_handler qp_ok(struct hfi1_packet *packet)
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
/*
* In order to drop non-IB traffic we
* set PbcInsertHrc to NONE (0x2).
@@ -319,8 +340,9 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* packet will not be delivered to the
* correct context.
*/
pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
else
} else {
/*
* In order to drop regular verbs
* traffic we set the PbcTestEbp
@@ -330,10 +352,129 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* triggered and will be dropped.
*/
pbc |= PBC_TEST_EBP;
}
#endif
return pbc;
}
static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
{
if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
return opcode_handler_tbl[opcode];
return NULL;
}
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh != HFI1_LRH_BTH)
goto drop;
packet->ohdr = &hdr->u.oth;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* verbs_qp can be picked up from any tid_rdma header struct */
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
RVT_QPN_MASK;
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
goto drop_rcu;
spin_lock_irqsave(&packet->qp->r_lock, flags);
opcode_handler = tid_qp_ok(opcode, packet);
if (likely(opcode_handler))
opcode_handler(packet);
else
goto drop_unlock;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
return;
drop_unlock:
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
/* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh != HFI1_LRH_BTH)
goto drop;
packet->ohdr = &hdr->u.oth;
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
/* verbs_qp can be picked up from any tid_rdma header struct */
qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
RVT_QPN_MASK;
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
goto drop_rcu;
spin_lock_irqsave(&packet->qp->r_lock, flags);
opcode_handler = tid_qp_ok(opcode, packet);
if (likely(opcode_handler))
opcode_handler(packet);
else
goto drop_unlock;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
rcu_read_unlock();
return;
drop_unlock:
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
drop_rcu:
rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@@ -504,11 +645,28 @@ static void verbs_sdma_complete(
hfi1_put_txreq(tx);
}
void hfi1_wait_kmem(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct ib_qp *ibqp = &qp->ibqp;
struct ib_device *ibdev = ibqp->device;
struct hfi1_ibdev *dev = to_idev(ibdev);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
}
static int wait_kmem(struct hfi1_ibdev *dev,
struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
@@ -517,15 +675,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
hfi1_wait_kmem(qp);
write_sequnlock(&dev->iowait_lock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
@@ -553,11 +703,7 @@ static noinline int build_verbs_ulp_payload(
int ret = 0;
while (length) {
len = ss->sge.length;
if (len > length)
len = length;
if (len > ss->sge.sge_length)
len = ss->sge.sge_length;
len = rvt_get_sge_length(&ss->sge, length);
WARN_ON_ONCE(len == 0);
ret = sdma_txadd_kvaddr(
sde->dd,
@@ -678,6 +824,15 @@ bail_txadd:
return ret;
}
static u64 update_hcrc(u8 opcode, u64 pbc)
{
if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
}
return pbc;
}
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
@@ -723,6 +878,9 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
qp->srate_mbps,
vl,
plen);
/* Update HCRC based on packet opcode */
pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@@ -787,6 +945,7 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
iowait_get_priority(&priv->s_iowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
priv->s_iowait.lock = &sc->waitlock;
@@ -871,6 +1030,9 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
/* Update HCRC based on packet opcode */
pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
@@ -914,12 +1076,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (ss) {
while (len) {
void *addr = ss->sge.vaddr;
u32 slen = ss->sge.length;
u32 slen = rvt_get_sge_length(&ss->sge, len);
if (slen > len)
slen = len;
if (slen > ss->sge.sge_length)
slen = ss->sge.sge_length;
rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
@@ -1188,7 +1346,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_mr_size = U64_MAX;
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
rdi->dparms.props.max_qp_wr =
(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
rdi->dparms.props.max_send_sge = hfi1_max_sges;
rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
@@ -1622,6 +1782,7 @@ static const struct ib_device_ops hfi1_dev_ops = {
.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
.get_dev_fw_str = hfi1_get_dev_fw_str,
.get_hw_stats = get_hw_stats,
.init_port = hfi1_create_port_files,
.modify_device = modify_device,
/* keep process mad in the driver */
.process_mad = hfi1_process_mad,
@@ -1679,7 +1840,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
/*
* Fill in rvt info object.
*/
dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
@@ -1743,6 +1903,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
dd->verbs_dev.rdi.dparms.reserved_operations = 1;
dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;

View File

@@ -72,6 +72,7 @@ struct hfi1_packet;
#include "iowait.h"
#include "tid_rdma.h"
#include "opfn.h"
#define HFI1_MAX_RDMA_ATOMIC 16
@@ -158,10 +159,68 @@ struct hfi1_qp_priv {
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
struct hfi1_ctxtdata *rcd; /* QP's receive context */
struct page **pages; /* for TID page scan */
u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct timer_list s_tid_timer; /* for timing tid wait */
struct timer_list s_tid_retry_timer; /* for timing tid ack */
struct list_head tid_wait; /* for queueing tid space */
struct hfi1_opfn_data opfn;
struct tid_flow_state flow_state;
struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
u8 hdr_type; /* 9B or 16B */
struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */
atomic_t n_requests; /* # of TID RDMA requests in the */
/* queue */
atomic_t n_tid_requests; /* # of sent TID RDMA requests */
unsigned long tid_timer_timeout_jiffies;
unsigned long tid_retry_timeout_jiffies;
/* variables for the TID RDMA SE state machine */
u8 s_state;
u8 s_retry;
u8 rnr_nak_state; /* RNR NAK state */
u8 s_nak_state;
u32 s_nak_psn;
u32 s_flags;
u32 s_tid_cur;
u32 s_tid_head;
u32 s_tid_tail;
u32 r_tid_head; /* Most recently added TID RDMA request */
u32 r_tid_tail; /* the last completed TID RDMA request */
u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
u32 r_tid_alloc; /* Request for which we are allocating resources */
u32 pending_tid_w_segs; /* Num of pending tid write segments */
u32 pending_tid_w_resp; /* Num of pending tid write responses */
u32 alloc_w_segs; /* Number of segments for which write */
/* resources have been allocated for this QP */
/* For TID RDMA READ */
u32 tid_r_reqs; /* Num of tid reads requested */
u32 tid_r_comp; /* Num of tid reads completed */
u32 pending_tid_r_segs; /* Num of pending tid read segments */
u16 pkts_ps; /* packets per segment */
u8 timeout_shift; /* account for number of packets per segment */
u32 r_next_psn_kdeth;
u32 r_next_psn_kdeth_save;
u32 s_resync_psn;
u8 sync_pt; /* Set when QP reaches sync point */
u8 resync;
};
#define HFI1_QP_WQE_INVALID ((u32)-1)
struct hfi1_swqe_priv {
struct tid_rdma_request tid_req;
struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
};
struct hfi1_ack_priv {
struct rvt_sge_state ss; /* used for TID WRITE RESP */
struct tid_rdma_request tid_req;
};
/*
@@ -225,6 +284,7 @@ struct hfi1_ibdev {
struct kmem_cache *verbs_txreq_cache;
u64 n_txwait;
u64 n_kmem_wait;
u64 n_tidwait;
/* protect iowait lists */
seqlock_t iowait_lock ____cacheline_aligned_in_smp;
@@ -312,6 +372,31 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
{
return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
}
static inline struct tid_rdma_request *ack_to_tid_req(struct rvt_ack_entry *e)
{
return &((struct hfi1_ack_priv *)e->priv)->tid_req;
}
/*
* Look through all the active flows for a TID RDMA request and find
* the one (if it exists) that contains the specified PSN.
*/
static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
{
return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
(psn & HFI1_KDETH_BTH_SEQ_MASK));
}
static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
{
return __full_flow_psn(&flow->flow_state, psn);
}
struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);
@@ -356,9 +441,12 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
bool tid);
void _hfi1_do_send(struct work_struct *work);
void hfi1_do_send_from_rvt(struct rvt_qp *qp);
@@ -377,6 +465,10 @@ int hfi1_register_ib_device(struct hfi1_devdata *);
void hfi1_unregister_ib_device(struct hfi1_devdata *);
void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet);
void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet);
void hfi1_ib_rcv(struct hfi1_packet *packet);
void hfi1_16B_rcv(struct hfi1_packet *packet);
@@ -394,6 +486,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
}
void hfi1_wait_kmem(struct rvt_qp *qp);
static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
struct rvt_swqe *wqe,
enum ib_wc_status status)
{
trdma_clean_swqe(qp, wqe);
rvt_send_complete(qp, wqe, status);
}
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
extern const u8 hdr_len_by_opcode[];

View File

@@ -94,6 +94,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
tx->txreq.flags = 0;
return tx;
}

View File

@@ -240,8 +240,10 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
}
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
if (list_empty(&vnic_sdma->wait.list))
if (list_empty(&vnic_sdma->wait.list)) {
iowait_get_priority(wait->iow);
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
}
write_sequnlock(&sde->waitlock);
return -EBUSY;
}
@@ -281,7 +283,7 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL);
hfi1_vnic_sdma_wakeup, NULL, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
vnic_sdma->vinfo = vinfo;

View File

@@ -1,7 +1,6 @@
config INFINIBAND_HNS
tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on ARM64 || (COMPILE_TEST && 64BIT)
---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine

View File

@@ -2,7 +2,7 @@
# Makefile for the Hisilicon RoCE drivers.
#
ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \

View File

@@ -176,17 +176,33 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
{
if (hr_dev->is_reset)
return 0;
int ret;
if (hr_dev->hw->rst_prc_mbox) {
ret = hr_dev->hw->rst_prc_mbox(hr_dev);
if (ret == CMD_RST_PRC_SUCCESS)
return 0;
else if (ret == CMD_RST_PRC_EBUSY)
return -EBUSY;
}
if (hr_dev->cmd.use_events)
return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
in_modifier, op_modifier, op,
timeout);
ret = hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
in_modifier, op_modifier, op,
timeout);
else
return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param,
in_modifier, op_modifier, op,
timeout);
ret = hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param,
in_modifier, op_modifier, op,
timeout);
if (ret == CMD_RST_PRC_EBUSY)
return -EBUSY;
if (ret && (hr_dev->hw->rst_prc_mbox &&
hr_dev->hw->rst_prc_mbox(hr_dev) == CMD_RST_PRC_SUCCESS))
return 0;
return ret;
}
EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox);

View File

@@ -75,6 +75,10 @@ enum {
HNS_ROCE_CMD_DESTROY_MPT_BT1 = 0x29,
HNS_ROCE_CMD_DESTROY_MPT_BT2 = 0x2a,
/* CQC TIMER commands */
HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 = 0x23,
HNS_ROCE_CMD_READ_CQC_TIMER_BT0 = 0x27,
/* MPT commands */
HNS_ROCE_CMD_QUERY_MPT = 0x62,
@@ -89,6 +93,10 @@ enum {
HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39,
HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a,
/* QPC TIMER commands */
HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 = 0x33,
HNS_ROCE_CMD_READ_QPC_TIMER_BT0 = 0x37,
/* EQC commands */
HNS_ROCE_CMD_CREATE_AEQC = 0x80,
HNS_ROCE_CMD_MODIFY_AEQC = 0x81,
@@ -98,6 +106,10 @@ enum {
HNS_ROCE_CMD_MODIFY_CEQC = 0x91,
HNS_ROCE_CMD_QUERY_CEQC = 0x92,
HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
/* SCC CTX BT commands */
HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
};
enum {

View File

@@ -215,7 +215,7 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
EXPORT_SYMBOL_GPL(hns_roce_free_cq);
static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
struct ib_ucontext *context,
struct ib_udata *udata,
struct hns_roce_cq_buf *buf,
struct ib_umem **umem, u64 buf_addr, int cqe)
{
@@ -223,7 +223,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
u32 page_shift;
u32 npages;
*umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
*umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -347,7 +347,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
}
/* Get user space address, write it into mtt table */
ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf,
ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
&hr_cq->umem, ucmd.buf_addr,
cq_entries);
if (ret) {
@@ -358,7 +358,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
(udata->outlen >= sizeof(resp))) {
ret = hns_roce_db_map_user(to_hr_ucontext(context),
ucmd.db_addr, &hr_cq->db);
udata, ucmd.db_addr,
&hr_cq->db);
if (ret) {
dev_err(dev, "cq record doorbell map failed!\n");
goto err_mtt;

View File

@@ -8,7 +8,8 @@
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct hns_roce_db *db)
{
struct hns_roce_user_db_page *page;
@@ -28,8 +29,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
refcount_set(&page->refcount, 1);
page->user_virt = (virt & PAGE_MASK);
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
PAGE_SIZE, 0, 0);
page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
if (IS_ERR(page->umem)) {
ret = PTR_ERR(page->umem);
kfree(page);

View File

@@ -202,6 +202,7 @@ enum {
HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
};
@@ -216,6 +217,32 @@ enum {
HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4
};
enum hns_roce_reset_stage {
HNS_ROCE_STATE_NON_RST,
HNS_ROCE_STATE_RST_BEF_DOWN,
HNS_ROCE_STATE_RST_DOWN,
HNS_ROCE_STATE_RST_UNINIT,
HNS_ROCE_STATE_RST_INIT,
HNS_ROCE_STATE_RST_INITED,
};
enum hns_roce_instance_state {
HNS_ROCE_STATE_NON_INIT,
HNS_ROCE_STATE_INIT,
HNS_ROCE_STATE_INITED,
HNS_ROCE_STATE_UNINIT,
};
enum {
HNS_ROCE_RST_DIRECT_RETURN = 0,
};
enum {
CMD_RST_PRC_OTHERS,
CMD_RST_PRC_SUCCESS,
CMD_RST_PRC_EBUSY,
};
#define HNS_ROCE_CMD_SUCCESS 1
#define HNS_ROCE_PORT_DOWN 0
@@ -482,6 +509,8 @@ struct hns_roce_qp_table {
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
struct hns_roce_hem_table trrl_table;
struct hns_roce_hem_table sccc_table;
struct mutex scc_mutex;
};
struct hns_roce_cq_table {
@@ -729,6 +758,8 @@ struct hns_roce_caps {
u32 max_extend_sg;
int num_qps; /* 256k */
int reserved_qps;
int num_qpc_timer;
int num_cqc_timer;
u32 max_srq_sg;
int num_srqs;
u32 max_wqes; /* 16k */
@@ -768,6 +799,9 @@ struct hns_roce_caps {
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
int sccc_entry_sz;
int qpc_timer_entry_sz;
int cqc_timer_entry_sz;
int srqc_entry_sz;
int idx_entry_sz;
u32 pbl_ba_pg_sz;
@@ -777,9 +811,12 @@ struct hns_roce_caps {
int ceqe_depth;
enum ib_mtu max_mtu;
u32 qpc_bt_num;
u32 qpc_timer_bt_num;
u32 srqc_bt_num;
u32 cqc_bt_num;
u32 cqc_timer_bt_num;
u32 mpt_bt_num;
u32 sccc_bt_num;
u32 qpc_ba_pg_sz;
u32 qpc_buf_pg_sz;
u32 qpc_hop_num;
@@ -795,6 +832,15 @@ struct hns_roce_caps {
u32 mtt_ba_pg_sz;
u32 mtt_buf_pg_sz;
u32 mtt_hop_num;
u32 sccc_ba_pg_sz;
u32 sccc_buf_pg_sz;
u32 sccc_hop_num;
u32 qpc_timer_ba_pg_sz;
u32 qpc_timer_buf_pg_sz;
u32 qpc_timer_hop_num;
u32 cqc_timer_ba_pg_sz;
u32 cqc_timer_buf_pg_sz;
u32 cqc_timer_hop_num;
u32 cqe_ba_pg_sz;
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
@@ -834,6 +880,7 @@ struct hns_roce_hw {
u64 out_param, u32 in_modifier, u8 op_modifier, u16 op,
u16 token, int event);
int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev);
int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
const union ib_gid *gid, const struct ib_gid_attr *attr);
int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
@@ -861,6 +908,8 @@ struct hns_roce_hw {
int attr_mask, enum ib_qp_state cur_state,
enum ib_qp_state new_state);
int (*destroy_qp)(struct ib_qp *ibqp);
int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp);
int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
@@ -898,6 +947,8 @@ struct hns_roce_dev {
spinlock_t bt_cmd_lock;
bool active;
bool is_reset;
bool dis_db;
unsigned long reset_cnt;
struct hns_roce_ib_iboe iboe;
struct list_head pgdir_list;
@@ -922,6 +973,8 @@ struct hns_roce_dev {
struct hns_roce_srq_table srq_table;
struct hns_roce_qp_table qp_table;
struct hns_roce_eq_table eq_table;
struct hns_roce_hem_table qpc_timer_table;
struct hns_roce_hem_table cqc_timer_table;
int cmd_mod;
int loop_idc;
@@ -1061,10 +1114,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *pd,
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
struct ib_ucontext *context,
struct ib_udata *udata);
int hns_roce_dealloc_pd(struct ib_pd *pd);
int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata);
void hns_roce_dealloc_pd(struct ib_pd *pd);
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -1133,7 +1185,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
int hns_roce_db_map_user(struct hns_roce_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct hns_roce_db *db);
void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
struct hns_roce_db *db);

View File

@@ -45,6 +45,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
(hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) ||
(hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
(hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
(hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) ||
(hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) ||
(hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) ||
(hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
(hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
(hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
@@ -125,6 +128,30 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = hr_dev->caps.cqc_bt_num;
mhop->hop_num = hr_dev->caps.cqc_hop_num;
break;
case HEM_TYPE_SCCC:
mhop->buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
+ PAGE_SHIFT);
mhop->ba_l0_num = hr_dev->caps.sccc_bt_num;
mhop->hop_num = hr_dev->caps.sccc_hop_num;
break;
case HEM_TYPE_QPC_TIMER:
mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
+ PAGE_SHIFT);
mhop->ba_l0_num = hr_dev->caps.qpc_timer_bt_num;
mhop->hop_num = hr_dev->caps.qpc_timer_hop_num;
break;
case HEM_TYPE_CQC_TIMER:
mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
+ PAGE_SHIFT);
mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
+ PAGE_SHIFT);
mhop->ba_l0_num = hr_dev->caps.cqc_timer_bt_num;
mhop->hop_num = hr_dev->caps.cqc_timer_hop_num;
break;
case HEM_TYPE_SRQC:
mhop->buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
+ PAGE_SHIFT);
@@ -175,7 +202,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
return 0;
/*
* QPC/MTPT/CQC/SRQC alloc hem for buffer pages.
* QPC/MTPT/CQC/SRQC/SCCC alloc hem for buffer pages.
* MTT/CQE alloc hem for bt pages.
*/
bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
@@ -486,7 +513,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
}
/*
* alloc buffer space chunk for QPC/MTPT/CQC/SRQC.
* alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
* alloc bt space chunk for MTT/CQE.
*/
size = table->type < HEM_TYPE_MTT ? buf_chunk_size : bt_chunk_size;
@@ -593,6 +620,7 @@ out:
mutex_unlock(&table->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(hns_roce_table_get);
static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
@@ -658,7 +686,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
}
/*
* free buffer space chunk for QPC/MTPT/CQC/SRQC.
* free buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
* free bt space chunk for MTT/CQE.
*/
hns_roce_free_hem(hr_dev, table->hem[hem_idx]);
@@ -735,6 +763,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
mutex_unlock(&table->mutex);
}
EXPORT_SYMBOL_GPL(hns_roce_table_put);
void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
@@ -904,6 +933,30 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
num_bt_l0 = hr_dev->caps.cqc_bt_num;
hop_num = hr_dev->caps.cqc_hop_num;
break;
case HEM_TYPE_SCCC:
buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
+ PAGE_SHIFT);
bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
+ PAGE_SHIFT);
num_bt_l0 = hr_dev->caps.sccc_bt_num;
hop_num = hr_dev->caps.sccc_hop_num;
break;
case HEM_TYPE_QPC_TIMER:
buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
+ PAGE_SHIFT);
bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
+ PAGE_SHIFT);
num_bt_l0 = hr_dev->caps.qpc_timer_bt_num;
hop_num = hr_dev->caps.qpc_timer_hop_num;
break;
case HEM_TYPE_CQC_TIMER:
buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
+ PAGE_SHIFT);
bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
+ PAGE_SHIFT);
num_bt_l0 = hr_dev->caps.cqc_timer_bt_num;
hop_num = hr_dev->caps.cqc_timer_hop_num;
break;
case HEM_TYPE_SRQC:
buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
+ PAGE_SHIFT);
@@ -1081,6 +1134,15 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
if (hr_dev->caps.qpc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qpc_timer_table);
if (hr_dev->caps.cqc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->cqc_timer_table);
if (hr_dev->caps.sccc_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.trrl_table);

View File

@@ -44,6 +44,9 @@ enum {
HEM_TYPE_MTPT,
HEM_TYPE_CQC,
HEM_TYPE_SRQC,
HEM_TYPE_SCCC,
HEM_TYPE_QPC_TIMER,
HEM_TYPE_CQC_TIMER,
/* UNMAP HEM */
HEM_TYPE_MTT,

View File

@@ -711,13 +711,14 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
struct ib_qp_attr attr = { 0 };
struct hns_roce_v1_priv *priv;
struct hns_roce_qp *hr_qp;
struct ib_device *ibdev;
struct ib_cq *cq;
struct ib_pd *pd;
union ib_gid dgid;
u64 subnet_prefix;
int attr_mask = 0;
int ret = -ENOMEM;
int i, j;
int ret;
u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
u8 phy_port;
u8 port = 0;
@@ -742,12 +743,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
free_mr->mr_free_cq->ib_cq.cq_context = NULL;
atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
if (IS_ERR(pd)) {
dev_err(dev, "Create pd for reserved loop qp failed!");
ret = -ENOMEM;
ibdev = &hr_dev->ib_dev;
pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
if (!pd)
goto alloc_mem_failed;
pd->device = ibdev;
ret = hns_roce_alloc_pd(pd, NULL, NULL);
if (ret)
goto alloc_pd_failed;
}
free_mr->mr_free_pd = to_hr_pd(pd);
free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
free_mr->mr_free_pd->ibpd.uobject = NULL;
@@ -854,10 +859,12 @@ create_lp_qp_failed:
dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
}
if (hns_roce_dealloc_pd(pd))
dev_err(dev, "Destroy pd for create_lp_qp failed!\n");
hns_roce_dealloc_pd(pd);
alloc_pd_failed:
kfree(pd);
alloc_mem_failed:
if (hns_roce_ib_destroy_cq(cq))
dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
@@ -891,9 +898,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
if (ret)
dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
if (ret)
dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret);
hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
}
static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
@@ -1866,9 +1871,8 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
unsigned long mtpt_idx)
{
struct hns_roce_v1_mpt_entry *mpt_entry;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
u64 *pages;
int entry;
int i;
/* MPT filled into mailbox buf */
@@ -1923,8 +1927,8 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
return -ENOMEM;
i = 0;
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
pages[i] = ((u64)sg_dma_address(sg)) >> 12;
for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12;
/* Directly record to MTPT table firstly 7 entry */
if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM)
@@ -5002,7 +5006,7 @@ static int hns_roce_probe(struct platform_device *pdev)
struct hns_roce_dev *hr_dev;
struct device *dev = &pdev->dev;
hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
if (!hr_dev)
return -ENOMEM;

View File

@@ -587,7 +587,7 @@ out:
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
V2_DB_PARAMETER_SL_S, qp->sl);
hns_roce_write64_k((__le32 *)&sq_db, qp->sq.db_reg_l);
hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
qp->sq_next_wqe = ind;
qp->next_sge = sge_ind;
@@ -712,6 +712,113 @@ out:
return ret;
}
static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
unsigned long instance_stage,
unsigned long reset_stage)
{
/* When hardware reset has been completed once or more, we should stop
* sending mailbox&cmq&doorbell to hardware. If now in .init_instance()
* function, we should exit with error. If now at HNAE3_INIT_CLIENT
* stage of soft reset process, we should exit with error, and then
* HNAE3_INIT_CLIENT related process can rollback the operation like
* notifing hardware to free resources, HNAE3_INIT_CLIENT related
* process will exit with error to notify NIC driver to reschedule soft
* reset process once again.
*/
hr_dev->is_reset = true;
hr_dev->dis_db = true;
if (reset_stage == HNS_ROCE_STATE_RST_INIT ||
instance_stage == HNS_ROCE_STATE_INIT)
return CMD_RST_PRC_EBUSY;
return CMD_RST_PRC_SUCCESS;
}
static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
unsigned long instance_stage,
unsigned long reset_stage)
{
struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
/* When hardware reset is detected, we should stop sending mailbox&cmq&
* doorbell to hardware. If now in .init_instance() function, we should
* exit with error. If now at HNAE3_INIT_CLIENT stage of soft reset
* process, we should exit with error, and then HNAE3_INIT_CLIENT
* related process can rollback the operation like notifing hardware to
* free resources, HNAE3_INIT_CLIENT related process will exit with
* error to notify NIC driver to reschedule soft reset process once
* again.
*/
hr_dev->dis_db = true;
if (!ops->get_hw_reset_stat(handle))
hr_dev->is_reset = true;
if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
instance_stage == HNS_ROCE_STATE_INIT)
return CMD_RST_PRC_EBUSY;
return CMD_RST_PRC_SUCCESS;
}
static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
/* When software reset is detected at .init_instance() function, we
* should stop sending mailbox&cmq&doorbell to hardware, and exit
* with error.
*/
hr_dev->dis_db = true;
if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt)
hr_dev->is_reset = true;
return CMD_RST_PRC_EBUSY;
}
static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
unsigned long instance_stage; /* the current instance stage */
unsigned long reset_stage; /* the current reset stage */
unsigned long reset_cnt;
bool sw_resetting;
bool hw_resetting;
if (hr_dev->is_reset)
return CMD_RST_PRC_SUCCESS;
/* Get information about reset from NIC driver or RoCE driver itself,
* the meaning of the following variables from NIC driver are described
* as below:
* reset_cnt -- The count value of completed hardware reset.
* hw_resetting -- Whether hardware device is resetting now.
* sw_resetting -- Whether NIC's software reset process is running now.
*/
instance_stage = handle->rinfo.instance_state;
reset_stage = handle->rinfo.reset_state;
reset_cnt = ops->ae_dev_reset_cnt(handle);
hw_resetting = ops->get_hw_reset_stat(handle);
sw_resetting = ops->ae_dev_resetting(handle);
if (reset_cnt != hr_dev->reset_cnt)
return hns_roce_v2_cmd_hw_reseted(hr_dev, instance_stage,
reset_stage);
else if (hw_resetting)
return hns_roce_v2_cmd_hw_resetting(hr_dev, instance_stage,
reset_stage);
else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT)
return hns_roce_v2_cmd_sw_resetting(hr_dev);
return 0;
}
static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring)
{
int ntu = ring->next_to_use;
@@ -892,8 +999,8 @@ static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev)
return clean;
}
static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
@@ -905,9 +1012,6 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
int ret = 0;
int ntc;
if (hr_dev->is_reset)
return 0;
spin_lock_bh(&csq->lock);
if (num > hns_roce_cmq_space(csq)) {
@@ -982,6 +1086,30 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
return ret;
}
int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
int retval;
int ret;
ret = hns_roce_v2_rst_process_cmd(hr_dev);
if (ret == CMD_RST_PRC_SUCCESS)
return 0;
if (ret == CMD_RST_PRC_EBUSY)
return ret;
ret = __hns_roce_cmq_send(hr_dev, desc, num);
if (ret) {
retval = hns_roce_v2_rst_process_cmd(hr_dev);
if (retval == CMD_RST_PRC_SUCCESS)
return 0;
else if (retval == CMD_RST_PRC_EBUSY)
return retval;
}
return ret;
}
static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
{
struct hns_roce_query_version *resp;
@@ -1078,6 +1206,44 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num,
PF_RES_DATA_3_PF_SL_NUM_M,
PF_RES_DATA_3_PF_SL_NUM_S);
hr_dev->caps.sccc_bt_num = roce_get_field(req_b->sccc_bt_idx_num,
PF_RES_DATA_4_PF_SCCC_BT_NUM_M,
PF_RES_DATA_4_PF_SCCC_BT_NUM_S);
return 0;
}
static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev)
{
struct hns_roce_pf_timer_res_a *req_a;
struct hns_roce_cmq_desc desc[2];
int ret, i;
for (i = 0; i < 2; i++) {
hns_roce_cmq_setup_basic_desc(&desc[i],
HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
true);
if (i == 0)
desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
else
desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
}
ret = hns_roce_cmq_send(hr_dev, desc, 2);
if (ret)
return ret;
req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data;
hr_dev->caps.qpc_timer_bt_num =
roce_get_field(req_a->qpc_timer_bt_idx_num,
PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M,
PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S);
hr_dev->caps.cqc_timer_bt_num =
roce_get_field(req_a->cqc_timer_bt_idx_num,
PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M,
PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S);
return 0;
}
@@ -1193,6 +1359,14 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
VF_RES_B_DATA_3_VF_SL_NUM_M,
VF_RES_B_DATA_3_VF_SL_NUM_S,
HNS_ROCE_VF_SL_NUM);
roce_set_field(req_b->vf_sccc_idx_num,
VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M,
VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0);
roce_set_field(req_b->vf_sccc_idx_num,
VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M,
VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S,
HNS_ROCE_VF_SCCC_BT_NUM);
}
}
@@ -1205,6 +1379,7 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
u8 qpc_hop_num = hr_dev->caps.qpc_hop_num;
u8 cqc_hop_num = hr_dev->caps.cqc_hop_num;
u8 mpt_hop_num = hr_dev->caps.mpt_hop_num;
u8 sccc_hop_num = hr_dev->caps.sccc_hop_num;
struct hns_roce_cfg_bt_attr *req;
struct hns_roce_cmq_desc desc;
@@ -1252,6 +1427,20 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S,
mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num);
roce_set_field(req->vf_sccc_cfg,
CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M,
CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S,
hr_dev->caps.sccc_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_sccc_cfg,
CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M,
CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S,
hr_dev->caps.sccc_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_sccc_cfg,
CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M,
CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S,
sccc_hop_num ==
HNS_ROCE_HOP_NUM_0 ? 0 : sccc_hop_num);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -1289,6 +1478,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
if (hr_dev->pci_dev->revision == 0x21) {
ret = hns_roce_query_pf_timer_resource(hr_dev);
if (ret) {
dev_err(hr_dev->dev,
"Query pf timer resource fail, ret = %d.\n",
ret);
return ret;
}
}
ret = hns_roce_alloc_vf_resource(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
@@ -1313,6 +1512,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
@@ -1366,7 +1566,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->mpt_ba_pg_sz = 0;
caps->mpt_buf_pg_sz = 0;
caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
caps->pbl_ba_pg_sz = 0;
caps->pbl_ba_pg_sz = 2;
caps->pbl_buf_pg_sz = 0;
caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
caps->mtt_ba_pg_sz = 0;
@@ -1408,9 +1608,27 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
if (hr_dev->pci_dev->revision == 0x21)
if (hr_dev->pci_dev->revision == 0x21) {
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
HNS_ROCE_CAP_FLAG_SRQ;
HNS_ROCE_CAP_FLAG_SRQ |
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
caps->qpc_timer_ba_pg_sz = 0;
caps->qpc_timer_buf_pg_sz = 0;
caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
caps->cqc_timer_ba_pg_sz = 0;
caps->cqc_timer_buf_pg_sz = 0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
caps->sccc_ba_pg_sz = 0;
caps->sccc_buf_pg_sz = 0;
caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
}
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
@@ -1611,7 +1829,8 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
int ret;
int qpc_count, cqc_count;
int ret, i;
/* TSQ includes SQ doorbell and ack doorbell */
ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
@@ -1626,8 +1845,40 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
goto err_tpq_init_failed;
}
/* Alloc memory for QPC Timer buffer space chunk*/
for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
qpc_count++) {
ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table,
qpc_count);
if (ret) {
dev_err(hr_dev->dev, "QPC Timer get failed\n");
goto err_qpc_timer_failed;
}
}
/* Alloc memory for CQC Timer buffer space chunk*/
for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num;
cqc_count++) {
ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table,
cqc_count);
if (ret) {
dev_err(hr_dev->dev, "CQC Timer get failed\n");
goto err_cqc_timer_failed;
}
}
return 0;
err_cqc_timer_failed:
for (i = 0; i < cqc_count; i++)
hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
err_qpc_timer_failed:
for (i = 0; i < qpc_count; i++)
hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
hns_roce_free_link_table(hr_dev, &priv->tpq);
err_tpq_init_failed:
hns_roce_free_link_table(hr_dev, &priv->tsq);
@@ -1735,6 +1986,9 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
status = hns_roce_v2_cmd_complete(hr_dev);
if (status != 0x1) {
if (status == CMD_RST_PRC_EBUSY)
return status;
dev_err(dev, "mailbox status 0x%x!\n", status);
return -EBUSY;
}
@@ -1831,12 +2085,10 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
struct hns_roce_mr *mr)
{
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
u64 page_addr;
u64 *pages;
int i, j;
int len;
int entry;
int i;
mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
@@ -1849,17 +2101,14 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
return -ENOMEM;
i = 0;
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (j = 0; j < len; ++j) {
page_addr = sg_dma_address(sg) +
(j << mr->umem->page_shift);
pages[i] = page_addr >> 6;
/* Record the first 2 entry directly to MTPT table */
if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
goto found;
i++;
}
for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
page_addr = sg_page_iter_dma_address(&sg_iter);
pages[i] = page_addr >> 6;
/* Record the first 2 entry directly to MTPT table */
if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
goto found;
i++;
}
found:
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
@@ -1941,6 +2190,9 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
int ret = 0;
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
if (flags & IB_MR_REREG_PD) {
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, pdn);
@@ -2245,6 +2497,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
enum ib_cq_notify_flags flags)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
u32 notification_flag;
u32 doorbell[2];
@@ -2270,7 +2523,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S,
notification_flag);
hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
hns_roce_write64(hr_dev, doorbell, hr_cq->cq_db_l);
return 0;
}
@@ -2663,17 +2916,33 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
case HEM_TYPE_SRQC:
op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
break;
case HEM_TYPE_SCCC:
op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
break;
case HEM_TYPE_QPC_TIMER:
op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
break;
case HEM_TYPE_CQC_TIMER:
op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
break;
default:
dev_warn(dev, "Table %d not to be written by mailbox!\n",
table->type);
return 0;
}
if (table->type == HEM_TYPE_SCCC && step_idx)
return 0;
op += step_idx;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
if (table->type == HEM_TYPE_SCCC)
obj = mhop.l0_idx;
if (check_whether_last_step(hop_num, step_idx)) {
hem = table->hem[hem_idx];
for (hns_roce_hem_first(hem, &iter);
@@ -2722,6 +2991,10 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
case HEM_TYPE_CQC:
op = HNS_ROCE_CMD_DESTROY_CQC_BT0;
break;
case HEM_TYPE_SCCC:
case HEM_TYPE_QPC_TIMER:
case HEM_TYPE_CQC_TIMER:
break;
case HEM_TYPE_SRQC:
op = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
break;
@@ -2730,6 +3003,12 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
table->type);
return 0;
}
if (table->type == HEM_TYPE_SCCC ||
table->type == HEM_TYPE_QPC_TIMER ||
table->type == HEM_TYPE_CQC_TIMER)
return 0;
op += step_idx;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -3686,10 +3965,16 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
V2_QPC_BYTE_212_LSN_S, 0);
if (attr_mask & IB_QP_TIMEOUT) {
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_AT_M,
V2_QPC_BYTE_28_AT_S, attr->timeout);
roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_AT_M,
V2_QPC_BYTE_28_AT_S, 0);
if (attr->timeout < 31) {
roce_set_field(context->byte_28_at_fl,
V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
attr->timeout);
roce_set_field(qpc_mask->byte_28_at_fl,
V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
0);
} else {
dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n");
}
}
roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -3742,7 +4027,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
struct device *dev = hr_dev->dev;
int ret = -EINVAL;
context = kcalloc(2, sizeof(*context), GFP_KERNEL);
context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
if (!context)
return -ENOMEM;
@@ -3789,13 +4074,16 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_160_sq_ci_pi,
V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
roce_set_field(context->byte_84_rq_ci_pi,
if (!ibqp->srq) {
roce_set_field(context->byte_84_rq_ci_pi,
V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
hr_qp->rq.head);
roce_set_field(qpc_mask->byte_84_rq_ci_pi,
roce_set_field(qpc_mask->byte_84_rq_ci_pi,
V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
}
}
if (attr_mask & IB_QP_AV) {
@@ -4224,6 +4512,59 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
return 0;
}
static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_sccc_clr_done *resp;
struct hns_roce_sccc_clr *clr;
struct hns_roce_cmq_desc desc;
int ret, i;
mutex_lock(&hr_dev->qp_table.scc_mutex);
/* set scc ctx clear done flag */
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
dev_err(hr_dev->dev, "Reset SCC ctx failed(%d)\n", ret);
goto out;
}
/* clear scc context */
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLR_SCCC, false);
clr = (struct hns_roce_sccc_clr *)desc.data;
clr->qpn = cpu_to_le32(hr_qp->qpn);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
dev_err(hr_dev->dev, "Clear SCC ctx failed(%d)\n", ret);
goto out;
}
/* query scc context clear is done or not */
resp = (struct hns_roce_sccc_clr_done *)desc.data;
for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
hns_roce_cmq_setup_basic_desc(&desc,
HNS_ROCE_OPC_QUERY_SCCC, true);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
dev_err(hr_dev->dev, "Query clr cmq failed(%d)\n", ret);
goto out;
}
if (resp->clr_done)
goto out;
msleep(20);
}
dev_err(hr_dev->dev, "Query SCC clr done flag overtime.\n");
ret = -ETIMEDOUT;
out:
mutex_unlock(&hr_dev->qp_table.scc_mutex);
return ret;
}
static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
@@ -4281,7 +4622,8 @@ static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn)
if (hr_qp->ibqp.uobject) {
if (hr_qp->sdb_en == 1) {
hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
if (hr_qp->rdb_en == 1)
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
} else {
dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n");
return;
@@ -4319,64 +4661,19 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
dev_warn(dev, "Send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
dev_err(dev, "Local work queue catastrophic error.\n");
dev_err(dev, "Local work queue 0x%x catas error, sub_type:%d\n",
qpn, irq_work->sub_type);
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
switch (irq_work->sub_type) {
case HNS_ROCE_LWQCE_QPC_ERROR:
dev_err(dev, "QP %d, QPC error.\n", qpn);
break;
case HNS_ROCE_LWQCE_MTU_ERROR:
dev_err(dev, "QP %d, MTU error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
dev_err(dev, "QP %d, WQE addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
dev_err(dev, "QP %d, WQE shift error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n",
irq_work->sub_type);
break;
}
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
dev_err(dev, "Invalid request local work queue error.\n");
dev_err(dev, "Invalid request local work queue 0x%x error.\n",
qpn);
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
dev_err(dev, "Local access violation work queue error.\n");
dev_err(dev, "Local access violation work queue 0x%x error, sub_type:%d\n",
qpn, irq_work->sub_type);
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
switch (irq_work->sub_type) {
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
dev_err(dev, "QP %d, R_key violation.\n", qpn);
break;
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
dev_err(dev, "QP %d, length error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_VA_ERROR:
dev_err(dev, "QP %d, VA error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_PD_ERROR:
dev_err(dev, "QP %d, PD error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
dev_err(dev, "QP %d, rw acc error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
dev_err(dev, "QP %d, key state error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
dev_err(dev, "QP %d, MR operation error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n",
irq_work->sub_type);
break;
}
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
dev_warn(dev, "SRQ limit reach.\n");
@@ -4427,6 +4724,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
{
struct hns_roce_dev *hr_dev = eq->hr_dev;
u32 doorbell[2];
doorbell[0] = 0;
@@ -4453,7 +4751,7 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
HNS_ROCE_V2_EQ_DB_PARA_S,
(eq->cons_index & HNS_ROCE_V2_CONS_IDX_M));
hns_roce_write64_k(doorbell, eq->doorbell);
hns_roce_write64(hr_dev, doorbell, eq->doorbell);
}
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
@@ -4568,7 +4866,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
event_type, eq->eqn, eq->cons_index);
break;
};
}
eq->event_type = event_type;
eq->sub_type = sub_type;
@@ -4692,11 +4990,22 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
struct pci_dev *pdev = hr_dev->pci_dev;
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
const struct hnae3_ae_ops *ops = ae_dev->ops;
dev_err(dev, "AEQ overflow!\n");
roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
ops->set_default_reset_request(ae_dev,
HNAE3_FUNC_RESET);
if (ops->reset_event)
ops->reset_event(pdev, NULL);
roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
@@ -5599,7 +5908,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
return 0;
}
int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
@@ -5664,6 +5973,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_v2_db srq_db;
@@ -5725,7 +6035,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
srq_db.parameter = srq->head;
hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);
hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
}
@@ -5758,6 +6068,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.hw_exit = hns_roce_v2_exit,
.post_mbox = hns_roce_v2_post_mbox,
.chk_mbox = hns_roce_v2_chk_mbox,
.rst_prc_mbox = hns_roce_v2_rst_process_cmd,
.set_gid = hns_roce_v2_set_gid,
.set_mac = hns_roce_v2_set_mac,
.write_mtpt = hns_roce_v2_write_mtpt,
@@ -5770,6 +6081,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.modify_qp = hns_roce_v2_modify_qp,
.query_qp = hns_roce_v2_query_qp,
.destroy_qp = hns_roce_v2_destroy_qp,
.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
.modify_cq = hns_roce_v2_modify_cq,
.post_send = hns_roce_v2_post_send,
.post_recv = hns_roce_v2_post_recv,
@@ -5800,6 +6112,7 @@ MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
struct hnae3_handle *handle)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
const struct pci_device_id *id;
int i;
@@ -5830,15 +6143,18 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
hr_dev->cmd_mod = 1;
hr_dev->loop_idc = 0;
hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle);
priv->handle = handle;
return 0;
}
static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
int ret;
hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
if (!hr_dev)
return -ENOMEM;
@@ -5850,7 +6166,6 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
hr_dev->pci_dev = handle->pdev;
hr_dev->dev = &handle->pdev->dev;
handle->priv = hr_dev;
ret = hns_roce_hw_v2_get_cfg(hr_dev, handle);
if (ret) {
@@ -5864,6 +6179,8 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
goto error_failed_get_cfg;
}
handle->priv = hr_dev;
return 0;
error_failed_get_cfg:
@@ -5875,7 +6192,7 @@ error_failed_kzalloc:
return ret;
}
static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
bool reset)
{
struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
@@ -5883,24 +6200,79 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
if (!hr_dev)
return;
handle->priv = NULL;
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
}
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
struct ib_event event;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
struct device *dev = &handle->pdev->dev;
int ret;
if (!hr_dev) {
dev_err(&handle->pdev->dev,
"Input parameter handle->priv is NULL!\n");
return -EINVAL;
handle->rinfo.instance_state = HNS_ROCE_STATE_INIT;
if (ops->ae_dev_resetting(handle) || ops->get_hw_reset_stat(handle)) {
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
goto reset_chk_err;
}
ret = __hns_roce_hw_v2_init_instance(handle);
if (ret) {
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
dev_err(dev, "RoCE instance init failed! ret = %d\n", ret);
if (ops->ae_dev_resetting(handle) ||
ops->get_hw_reset_stat(handle))
goto reset_chk_err;
else
return ret;
}
handle->rinfo.instance_state = HNS_ROCE_STATE_INITED;
return 0;
reset_chk_err:
dev_err(dev, "Device is busy in resetting state.\n"
"please retry later.\n");
return -EBUSY;
}
static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
bool reset)
{
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
return;
handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, reset);
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
struct ib_event event;
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
return 0;
}
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN;
clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
hr_dev = (struct hns_roce_dev *)handle->priv;
if (!hr_dev)
return 0;
hr_dev->active = false;
hr_dev->is_reset = true;
hr_dev->dis_db = true;
event.event = IB_EVENT_DEVICE_FATAL;
event.device = &hr_dev->ib_dev;
@@ -5912,17 +6284,29 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
{
struct device *dev = &handle->pdev->dev;
int ret;
ret = hns_roce_hw_v2_init_instance(handle);
if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN,
&handle->rinfo.state)) {
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
return 0;
}
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INIT;
dev_info(&handle->pdev->dev, "In reset process RoCE client reinit.\n");
ret = __hns_roce_hw_v2_init_instance(handle);
if (ret) {
/* when reset notify type is HNAE3_INIT_CLIENT In reset notify
* callback function, RoCE Engine reinitialize. If RoCE reinit
* failed, we should inform NIC driver.
*/
handle->priv = NULL;
dev_err(&handle->pdev->dev,
"In reset process RoCE reinit failed %d.\n", ret);
dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret);
} else {
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
dev_info(dev, "Reset done, RoCE client reinit finished.\n");
}
return ret;
@@ -5930,8 +6314,14 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
{
if (test_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state))
return 0;
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
msleep(100);
hns_roce_hw_v2_uninit_instance(handle, false);
__hns_roce_hw_v2_uninit_instance(handle, false);
return 0;
}

View File

@@ -36,6 +36,7 @@
#include <linux/bitops.h>
#define HNS_ROCE_VF_QPC_BT_NUM 256
#define HNS_ROCE_VF_SCCC_BT_NUM 64
#define HNS_ROCE_VF_SRQC_BT_NUM 64
#define HNS_ROCE_VF_CQC_BT_NUM 64
#define HNS_ROCE_VF_MPT_BT_NUM 64
@@ -44,12 +45,14 @@
#define HNS_ROCE_VF_SGID_NUM 32
#define HNS_ROCE_VF_SL_NUM 8
#define HNS_ROCE_V2_MAX_QP_NUM 0x2000
#define HNS_ROCE_V2_MAX_QP_NUM 0x100000
#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200
#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_SRQ 0x100000
#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100
#define HNS_ROCE_V2_MAX_CQ_NUM 0x8000
#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
@@ -64,7 +67,7 @@
#define HNS_ROCE_V2_COMP_VEC_NUM 63
#define HNS_ROCE_V2_AEQE_VEC_NUM 1
#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x100000
#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
@@ -83,6 +86,9 @@
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32
#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x100
@@ -90,7 +96,10 @@
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_SCCC_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
#define HNS_ROCE_CQE_HOP_NUM 1
#define HNS_ROCE_SRQWQE_HOP_NUM 1
@@ -120,6 +129,8 @@
#define HNS_ROCE_CMQ_EN_B 16
#define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B)
#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5
#define check_whether_last_step(hop_num, step_idx) \
((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
(step_idx == 1 && hop_num == 1) || \
@@ -224,11 +235,15 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
HNS_ROCE_OPC_POST_MB = 0x8504,
HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
HNS_ROCE_OPC_CLR_SCCC = 0x8509,
HNS_ROCE_OPC_QUERY_SCCC = 0x850a,
HNS_ROCE_OPC_RESET_SCCC = 0x850b,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
@@ -1300,7 +1315,8 @@ struct hns_roce_pf_res_b {
__le32 smac_idx_num;
__le32 sgid_idx_num;
__le32 qid_idx_sl_num;
__le32 rsv[2];
__le32 sccc_bt_idx_num;
__le32 rsv;
};
#define PF_RES_DATA_1_PF_SMAC_IDX_S 0
@@ -1321,6 +1337,31 @@ struct hns_roce_pf_res_b {
#define PF_RES_DATA_3_PF_SL_NUM_S 16
#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16)
#define PF_RES_DATA_4_PF_SCCC_BT_IDX_S 0
#define PF_RES_DATA_4_PF_SCCC_BT_IDX_M GENMASK(8, 0)
#define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9
#define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9)
struct hns_roce_pf_timer_res_a {
__le32 rsv0;
__le32 qpc_timer_bt_idx_num;
__le32 cqc_timer_bt_idx_num;
__le32 rsv[3];
};
#define PF_RES_DATA_1_PF_QPC_TIMER_BT_IDX_S 0
#define PF_RES_DATA_1_PF_QPC_TIMER_BT_IDX_M GENMASK(11, 0)
#define PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S 16
#define PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M GENMASK(28, 16)
#define PF_RES_DATA_2_PF_CQC_TIMER_BT_IDX_S 0
#define PF_RES_DATA_2_PF_CQC_TIMER_BT_IDX_M GENMASK(10, 0)
#define PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S 16
#define PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M GENMASK(27, 16)
struct hns_roce_vf_res_a {
__le32 vf_id;
__le32 vf_qpc_bt_idx_num;
@@ -1365,7 +1406,8 @@ struct hns_roce_vf_res_b {
__le32 vf_smac_idx_num;
__le32 vf_sgid_idx_num;
__le32 vf_qid_idx_sl_num;
__le32 rsv[2];
__le32 vf_sccc_idx_num;
__le32 rsv1;
};
#define VF_RES_B_DATA_0_VF_ID_S 0
@@ -1389,6 +1431,12 @@ struct hns_roce_vf_res_b {
#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
#define VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S 0
#define VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M GENMASK(8, 0)
#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9
#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9)
struct hns_roce_vf_switch {
__le32 rocee_sel;
__le32 fun_id;
@@ -1424,7 +1472,8 @@ struct hns_roce_cfg_bt_attr {
__le32 vf_srqc_cfg;
__le32 vf_cqc_cfg;
__le32 vf_mpt_cfg;
__le32 rsv[2];
__le32 vf_sccc_cfg;
__le32 rsv;
};
#define CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S 0
@@ -1463,6 +1512,15 @@ struct hns_roce_cfg_bt_attr {
#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8
#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8)
#define CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S 0
#define CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M GENMASK(3, 0)
#define CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S 4
#define CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M GENMASK(7, 4)
#define CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S 8
#define CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M GENMASK(9, 8)
struct hns_roce_cfg_sgid_tb {
__le32 table_idx_rsv;
__le32 vf_sgid_l;
@@ -1546,6 +1604,7 @@ struct hns_roce_link_table_entry {
#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20)
struct hns_roce_v2_priv {
struct hnae3_handle *handle;
struct hns_roce_v2_cmq cmq;
struct hns_roce_link_table tsq;
struct hns_roce_link_table tpq;
@@ -1730,4 +1789,25 @@ struct hns_roce_wqe_atomic_seg {
__le64 cmp_data;
};
struct hns_roce_sccc_clr {
__le32 qpn;
__le32 rsv[5];
};
struct hns_roce_sccc_clr_done {
__le32 clr_done;
__le32 rsv[5];
};
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest)
{
struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
hns_roce_write64_k(val, dest);
}
#endif

View File

@@ -226,6 +226,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_srq_sge = hr_dev->caps.max_srq_sges;
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
}
return 0;
}
@@ -330,23 +335,19 @@ static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
return 0;
}
static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
struct ib_udata *udata)
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
int ret = 0;
struct hns_roce_ucontext *context;
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
if (!hr_dev->active)
return ERR_PTR(-EAGAIN);
return -EAGAIN;
resp.qp_tab_size = hr_dev->caps.num_qps;
context = kmalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
goto error_fail_uar_alloc;
@@ -360,25 +361,20 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_copy_to_udata;
return &context->ibucontext;
return 0;
error_fail_copy_to_udata:
hns_roce_uar_free(hr_dev, &context->uar);
error_fail_uar_alloc:
kfree(context);
return ERR_PTR(ret);
return ret;
}
static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
kfree(context);
return 0;
}
static int hns_roce_mmap(struct ib_ucontext *context,
@@ -472,6 +468,8 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.query_pkey = hns_roce_query_pkey,
.query_port = hns_roce_query_port,
.reg_user_mr = hns_roce_reg_user_mr,
INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
};
static const struct ib_device_ops hns_roce_dev_mr_ops = {
@@ -564,7 +562,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->driver_id = RDMA_DRIVER_HNS;
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
ret = ib_register_device(ib_dev, "hns_%d", NULL);
ret = ib_register_device(ib_dev, "hns_%d");
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;
@@ -702,8 +700,62 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
}
}
if (hr_dev->caps.sccc_entry_sz) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table,
HEM_TYPE_SCCC,
hr_dev->caps.sccc_entry_sz,
hr_dev->caps.num_qps, 1);
if (ret) {
dev_err(dev,
"Failed to init SCC context memory, aborting.\n");
goto err_unmap_idx;
}
}
if (hr_dev->caps.qpc_timer_entry_sz) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qpc_timer_table,
HEM_TYPE_QPC_TIMER,
hr_dev->caps.qpc_timer_entry_sz,
hr_dev->caps.num_qpc_timer, 1);
if (ret) {
dev_err(dev,
"Failed to init QPC timer memory, aborting.\n");
goto err_unmap_ctx;
}
}
if (hr_dev->caps.cqc_timer_entry_sz) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->cqc_timer_table,
HEM_TYPE_CQC_TIMER,
hr_dev->caps.cqc_timer_entry_sz,
hr_dev->caps.num_cqc_timer, 1);
if (ret) {
dev_err(dev,
"Failed to init CQC timer memory, aborting.\n");
goto err_unmap_qpc_timer;
}
}
return 0;
err_unmap_qpc_timer:
if (hr_dev->caps.qpc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qpc_timer_table);
err_unmap_ctx:
if (hr_dev->caps.sccc_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
err_unmap_idx:
if (hr_dev->caps.num_idx_segs)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->mr_table.mtt_idx_table);
err_unmap_srqwqe:
if (hr_dev->caps.num_srqwqe_segs)
hns_roce_cleanup_hem_table(hr_dev,

View File

@@ -976,12 +976,11 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct ib_umem *umem)
{
struct device *dev = hr_dev->dev;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
unsigned int order;
int i, k, entry;
int npage = 0;
int ret = 0;
int len;
int i;
u64 page_addr;
u64 *pages;
u32 bt_page_size;
@@ -1014,29 +1013,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
i = n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (k = 0; k < len; ++k) {
page_addr =
sg_dma_address(sg) + (k << umem->page_shift);
if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
if (page_addr & ((1 << mtt->page_shift) - 1)) {
dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n",
page_addr, mtt->page_shift);
ret = -EINVAL;
goto out;
}
pages[i++] = page_addr;
}
npage++;
if (i == bt_page_size / sizeof(u64)) {
ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
pages);
if (ret)
goto out;
n += i;
i = 0;
for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
page_addr = sg_page_iter_dma_address(&sg_iter);
if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
if (page_addr & ((1 << mtt->page_shift) - 1)) {
dev_err(dev,
"page_addr 0x%llx is not page_shift %d alignment!\n",
page_addr, mtt->page_shift);
ret = -EINVAL;
goto out;
}
pages[i++] = page_addr;
}
npage++;
if (i == bt_page_size / sizeof(u64)) {
ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
if (ret)
goto out;
n += i;
i = 0;
}
}
@@ -1052,10 +1047,8 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr,
struct ib_umem *umem)
{
struct scatterlist *sg;
int i = 0, j = 0, k;
int entry;
int len;
struct sg_dma_page_iter sg_iter;
int i = 0, j = 0;
u64 page_addr;
u32 pbl_bt_sz;
@@ -1063,27 +1056,22 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
return 0;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> PAGE_SHIFT;
for (k = 0; k < len; ++k) {
page_addr = sg_dma_address(sg) +
(k << umem->page_shift);
for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
page_addr = sg_page_iter_dma_address(&sg_iter);
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf[i++] = page_addr >> 12;
} else if (hr_dev->caps.pbl_hop_num == 1) {
mr->pbl_buf[i++] = page_addr;
} else {
if (hr_dev->caps.pbl_hop_num == 2)
mr->pbl_bt_l1[i][j] = page_addr;
else if (hr_dev->caps.pbl_hop_num == 3)
mr->pbl_bt_l2[i][j] = page_addr;
if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf[i++] = page_addr >> 12;
} else if (hr_dev->caps.pbl_hop_num == 1) {
mr->pbl_buf[i++] = page_addr;
} else {
if (hr_dev->caps.pbl_hop_num == 2)
mr->pbl_bt_l1[i][j] = page_addr;
else if (hr_dev->caps.pbl_hop_num == 3)
mr->pbl_bt_l2[i][j] = page_addr;
j++;
if (j >= (pbl_bt_sz / 8)) {
i++;
j = 0;
}
j++;
if (j >= (pbl_bt_sz / 8)) {
i++;
j = 0;
}
}
}
@@ -1110,8 +1098,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length,
access_flags, 0);
mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
if (IS_ERR(mr->umem)) {
ret = PTR_ERR(mr->umem);
goto err_free;
@@ -1220,8 +1207,8 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
}
ib_umem_release(mr->umem);
mr->umem = ib_umem_get(ibmr->uobject->context, start, length,
mr_access_flags, 0);
mr->umem =
ib_umem_get(udata, start, length, mr_access_flags, 0);
if (IS_ERR(mr->umem)) {
ret = PTR_ERR(mr->umem);
mr->umem = NULL;

View File

@@ -57,24 +57,19 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev)
hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap);
}
struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
struct ib_ucontext *context,
struct ib_udata *udata)
int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct ib_device *ib_dev = ibpd->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
struct device *dev = hr_dev->dev;
struct hns_roce_pd *pd;
struct hns_roce_pd *pd = to_hr_pd(ibpd);
int ret;
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
if (ret) {
kfree(pd);
dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n");
return ERR_PTR(ret);
return ret;
}
if (context) {
@@ -83,21 +78,17 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
kfree(pd);
return ERR_PTR(-EFAULT);
return -EFAULT;
}
}
return &pd->ibpd;
return 0;
}
EXPORT_SYMBOL_GPL(hns_roce_alloc_pd);
int hns_roce_dealloc_pd(struct ib_pd *pd)
void hns_roce_dealloc_pd(struct ib_pd *pd)
{
hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
kfree(to_hr_pd(pd));
return 0;
}
EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd);

View File

@@ -35,6 +35,7 @@
#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
@@ -209,13 +210,23 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
}
}
if (hr_dev->caps.sccc_entry_sz) {
/* Alloc memory for SCC CTX */
ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
if (ret) {
dev_err(dev, "SCC CTX table get failed\n");
goto err_put_trrl;
}
}
spin_lock_irq(&qp_table->lock);
ret = radix_tree_insert(&hr_dev->qp_table_tree,
hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp);
spin_unlock_irq(&qp_table->lock);
if (ret) {
dev_err(dev, "QPC radix_tree_insert failed\n");
goto err_put_trrl;
goto err_put_sccc;
}
atomic_set(&hr_qp->refcount, 1);
@@ -223,6 +234,11 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
return 0;
err_put_sccc:
if (hr_dev->caps.sccc_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
err_put_trrl:
if (hr_dev->caps.trrl_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
@@ -258,6 +274,9 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
wait_for_completion(&hr_qp->free);
if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
if (hr_dev->caps.sccc_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->trrl_table,
hr_qp->qpn);
@@ -526,7 +545,8 @@ static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI ||
attr->qp_type == IB_QPT_XRC_TGT || attr->srq)
attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
!attr->cap.max_recv_wr)
return 0;
return 1;
@@ -541,6 +561,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_qp ucmd;
struct hns_roce_ib_create_qp_resp resp = {};
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
udata, struct hns_roce_ucontext, ibucontext);
unsigned long qpn = 0;
int ret = 0;
u32 page_shift;
@@ -612,9 +634,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
goto err_rq_sge_list;
}
hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
ucmd.buf_addr, hr_qp->buff_size, 0,
0);
hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
hr_qp->buff_size, 0, 0);
if (IS_ERR(hr_qp->umem)) {
dev_err(dev, "ib_umem_get error for create qp\n");
ret = PTR_ERR(hr_qp->umem);
@@ -622,19 +643,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
}
hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
page_shift = PAGE_SHIFT;
if (hr_dev->caps.mtt_buf_pg_sz) {
npages = (ib_umem_page_count(hr_qp->umem) +
(1 << hr_dev->caps.mtt_buf_pg_sz) - 1) /
(1 << hr_dev->caps.mtt_buf_pg_sz);
page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
(1 << hr_dev->caps.mtt_buf_pg_sz);
page_shift += hr_dev->caps.mtt_buf_pg_sz;
ret = hns_roce_mtt_init(hr_dev, npages,
page_shift,
&hr_qp->mtt);
} else {
ret = hns_roce_mtt_init(hr_dev,
ib_umem_page_count(hr_qp->umem),
hr_qp->umem->page_shift,
&hr_qp->mtt);
ib_umem_page_count(hr_qp->umem),
page_shift, &hr_qp->mtt);
}
if (ret) {
dev_err(dev, "hns_roce_mtt_init error for create qp\n");
@@ -652,9 +673,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
(udata->inlen >= sizeof(ucmd)) &&
(udata->outlen >= sizeof(resp)) &&
hns_roce_qp_has_sq(init_attr)) {
ret = hns_roce_db_map_user(
to_hr_ucontext(ib_pd->uobject->context),
ucmd.sdb_addr, &hr_qp->sdb);
ret = hns_roce_db_map_user(uctx, udata, ucmd.sdb_addr,
&hr_qp->sdb);
if (ret) {
dev_err(dev, "sq record doorbell map failed!\n");
goto err_mtt;
@@ -668,13 +688,16 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
(udata->outlen >= sizeof(resp)) &&
hns_roce_qp_has_rq(init_attr)) {
ret = hns_roce_db_map_user(
to_hr_ucontext(ib_pd->uobject->context),
ucmd.db_addr, &hr_qp->rdb);
ret = hns_roce_db_map_user(uctx, udata, ucmd.db_addr,
&hr_qp->rdb);
if (ret) {
dev_err(dev, "rq record doorbell map failed!\n");
goto err_sq_dbmap;
}
/* indicate kernel supports rq record db */
resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
hr_qp->rdb_en = 1;
}
} else {
if (init_attr->create_flags &
@@ -741,10 +764,10 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
goto err_mtt;
}
hr_qp->sq.wrid = kmalloc_array(hr_qp->sq.wqe_cnt, sizeof(u64),
GFP_KERNEL);
hr_qp->rq.wrid = kmalloc_array(hr_qp->rq.wqe_cnt, sizeof(u64),
GFP_KERNEL);
hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
GFP_KERNEL);
hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
GFP_KERNEL);
if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) {
ret = -ENOMEM;
goto err_wrid;
@@ -783,17 +806,19 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
else
hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
if (udata && (udata->outlen >= sizeof(resp)) &&
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
/* indicate kernel supports rq record db */
resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (udata) {
ret = ib_copy_to_udata(udata, &resp,
min(udata->outlen, sizeof(resp)));
if (ret)
goto err_qp;
hr_qp->rdb_en = 1;
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
if (ret)
goto err_qp;
}
hr_qp->event = hns_roce_ib_qp_event;
return 0;
@@ -814,9 +839,7 @@ err_wrid:
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
(udata->outlen >= sizeof(resp)) &&
hns_roce_qp_has_rq(init_attr))
hns_roce_db_unmap_user(
to_hr_ucontext(ib_pd->uobject->context),
&hr_qp->rdb);
hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
} else {
kfree(hr_qp->sq.wrid);
kfree(hr_qp->rq.wrid);
@@ -828,9 +851,7 @@ err_sq_dbmap:
(udata->inlen >= sizeof(ucmd)) &&
(udata->outlen >= sizeof(resp)) &&
hns_roce_qp_has_sq(init_attr))
hns_roce_db_unmap_user(
to_hr_ucontext(ib_pd->uobject->context),
&hr_qp->sdb);
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
@@ -969,7 +990,9 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
(attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
if (hr_qp->sdb_en == 1) {
hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
if (hr_qp->rdb_en == 1)
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
} else {
dev_warn(dev, "flush cqe is not supported in userspace!\n");
goto out;
@@ -1133,6 +1156,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
int reserved_from_bot;
int ret;
mutex_init(&qp_table->scc_mutex);
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);

View File

@@ -78,9 +78,9 @@ static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
HNS_ROCE_CMD_TIMEOUT_MSECS);
}
int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
struct hns_roce_srq *srq)
static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
u16 xrcd, struct hns_roce_mtt *hr_mtt,
u64 db_rec_addr, struct hns_roce_srq *srq)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
struct hns_roce_cmd_mailbox *mailbox;
@@ -155,7 +155,8 @@ err_out:
return ret;
}
void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
struct hns_roce_srq *srq)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
int ret;
@@ -253,8 +254,8 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
goto err_srq;
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
srq_buf_size, 0, 0);
srq->umem =
ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
ret = PTR_ERR(srq->umem);
goto err_srq;
@@ -281,8 +282,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
goto err_srq_mtt;
/* config index queue BA */
srq->idx_que.umem = ib_umem_get(pd->uobject->context,
ucmd.que_addr,
srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
srq->idx_que.buf_size, 0, 0);
if (IS_ERR(srq->idx_que.umem)) {
dev_err(hr_dev->dev,

View File

@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -Idrivers/net/ethernet/intel/i40e
ccflags-y := -I $(srctree)/drivers/net/ethernet/intel/i40e
obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o

View File

@@ -601,7 +601,6 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
if (!atomic_dec_and_test(&iwpd->usecount))
return;
i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
kfree(iwpd);
}
/**

View File

@@ -45,6 +45,7 @@
#include <rdma/iw_cm.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "i40iw.h"
/**
@@ -120,78 +121,55 @@ static int i40iw_query_port(struct ib_device *ibdev,
/**
* i40iw_alloc_ucontext - Allocate the user context data structure
* @ibdev: device pointer from stack
* @uctx: Uverbs context pointer from stack
* @udata: user data
*
* This keeps track of all objects associated with a particular
* user-mode client.
*/
static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
static int i40iw_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
struct ib_device *ibdev = uctx->device;
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct i40iw_alloc_ucontext_req req;
struct i40iw_alloc_ucontext_resp uresp;
struct i40iw_ucontext *ucontext;
struct i40iw_alloc_ucontext_resp uresp = {};
struct i40iw_ucontext *ucontext = to_ucontext(uctx);
if (ib_copy_from_udata(&req, udata, sizeof(req)))
return ERR_PTR(-EINVAL);
return -EINVAL;
if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) {
i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver);
return ERR_PTR(-EINVAL);
return -EINVAL;
}
memset(&uresp, 0, sizeof(uresp));
uresp.max_qps = iwdev->max_qp;
uresp.max_pds = iwdev->max_pd;
uresp.wq_size = iwdev->max_qp_wr * 2;
uresp.kernel_ver = req.userspace_ver;
ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
if (!ucontext)
return ERR_PTR(-ENOMEM);
ucontext->iwdev = iwdev;
ucontext->abi_ver = req.userspace_ver;
if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
kfree(ucontext);
return ERR_PTR(-EFAULT);
}
if (ib_copy_to_udata(udata, &uresp, sizeof(uresp)))
return -EFAULT;
INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
spin_lock_init(&ucontext->cq_reg_mem_list_lock);
INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
spin_lock_init(&ucontext->qp_reg_mem_list_lock);
return &ucontext->ibucontext;
return 0;
}
/**
* i40iw_dealloc_ucontext - deallocate the user context data structure
* @context: user context created during alloc
*/
static int i40iw_dealloc_ucontext(struct ib_ucontext *context)
static void i40iw_dealloc_ucontext(struct ib_ucontext *context)
{
struct i40iw_ucontext *ucontext = to_ucontext(context);
unsigned long flags;
spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
if (!list_empty(&ucontext->cq_reg_mem_list)) {
spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
return -EBUSY;
}
spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
if (!list_empty(&ucontext->qp_reg_mem_list)) {
spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
return -EBUSY;
}
spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
kfree(ucontext);
return 0;
return;
}
/**
@@ -312,16 +290,15 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_
/**
* i40iw_alloc_pd - allocate protection domain
* @ibdev: device pointer from stack
* @pd: PD pointer
* @context: user context created during alloc
* @udata: user data
*/
static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct i40iw_pd *iwpd;
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct i40iw_pd *iwpd = to_iwpd(pd);
struct i40iw_device *iwdev = to_iwdev(pd->device);
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_alloc_pd_resp uresp;
struct i40iw_sc_pd *sc_pd;
@@ -330,19 +307,13 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
int err;
if (iwdev->closing)
return ERR_PTR(-ENODEV);
return -ENODEV;
err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
iwdev->max_pd, &pd_id, &iwdev->next_pd);
if (err) {
i40iw_pr_err("alloc resource failed\n");
return ERR_PTR(err);
}
iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL);
if (!iwpd) {
err = -ENOMEM;
goto free_res;
return err;
}
sc_pd = &iwpd->sc_pd;
@@ -361,25 +332,23 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
}
i40iw_add_pdusecount(iwpd);
return &iwpd->ibpd;
return 0;
error:
kfree(iwpd);
free_res:
i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
return ERR_PTR(err);
return err;
}
/**
* i40iw_dealloc_pd - deallocate pd
* @ibpd: ptr of pd to be deallocated
*/
static int i40iw_dealloc_pd(struct ib_pd *ibpd)
static void i40iw_dealloc_pd(struct ib_pd *ibpd)
{
struct i40iw_pd *iwpd = to_iwpd(ibpd);
struct i40iw_device *iwdev = to_iwdev(ibpd->device);
i40iw_rem_pdusecount(iwpd, iwdev);
return 0;
}
/**
@@ -565,7 +534,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
struct i40iw_device *iwdev = to_iwdev(ibpd->device);
struct i40iw_cqp *iwcqp = &iwdev->cqp;
struct i40iw_qp *iwqp;
struct i40iw_ucontext *ucontext;
struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct i40iw_ucontext, ibucontext);
struct i40iw_create_qp_req req;
struct i40iw_create_qp_resp uresp;
u32 qp_num = 0;
@@ -674,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
}
iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
iwqp->user_mode = 1;
ucontext = to_ucontext(ibpd->uobject->context);
if (req.user_wqe_buffers) {
struct i40iw_pbl *iwpbl;
@@ -1369,32 +1338,29 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
{
struct ib_umem *region = iwmr->region;
struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
int chunk_pages, entry, i;
struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
struct i40iw_pble_info *pinfo;
struct scatterlist *sg;
struct sg_dma_page_iter sg_iter;
u64 pg_addr = 0;
u32 idx = 0;
bool first_pg = true;
pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
chunk_pages = sg_dma_len(sg) >> region->page_shift;
if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
!iwpbl->qp_mr.sq_page)
iwpbl->qp_mr.sq_page = sg_page(sg);
for (i = 0; i < chunk_pages; i++) {
pg_addr = sg_dma_address(sg) +
(i << region->page_shift);
if (iwmr->type == IW_MEMREG_TYPE_QP)
iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
if ((entry + i) == 0)
*pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
else if (!(pg_addr & ~iwmr->page_msk))
*pbl = cpu_to_le64(pg_addr);
else
continue;
pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
}
for_each_sg_dma_page (region->sg_head.sgl, &sg_iter, region->nmap, 0) {
pg_addr = sg_page_iter_dma_address(&sg_iter);
if (first_pg)
*pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
else if (!(pg_addr & ~iwmr->page_msk))
*pbl = cpu_to_le64(pg_addr);
else
continue;
first_pg = false;
pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
}
}
@@ -1831,7 +1797,8 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
{
struct i40iw_pd *iwpd = to_iwpd(pd);
struct i40iw_device *iwdev = to_iwdev(pd->device);
struct i40iw_ucontext *ucontext;
struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct i40iw_ucontext, ibucontext);
struct i40iw_pble_alloc *palloc;
struct i40iw_pbl *iwpbl;
struct i40iw_mr *iwmr;
@@ -1852,7 +1819,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
if (length > I40IW_MAX_MR_SIZE)
return ERR_PTR(-EINVAL);
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
region = ib_umem_get(udata, start, length, acc, 0);
if (IS_ERR(region))
return (struct ib_mr *)region;
@@ -1872,7 +1839,6 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
iwmr->region = region;
iwmr->ibmr.pd = pd;
iwmr->ibmr.device = pd->device;
ucontext = to_ucontext(pd->uobject->context);
iwmr->page_size = PAGE_SIZE;
iwmr->page_msk = PAGE_MASK;
@@ -2139,9 +2105,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct i40iw_ib_device *iwibdev = container_of(dev,
struct i40iw_ib_device,
ibdev.dev);
struct i40iw_ib_device *iwibdev =
rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev);
u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
return sprintf(buf, "%x\n", hw_rev);
@@ -2751,6 +2716,8 @@ static const struct ib_device_ops i40iw_dev_ops = {
.query_qp = i40iw_query_qp,
.reg_user_mr = i40iw_reg_user_mr,
.req_notify_cq = i40iw_req_notify_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext),
};
/**
@@ -2763,7 +2730,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
struct net_device *netdev = iwdev->netdev;
struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev));
iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
if (!iwibdev) {
i40iw_pr_err("iwdev == NULL\n");
return NULL;
@@ -2868,7 +2835,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
iwibdev = iwdev->iwibdev;
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
if (ret)
goto error;

View File

@@ -1,7 +1,6 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---

View File

@@ -39,7 +39,7 @@
#include "mlx4_ib.h"
#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
#define CM_CLEANUP_CACHE_TIMEOUT (30 * HZ)
struct id_map_entry {
struct rb_node node;

View File

@@ -134,16 +134,16 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
u64 buf_addr, int cqe)
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
struct mlx4_ib_cq_buf *buf,
struct ib_umem **umem, u64 buf_addr, int cqe)
{
int err;
int cqe_size = dev->dev->caps.cqe_size;
int shift;
int n;
*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
*umem = ib_umem_get(udata, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -190,7 +190,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
return ERR_PTR(-EINVAL);
cq = kmalloc(sizeof *cq, GFP_KERNEL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -213,14 +213,13 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
}
buf_addr = (void *)(unsigned long)ucmd.buf_addr;
err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem,
ucmd.buf_addr, entries);
if (err)
goto err_cq;
err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
&cq->db);
err = mlx4_ib_db_map_user(to_mucontext(context), udata,
ucmd.db_addr, &cq->db);
if (err)
goto err_mtt;
@@ -336,7 +335,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
if (!cq->resize_buf)
return -ENOMEM;
err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf,
&cq->resize_umem, ucmd.buf_addr, entries);
if (err) {
kfree(cq->resize_buf);

View File

@@ -41,7 +41,8 @@ struct mlx4_ib_user_db_page {
int refcnt;
};
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
@@ -61,8 +62,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
PAGE_SIZE, 0, 0);
page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);

View File

@@ -1076,17 +1076,18 @@ out:
return err;
}
static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
struct ib_device *ibdev = uctx->device;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
struct mlx4_ib_ucontext *context = to_mucontext(uctx);
struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
return -EAGAIN;
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
@@ -1100,15 +1101,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
resp.cqe_size = dev->dev->caps.cqe_size;
}
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
if (err) {
kfree(context);
return ERR_PTR(err);
}
if (err)
return err;
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1123,21 +1118,17 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
return ERR_PTR(-EFAULT);
return -EFAULT;
}
return &context->ibucontext;
return err;
}
static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
kfree(context);
return 0;
}
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
@@ -1186,38 +1177,27 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
}
}
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_pd *pd;
struct mlx4_ib_pd *pd = to_mpd(ibpd);
struct ib_device *ibdev = ibpd->device;
int err;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (err)
return err;
if (context)
if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
kfree(pd);
return ERR_PTR(-EFAULT);
}
return &pd->ibpd;
if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
return -EFAULT;
}
return 0;
}
static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
static void mlx4_ib_dealloc_pd(struct ib_pd *pd)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
kfree(pd);
return 0;
}
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
@@ -2043,7 +2023,7 @@ static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -2052,7 +2032,7 @@ static ssize_t hw_rev_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -2061,7 +2041,8 @@ static ssize_t board_id_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
dev->dev->board_id);
}
@@ -2579,6 +2560,8 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.req_notify_cq = mlx4_ib_arm_cq,
.rereg_user_mr = mlx4_ib_rereg_user_mr,
.resize_cq = mlx4_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
};
static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
@@ -2634,7 +2617,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (num_ports == 0)
return NULL;
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
if (!ibdev) {
dev_err(&dev->persist->pdev->dev,
"Device struct alloc failed\n");
@@ -2856,7 +2839,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
goto err_diag_counters;
if (mlx4_ib_mad_init(ibdev))

View File

@@ -722,7 +722,8 @@ static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);

View File

@@ -367,7 +367,7 @@ end:
return block_shift;
}
static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
u64 length, u64 virt_addr,
int access_flags)
{
@@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
up_read(&current->mm->mmap_sem);
}
return ib_umem_get(context, start, length, access_flags, 0);
return ib_umem_get(udata, start, length, access_flags, 0);
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -415,8 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
virt_addr, access_flags);
mr->umem =
mlx4_get_umem_mr(udata, start, length, virt_addr, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -505,9 +505,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
mmr->umem =
mlx4_get_umem_mr(mr->uobject->context, start, length,
virt_addr, mr_access_flags);
mmr->umem = mlx4_get_umem_mr(udata, start, length, virt_addr,
mr_access_flags);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */

View File

@@ -41,6 +41,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/uverbs_ioctl.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>
@@ -52,7 +53,8 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state);
static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state,
struct ib_udata *udata);
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
@@ -863,6 +865,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
int err;
struct mlx4_ib_sqp *sqp = NULL;
struct mlx4_ib_qp *qp;
struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
unsigned long flags;
@@ -1015,9 +1019,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
(qp->sq.wqe_cnt << qp->sq.wqe_shift);
}
qp->umem = ib_umem_get(pd->uobject->context,
(src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
ucmd.wq.buf_addr, qp->buf_size, 0, 0);
qp->umem =
ib_umem_get(udata,
(src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
ucmd.wq.buf_addr,
qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
@@ -1035,9 +1041,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_mtt;
if (qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
err = mlx4_ib_db_map_user(
context, udata,
(src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr :
ucmd.wq.db_addr, &qp->db);
ucmd.wq.db_addr,
&qp->db);
if (err)
goto err_mtt;
}
@@ -1108,8 +1116,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
}
} else if (src == MLX4_IB_RWQ_SRC) {
err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp,
range_size, &qpn);
err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
if (err)
goto err_wrid;
} else {
@@ -1180,8 +1187,7 @@ err_qpn:
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qpn, 1);
else if (src == MLX4_IB_RWQ_SRC)
mlx4_ib_release_wqn(to_mucontext(pd->uobject->context),
qp, 0);
mlx4_ib_release_wqn(context, qp, 0);
else
mlx4_qp_release_range(dev->dev, qpn, 1);
}
@@ -1191,7 +1197,7 @@ err_proxy:
err_wrid:
if (udata) {
if (qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
mlx4_ib_db_unmap_user(context, &qp->db);
} else {
kvfree(qp->sq.wrid);
kvfree(qp->rq.wrid);
@@ -1938,7 +1944,8 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
* Go over all RSS QP's childes (WQs) and apply their HW state according to
* their logic state if the RSS QP is the first RSS QP associated for the WQ.
*/
static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num,
struct ib_udata *udata)
{
int err = 0;
int i;
@@ -1962,7 +1969,7 @@ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
}
wq->port = port_num;
if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) {
err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY);
err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY, udata);
if (err) {
mutex_unlock(&wq->mutex);
break;
@@ -1984,7 +1991,8 @@ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
if ((wq->rss_usecnt == 1) &&
(ibwq->state == IB_WQS_RDY))
if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET,
udata))
pr_warn("failed to reverse WQN=0x%06x\n",
ibwq->wq_num);
wq->rss_usecnt--;
@@ -1996,7 +2004,8 @@ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
return err;
}
static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl)
static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl,
struct ib_udata *udata)
{
int i;
@@ -2007,7 +2016,7 @@ static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl)
mutex_lock(&wq->mutex);
if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY))
if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET, udata))
pr_warn("failed to reverse WQN=%x\n",
ibwq->wq_num);
wq->rss_usecnt--;
@@ -2039,9 +2048,10 @@ static void fill_qp_rss_context(struct mlx4_qp_context *context,
static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
enum ib_qp_state cur_state,
enum ib_qp_state new_state,
struct ib_udata *udata)
{
struct ib_uobject *ibuobject;
struct ib_srq *ibsrq;
const struct ib_gid_attr *gid_attr = NULL;
struct ib_rwq_ind_table *rwq_ind_tbl;
@@ -2050,6 +2060,8 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
struct mlx4_ib_qp *qp;
struct mlx4_ib_pd *pd;
struct mlx4_ib_cq *send_cq, *recv_cq;
struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
@@ -2061,7 +2073,6 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
struct ib_wq *ibwq;
ibwq = (struct ib_wq *)src;
ibuobject = ibwq->uobject;
ibsrq = NULL;
rwq_ind_tbl = NULL;
qp_type = IB_QPT_RAW_PACKET;
@@ -2072,7 +2083,6 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
struct ib_qp *ibqp;
ibqp = (struct ib_qp *)src;
ibuobject = ibqp->uobject;
ibsrq = ibqp->srq;
rwq_ind_tbl = ibqp->rwq_ind_tbl;
qp_type = ibqp->qp_type;
@@ -2157,11 +2167,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
context->param3 |= cpu_to_be32(1 << 30);
}
if (ibuobject)
if (ucontext)
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev,
to_mucontext(ibuobject->context)
->uar.index));
mlx4_to_hw_uar_index(dev->dev, ucontext->uar.index));
else
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
@@ -2293,7 +2301,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
/* Set "fast registration enabled" for all kernel QPs */
if (!ibuobject)
if (!ucontext)
context->params1 |= cpu_to_be32(1 << 11);
if (attr_mask & IB_QP_RNR_RETRY) {
@@ -2430,7 +2438,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
else
sqd_event = 0;
if (!ibuobject &&
if (!ucontext &&
cur_state == IB_QPS_RESET &&
new_state == IB_QPS_INIT)
context->rlkey_roce_mode |= (1 << 4);
@@ -2441,7 +2449,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
* headroom is stamped so that the hardware doesn't start
* processing stale work requests.
*/
if (!ibuobject &&
if (!ucontext &&
cur_state == IB_QPS_RESET &&
new_state == IB_QPS_INIT) {
struct mlx4_wqe_ctrl_seg *ctrl;
@@ -2505,7 +2513,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET) {
if (!ibuobject) {
if (!ucontext) {
mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
ibsrq ? to_msrq(ibsrq) : NULL);
if (send_cq != recv_cq)
@@ -2731,16 +2739,17 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) {
err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num);
err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num,
udata);
if (err)
goto out;
}
err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask,
cur_state, new_state);
cur_state, new_state, udata);
if (ibqp->rwq_ind_tbl && err)
bring_down_rss_rwqs(ibqp->rwq_ind_tbl);
bring_down_rss_rwqs(ibqp->rwq_ind_tbl, udata);
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
attr->port_num = 1;
@@ -4118,7 +4127,8 @@ static int ib_wq2qp_state(enum ib_wq_state state)
}
}
static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state,
struct ib_udata *udata)
{
struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
enum ib_qp_state qp_cur_state;
@@ -4142,7 +4152,8 @@ static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
attr_mask = IB_QP_PORT;
err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr,
attr_mask, IB_QPS_RESET, IB_QPS_INIT);
attr_mask, IB_QPS_RESET, IB_QPS_INIT,
udata);
if (err) {
pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n",
ibwq->wq_num);
@@ -4154,12 +4165,13 @@ static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
attr_mask = 0;
err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask,
qp_cur_state, qp_new_state);
qp_cur_state, qp_new_state, udata);
if (err && (qp_cur_state == IB_QPS_INIT)) {
qp_new_state = IB_QPS_RESET;
if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL,
attr_mask, IB_QPS_INIT, IB_QPS_RESET)) {
attr_mask, IB_QPS_INIT, IB_QPS_RESET,
udata)) {
pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n",
ibwq->wq_num);
qp_new_state = IB_QPS_INIT;
@@ -4222,7 +4234,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
* WQ, so we can apply its port on the WQ.
*/
if (qp->rss_usecnt)
err = _mlx4_ib_modify_wq(ibwq, new_state);
err = _mlx4_ib_modify_wq(ibwq, new_state, udata);
if (!err)
ibwq->state = new_state;

View File

@@ -37,6 +37,7 @@
#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>
#include <rdma/uverbs_ioctl.h>
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
@@ -73,6 +74,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
struct mlx4_wqe_data_seg *scatter;
@@ -113,8 +116,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
buf_size, 0, 0);
srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
if (IS_ERR(srq->umem)) {
err = PTR_ERR(srq->umem);
goto err_srq;
@@ -129,8 +131,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &srq->db);
err = mlx4_ib_db_map_user(ucontext, udata, ucmd.db_addr,
&srq->db);
if (err)
goto err_mtt;
} else {
@@ -203,7 +205,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
err_wrid:
if (udata)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
mlx4_ib_db_unmap_user(ucontext, &srq->db);
else
kvfree(srq->wrid);

View File

@@ -1,7 +1,6 @@
config MLX5_INFINIBAND
tristate "Mellanox 5th generation network adapters (ConnectX series) support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n
---help---
This driver provides low-level InfiniBand support for
Mellanox Connect-IB PCI Express host channel adapters (HCAs).

View File

@@ -389,19 +389,19 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dev->port[port_num].dbg_cc_params = NULL;
}
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
struct mlx5_core_dev *mdev;
int i;
if (!mlx5_debugfs_root)
goto out;
return;
/* Takes a 1-based port number */
mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
if (!mdev)
goto out;
return;
if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
!MLX5_CAP_GEN(mdev, cc_modify_allowed))
@@ -415,8 +415,6 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dbg_cc_params->root = debugfs_create_dir("cc_params",
mdev->priv.dbg_root);
if (!dbg_cc_params->root)
goto err;
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
dbg_cc_params->params[i].offset = i;
@@ -427,14 +425,11 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
0600, dbg_cc_params->root,
&dbg_cc_params->params[i],
&dbg_cc_fops);
if (!dbg_cc_params->params[i].dentry)
goto err;
}
put_mdev:
mlx5_ib_put_native_port_mdev(dev, port_num + 1);
out:
return 0;
return;
err:
mlx5_ib_warn(dev, "cong debugfs failure\n");
@@ -445,5 +440,5 @@ err:
* We don't want to fail driver if debugfs failed to initialize,
* so we are not forwarding error to the user.
*/
return 0;
return;
}

View File

@@ -187,8 +187,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wqe_ctr = be16_to_cpu(cqe->wqe_counter);
wc->wr_id = srq->wrid[wqe_ctr];
mlx5_ib_free_srq_wqe(srq, wqe_ctr);
if (msrq && atomic_dec_and_test(&msrq->refcount))
complete(&msrq->free);
if (msrq)
mlx5_core_res_put(&msrq->common);
}
} else {
wq = &qp->rq;
@@ -707,15 +707,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*cqe_size = ucmd.cqe_size;
cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
entries * ucmd.cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
cq->buf.umem =
ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(cq->buf.umem)) {
err = PTR_ERR(cq->buf.umem);
return err;
}
err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr,
&cq->db);
if (err)
goto err_umem;
@@ -1111,7 +1111,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
struct ib_umem *umem;
int err;
int npages;
struct ib_ucontext *context = cq->buf.umem->context;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err)
@@ -1124,7 +1123,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
return -EINVAL;
umem = ib_umem_get(context, ucmd.buf_addr,
umem = ib_umem_get(udata, ucmd.buf_addr,
(size_t)ucmd.cqe_size * entries,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(umem)) {

View File

@@ -8,6 +8,7 @@
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
@@ -17,12 +18,28 @@
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
enum devx_obj_flags {
DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
};
struct devx_async_data {
struct mlx5_ib_dev *mdev;
struct list_head list;
struct ib_uobject *fd_uobj;
struct mlx5_async_work cb_work;
u16 cmd_out_len;
/* must be last field in this structure */
struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};
#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
struct devx_obj {
struct mlx5_core_dev *mdev;
u64 obj_id;
u32 dinlen; /* destroy inbox length */
u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
u32 flags;
struct mlx5_ib_devx_mr devx_mr;
};
struct devx_umem {
@@ -1011,6 +1028,92 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
}
}
static int devx_handle_mkey_indirect(struct devx_obj *obj,
struct mlx5_ib_dev *dev,
void *in, void *out)
{
struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
unsigned long flags;
struct mlx5_core_mkey *mkey;
void *mkc;
u8 key;
int err;
mkey = &devx_mr->mmkey;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
key = MLX5_GET(mkc, mkc, mkey_7_0);
mkey->key = mlx5_idx_to_mkey(
MLX5_GET(create_mkey_out, out, mkey_index)) | key;
mkey->type = MLX5_MKEY_INDIRECT_DEVX;
mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
mkey->size = MLX5_GET64(mkc, mkc, len);
mkey->pd = MLX5_GET(mkc, mkc, pd);
devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
write_lock_irqsave(&table->lock, flags);
err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
mkey);
write_unlock_irqrestore(&table->lock, flags);
return err;
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
struct devx_obj *obj,
void *in, int in_len)
{
int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
MLX5_FLD_SZ_BYTES(create_mkey_in,
memory_key_mkey_entry);
void *mkc;
u8 access_mode;
if (in_len < min_len)
return -EINVAL;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
return 0;
}
MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
return 0;
}
static void devx_free_indirect_mkey(struct rcu_head *rcu)
{
kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
}
/* This function to delete from the radix tree needs to be called before
* destroying the underlying mkey. Otherwise a race might occur in case that
* other thread will get the same mkey before this one will be deleted,
* in that case it will fail via inserting to the tree its own data.
*
* Note:
* An error in the destroy is not expected unless there is some other indirect
* mkey which points to this one. In a kernel cleanup flow it will be just
* destroyed in the iterative destruction call. In a user flow, in case
* the application didn't close in the expected order it's its own problem,
* the mkey won't be part of the tree, in both cases the kernel is safe.
*/
static void devx_cleanup_mkey(struct devx_obj *obj)
{
struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
unsigned long flags;
write_lock_irqsave(&table->lock, flags);
radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key));
write_unlock_irqrestore(&table->lock, flags);
}
static int devx_obj_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -1018,10 +1121,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
struct devx_obj *obj = uobject->object;
int ret;
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
devx_cleanup_mkey(obj);
ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
struct mlx5_ib_dev *dev = to_mdev(uobject->context->device);
call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
devx_free_indirect_mkey);
return ret;
}
kfree(obj);
return ret;
}
@@ -1032,10 +1146,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
int cmd_in_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
void *cmd_out;
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
struct devx_obj *obj;
@@ -1060,10 +1177,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
return -ENOMEM;
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
devx_set_umem_valid(cmd_in);
if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
if (err)
goto obj_free;
} else {
devx_set_umem_valid(cmd_in);
}
err = mlx5_cmd_exec(dev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
cmd_in_len,
cmd_out, cmd_out_len);
if (err)
goto obj_free;
@@ -1074,13 +1197,22 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
if (err)
goto obj_destroy;
}
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
goto obj_destroy;
goto err_copy;
obj->obj_id = get_enc_obj_id(opcode, obj_id);
return 0;
err_copy:
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
devx_cleanup_mkey(obj);
obj_destroy:
mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
obj_free:
@@ -1096,8 +1228,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
void *cmd_out;
int err;
int uid;
@@ -1137,11 +1270,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
void *cmd_out;
int err;
int uid;
struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
@@ -1168,6 +1302,154 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
cmd_out, cmd_out_len);
}
struct devx_async_event_queue {
spinlock_t lock;
wait_queue_head_t poll_wait;
struct list_head event_list;
atomic_t bytes_in_use;
u8 is_destroyed:1;
};
struct devx_async_cmd_event_file {
struct ib_uobject uobj;
struct devx_async_event_queue ev_queue;
struct mlx5_async_ctx async_ctx;
};
static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
{
spin_lock_init(&ev_queue->lock);
INIT_LIST_HEAD(&ev_queue->event_list);
init_waitqueue_head(&ev_queue->poll_wait);
atomic_set(&ev_queue->bytes_in_use, 0);
ev_queue->is_destroyed = 0;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
struct uverbs_attr_bundle *attrs)
{
struct devx_async_cmd_event_file *ev_file;
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
ev_file = container_of(uobj, struct devx_async_cmd_event_file,
uobj);
devx_init_event_queue(&ev_file->ev_queue);
mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
return 0;
}
static void devx_query_callback(int status, struct mlx5_async_work *context)
{
struct devx_async_data *async_data =
container_of(context, struct devx_async_data, cb_work);
struct ib_uobject *fd_uobj = async_data->fd_uobj;
struct devx_async_cmd_event_file *ev_file;
struct devx_async_event_queue *ev_queue;
unsigned long flags;
ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
uobj);
ev_queue = &ev_file->ev_queue;
spin_lock_irqsave(&ev_queue->lock, flags);
list_add_tail(&async_data->list, &ev_queue->event_list);
spin_unlock_irqrestore(&ev_queue->lock, flags);
wake_up_interruptible(&ev_queue->poll_wait);
fput(fd_uobj->object);
}
#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
u16 cmd_out_len;
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct ib_uobject *fd_uobj;
int err;
int uid;
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
struct devx_async_cmd_event_file *ev_file;
struct devx_async_data *async_data;
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
if (!devx_is_obj_query_cmd(cmd_in))
return -EINVAL;
err = uverbs_get_const(&cmd_out_len, attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
if (err)
return err;
if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
fd_uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
if (IS_ERR(fd_uobj))
return PTR_ERR(fd_uobj);
ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
uobj);
if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
MAX_ASYNC_BYTES_IN_USE) {
atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
return -EAGAIN;
}
async_data = kvzalloc(struct_size(async_data, hdr.out_data,
cmd_out_len), GFP_KERNEL);
if (!async_data) {
err = -ENOMEM;
goto sub_bytes;
}
err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
if (err)
goto free_async;
async_data->cmd_out_len = cmd_out_len;
async_data->mdev = mdev;
async_data->fd_uobj = fd_uobj;
get_file(fd_uobj->object);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
async_data->hdr.out_data,
async_data->cmd_out_len,
devx_query_callback, &async_data->cb_work);
if (err)
goto cb_err;
return 0;
cb_err:
fput(fd_uobj->object);
free_async:
kvfree(async_data);
sub_bytes:
atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
return err;
}
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj)
@@ -1195,7 +1477,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
if (err)
return err;
obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
if (IS_ERR(obj->umem))
return PTR_ERR(obj->umem);
@@ -1252,7 +1534,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
u32 obj_id;
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
int err;
@@ -1313,6 +1596,123 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
return 0;
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
struct devx_async_data *event;
int ret = 0;
size_t eventsz;
spin_lock_irq(&ev_queue->lock);
while (list_empty(&ev_queue->event_list)) {
spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(
ev_queue->poll_wait,
(!list_empty(&ev_queue->event_list) ||
ev_queue->is_destroyed))) {
return -ERESTARTSYS;
}
if (list_empty(&ev_queue->event_list) &&
ev_queue->is_destroyed)
return -EIO;
spin_lock_irq(&ev_queue->lock);
}
event = list_entry(ev_queue->event_list.next,
struct devx_async_data, list);
eventsz = event->cmd_out_len +
sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
if (eventsz > count) {
spin_unlock_irq(&ev_queue->lock);
return -ENOSPC;
}
list_del(ev_queue->event_list.next);
spin_unlock_irq(&ev_queue->lock);
if (copy_to_user(buf, &event->hdr, eventsz))
ret = -EFAULT;
else
ret = eventsz;
atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
kvfree(event);
return ret;
}
static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
{
struct ib_uobject *uobj = filp->private_data;
struct devx_async_cmd_event_file *comp_ev_file = container_of(
uobj, struct devx_async_cmd_event_file, uobj);
struct devx_async_data *entry, *tmp;
spin_lock_irq(&comp_ev_file->ev_queue.lock);
list_for_each_entry_safe(entry, tmp,
&comp_ev_file->ev_queue.event_list, list)
kvfree(entry);
spin_unlock_irq(&comp_ev_file->ev_queue.lock);
uverbs_close_fd(filp);
return 0;
}
static __poll_t devx_async_cmd_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
__poll_t pollflags = 0;
poll_wait(filp, &ev_queue->poll_wait, wait);
spin_lock_irq(&ev_queue->lock);
if (ev_queue->is_destroyed)
pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
else if (!list_empty(&ev_queue->event_list))
pollflags = EPOLLIN | EPOLLRDNORM;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
const struct file_operations devx_async_cmd_event_fops = {
.owner = THIS_MODULE,
.read = devx_async_cmd_event_read,
.poll = devx_async_cmd_event_poll,
.release = devx_async_cmd_event_close,
.llseek = no_llseek,
};
static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
struct devx_async_cmd_event_file *comp_ev_file =
container_of(uobj, struct devx_async_cmd_event_file,
uobj);
struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
spin_lock_irq(&ev_queue->lock);
ev_queue->is_destroyed = 1;
spin_unlock_irq(&ev_queue->lock);
if (why == RDMA_REMOVE_DRIVER_REMOVE)
wake_up_interruptible(&ev_queue->poll_wait);
mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
return 0;
};
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_UMEM_REG,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
@@ -1423,6 +1823,27 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
UA_MANDATORY));
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
UA_MANDATORY,
UA_ALLOC_AND_COPY),
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
u16, UA_MANDATORY),
UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY));
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
@@ -1433,13 +1854,30 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UVERBS_ACCESS_NEW,
UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
devx_hot_unplug_async_cmd_event_file,
&devx_async_cmd_event_fops, "[devx_async_cmd]",
O_RDONLY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
static bool devx_is_supported(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1457,5 +1895,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_DEVX_UMEM,
UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
{},
};

View File

@@ -43,7 +43,8 @@ struct mlx5_ib_user_db_page {
int refcnt;
};
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct mlx5_db *db)
{
struct mlx5_ib_user_db_page *page;
@@ -63,8 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
PAGE_SIZE, 0, 0);
page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);

Some files were not shown because too many files have changed in this diff Show More