{net,IB}/mlx5: Refactor page fault handling

* Update page fault event according to last specification.
* Separate code path for page fault EQ, completion EQ and async EQ.
* Move page fault handling work queue from mlx5_ib static variable
  into mlx5_core page fault EQ.
* Allocate memory to store ODP event dynamically as the
  events arrive, since in atomic context - use mempool.
* Make mlx5_ib page fault handler run in process context.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Artemy Kovalyov
2017-01-02 11:37:46 +02:00
committed by David S. Miller
parent 223cdc7242
commit d9aaed8387
12 changed files with 522 additions and 468 deletions

View File

@@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault {
__be16 wqe_index;
u16 reserved2;
__be16 packet_length;
u8 reserved3[12];
__be32 token;
u8 reserved4[8];
__be32 pftype_wq;
} __packed wqe;
struct {
__be32 r_key;
@@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault {
__be16 packet_length;
__be32 rdma_op_len;
__be64 rdma_va;
__be32 pftype_token;
} __packed rdma;
} __packed;
__be32 flags_qpn;
} __packed;
struct mlx5_eqe_vport_change {

View File

@@ -42,6 +42,7 @@
#include <linux/vmalloc.h>
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
#include <linux/mempool.h>
#include <linux/interrupt.h>
#include <linux/mlx5/device.h>
@@ -83,6 +84,7 @@ enum {
MLX5_EQ_VEC_PAGES = 0,
MLX5_EQ_VEC_CMD = 1,
MLX5_EQ_VEC_ASYNC = 2,
MLX5_EQ_VEC_PFAULT = 3,
MLX5_EQ_VEC_COMP_BASE,
};
@@ -178,6 +180,14 @@ enum mlx5_port_status {
MLX5_PORT_DOWN = 2,
};
enum mlx5_eq_type {
MLX5_EQ_TYPE_COMP,
MLX5_EQ_TYPE_ASYNC,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
MLX5_EQ_TYPE_PF,
#endif
};
struct mlx5_uuar_info {
struct mlx5_uar *uars;
int num_uars;
@@ -333,6 +343,14 @@ struct mlx5_eq_tasklet {
spinlock_t lock;
};
struct mlx5_eq_pagefault {
struct work_struct work;
/* Pagefaults lock */
spinlock_t lock;
struct workqueue_struct *wq;
mempool_t *pool;
};
struct mlx5_eq {
struct mlx5_core_dev *dev;
__be32 __iomem *doorbell;
@@ -346,7 +364,13 @@ struct mlx5_eq {
struct list_head list;
int index;
struct mlx5_rsc_debug *dbg;
struct mlx5_eq_tasklet tasklet_ctx;
enum mlx5_eq_type type;
union {
struct mlx5_eq_tasklet tasklet_ctx;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct mlx5_eq_pagefault pf_ctx;
#endif
};
};
struct mlx5_core_psv {
@@ -377,6 +401,8 @@ struct mlx5_core_mkey {
u32 pd;
};
#define MLX5_24BIT_MASK ((1 << 24) - 1)
enum mlx5_res_type {
MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
@@ -411,6 +437,9 @@ struct mlx5_eq_table {
struct mlx5_eq pages_eq;
struct mlx5_eq async_eq;
struct mlx5_eq cmd_eq;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct mlx5_eq pfault_eq;
#endif
int num_comp_vectors;
/* protect EQs list
*/
@@ -497,6 +526,7 @@ struct mlx5_fc_stats {
struct mlx5_eswitch;
struct mlx5_lag;
struct mlx5_pagefault;
struct mlx5_rl_entry {
u32 rate;
@@ -601,6 +631,14 @@ struct mlx5_priv {
struct mlx5_rl_table rl_table;
struct mlx5_port_module_event_stats pme_stats;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void (*pfault)(struct mlx5_core_dev *dev,
void *context,
struct mlx5_pagefault *pfault);
void *pfault_ctx;
struct srcu_struct pfault_srcu;
#endif
};
enum mlx5_device_state {
@@ -619,6 +657,50 @@ enum mlx5_pci_status {
MLX5_PCI_STATUS_ENABLED,
};
enum mlx5_pagefault_type_flags {
MLX5_PFAULT_REQUESTOR = 1 << 0,
MLX5_PFAULT_WRITE = 1 << 1,
MLX5_PFAULT_RDMA = 1 << 2,
};
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
u32 token;
u8 event_subtype;
u8 type;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
* Number of resource holding WQE, depends on type.
*/
u32 wq_num;
/*
* WQE index. Refers to either the send queue or
* receive queue, according to event_subtype.
*/
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
* Received packet size, minimal size page fault
* resolution required for forward progress.
*/
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
struct mlx5_eq *eq;
struct work_struct work;
};
struct mlx5_td {
struct list_head tirs_list;
u32 tdn;
@@ -879,15 +961,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name, struct mlx5_uar *uar);
int nent, u64 mask, const char *name,
struct mlx5_uar *uar, enum mlx5_eq_type type);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
int mlx5_start_eqs(struct mlx5_core_dev *dev);
int mlx5_stop_eqs(struct mlx5_core_dev *dev);
@@ -926,6 +1006,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
u32 wq_num, u8 type, int error);
#endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
@@ -974,6 +1058,9 @@ struct mlx5_interface {
void (*detach)(struct mlx5_core_dev *dev, void *context);
void (*event)(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param);
void (*pfault)(struct mlx5_core_dev *dev,
void *context,
struct mlx5_pagefault *pfault);
void * (*get_dev)(void *context);
int protocol;
struct list_head list;

View File

@@ -50,9 +50,6 @@
#define MLX5_BSF_APPTAG_ESCAPE 0x1
#define MLX5_BSF_APPREF_ESCAPE 0x2
#define MLX5_QPN_BITS 24
#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1)
enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX5_QP_OPTPAR_RRE = 1 << 1,
@@ -418,46 +415,9 @@ struct mlx5_stride_block_ctrl_seg {
__be16 num_entries;
};
enum mlx5_pagefault_flags {
MLX5_PFAULT_REQUESTOR = 1 << 0,
MLX5_PFAULT_WRITE = 1 << 1,
MLX5_PFAULT_RDMA = 1 << 2,
};
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
u8 event_subtype;
enum mlx5_pagefault_flags flags;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
* WQE index. Refers to either the send queue or
* receive queue, according to event_subtype.
*/
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
* Received packet size, minimal size page fault
* resolution required for forward progress.
*/
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
};
struct mlx5_core_qp {
struct mlx5_core_rsc_common common; /* must be first */
void (*event) (struct mlx5_core_qp *, int);
void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
int qpn;
struct mlx5_rsc_debug *dbg;
int pid;
@@ -557,10 +517,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 context, int error);
#endif
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq);
void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,