Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle with many of the commits being smallish
  code fixes and improvements across the drivers.

   - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and
     rxe

   - Memory window support in hns

   - mlx5 user API 'flow mutate/steering' allows accessing the full
     packet mangling and matching machinery from user space

   - Support inter-working with verbs API calls in the 'devx' mlx5 user
     API, and provide options to use devx with less privilege

   - Modernize the use of syfs and the device interface to use attribute
     groups and cdev properly for uverbs, and clean up some of the core
     code's device list management

   - More progress on net namespaces for RDMA devices

   - Consolidate driver BAR mmapping support into core code helpers and
     rework how RDMA holds poitners to mm_struct for get_user_pages
     cases

   - First pass to use 'dev_name' instead of ib_device->name

   - Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
  IB/mlx5: Add support for extended atomic operations
  RDMA/core: Fix comment for hw stats init for port == 0
  RDMA/core: Refactor ib_register_device() function
  RDMA/core: Fix unwinding flow in case of error to register device
  ib_srp: Remove WARN_ON in srp_terminate_io()
  IB/mlx5: Allow scatter to CQE without global signaled WRs
  IB/mlx5: Verify that driver supports user flags
  IB/mlx5: Support scatter to CQE for DC transport type
  RDMA/drivers: Use core provided API for registering device attributes
  RDMA/core: Allow existing drivers to set one sysfs group per device
  IB/rxe: Remove unnecessary enum values
  RDMA/umad: Use kernel API to allocate umad indexes
  RDMA/uverbs: Use kernel API to allocate uverbs indexes
  RDMA/core: Increase total number of RDMA ports across all devices
  IB/mlx4: Add port and TID to MAD debug print
  IB/mlx4: Enable debug print of SMPs
  RDMA/core: Rename ports_parent to ports_kobj
  RDMA/core: Do not expose unsupported counters
  IB/mlx4: Refer to the device kobject instead of ports_parent
  RDMA/nldev: Allow IB device rename through RDMA netlink
  ...
This commit is contained in:
Linus Torvalds
2018-10-26 07:38:19 -07:00
204 changed files with 7633 additions and 5205 deletions

View File

@@ -69,8 +69,11 @@
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
struct ib_umem_odp;
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
extern struct workqueue_struct *ib_comp_unbound_wq;
union ib_gid {
u8 raw[16];
@@ -1137,7 +1140,9 @@ enum ib_qp_create_flags {
*/
struct ib_qp_init_attr {
/* Consumer's event_handler callback must not block */
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
@@ -1146,7 +1151,7 @@ struct ib_qp_init_attr {
struct ib_qp_cap cap;
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
u32 create_flags;
/*
* Only needed for special QP types, or when using the RW API.
@@ -1278,21 +1283,27 @@ struct ib_qp_attr {
};
enum ib_wr_opcode {
IB_WR_RDMA_WRITE,
IB_WR_RDMA_WRITE_WITH_IMM,
IB_WR_SEND,
IB_WR_SEND_WITH_IMM,
IB_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO,
IB_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV,
IB_WR_LOCAL_INV,
IB_WR_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
/* These are shared with userspace */
IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
IB_WR_SEND = IB_UVERBS_WR_SEND,
IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO = IB_UVERBS_WR_TSO,
IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
IB_WR_REG_SIG_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
*/
@@ -1485,26 +1496,15 @@ struct ib_ucontext {
* it is set when we are closing the file descriptor and indicates
* that mm_sem may be locked.
*/
int closing;
bool closing;
bool cleanup_retryable;
struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct rb_root_cached umem_tree;
/*
* Protects .umem_rbroot and tree, as well as odp_mrs_count and
* mmu notifiers registration.
*/
struct rw_semaphore umem_rwsem;
void (*invalidate_range)(struct ib_umem *umem,
void (*invalidate_range)(struct ib_umem_odp *umem_odp,
unsigned long start, unsigned long end);
struct mmu_notifier mn;
atomic_t notifier_count;
/* A list of umems that don't have private mmu notifier counters yet. */
struct list_head no_private_counters;
int odp_mrs_count;
struct mutex per_mm_list_lock;
struct list_head per_mm_list;
#endif
struct ib_rdmacg_object cg_obj;
@@ -1570,9 +1570,10 @@ struct ib_ah {
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
enum ib_poll_context {
IB_POLL_DIRECT, /* caller context, no hw completions */
IB_POLL_SOFTIRQ, /* poll from softirq context */
IB_POLL_WORKQUEUE, /* poll from workqueue */
IB_POLL_DIRECT, /* caller context, no hw completions */
IB_POLL_SOFTIRQ, /* poll from softirq context */
IB_POLL_WORKQUEUE, /* poll from workqueue */
IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
};
struct ib_cq {
@@ -1589,6 +1590,7 @@ struct ib_cq {
struct irq_poll iop;
struct work_struct work;
};
struct workqueue_struct *comp_wq;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2263,10 +2265,11 @@ struct ib_device {
struct list_head event_handler_list;
spinlock_t event_handler_lock;
spinlock_t client_data_lock;
rwlock_t client_data_lock;
struct list_head core_list;
/* Access to the client_data_list is protected by the client_data_lock
* spinlock and the lists_rwsem read-write semaphore */
* rwlock and the lists_rwsem read-write semaphore
*/
struct list_head client_data_list;
struct ib_cache cache;
@@ -2550,7 +2553,13 @@ struct ib_device {
struct module *owner;
struct device dev;
struct kobject *ports_parent;
/* First group for device attributes,
* Second group for driver provided attributes (optional).
* It is NULL terminated array.
*/
const struct attribute_group *groups[3];
struct kobject *ports_kobj;
struct list_head port_list;
enum {
@@ -2633,9 +2642,9 @@ void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
int ib_register_device(struct ib_device *device, const char *name,
int (*port_callback)(struct ib_device *, u8,
struct kobject *));
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
@@ -2645,6 +2654,28 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
unsigned long pfn, unsigned long size, pgprot_t prot);
int rdma_user_mmap_page(struct ib_ucontext *ucontext,
struct vm_area_struct *vma, struct page *page,
unsigned long size);
#else
static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
struct vm_area_struct *vma,
unsigned long pfn, unsigned long size,
pgprot_t prot)
{
return -EINVAL;
}
static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext,
struct vm_area_struct *vma, struct page *page,
unsigned long size)
{
return -EINVAL;
}
#endif
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
@@ -2728,7 +2759,6 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
* @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@@ -2737,8 +2767,7 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
* and that the attribute mask supplied is allowed for the transition.
*/
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask,
enum rdma_link_layer ll);
enum ib_qp_type type, enum ib_qp_attr_mask mask);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -4167,20 +4196,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
}
static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
struct ib_qp *qp, struct ib_device *device)
{
uobj->object = ibflow;
ibflow->uobject = uobj;
if (qp) {
atomic_inc(&qp->usecnt);
ibflow->qp = qp;
}
ibflow->device = device;
}
/**
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
@@ -4205,4 +4220,26 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num,
void (*setup)(struct net_device *),
struct net_device *netdev);
/**
* rdma_set_device_sysfs_group - Set device attributes group to have
* driver specific sysfs entries at
* for infiniband class.
*
* @device: device pointer for which attributes to be created
* @group: Pointer to group which should be added when device
* is registered with sysfs.
* rdma_set_device_sysfs_group() allows existing drivers to expose one
* group per device to have sysfs attributes.
*
* NOTE: New drivers should not make use of this API; instead new device
* parameter should be exposed via netlink command. This API and mechanism
* exist only for existing drivers.
*/
static inline void
rdma_set_device_sysfs_group(struct ib_device *dev,
const struct attribute_group *group)
{
dev->groups[1] = group;
}
#endif /* IB_VERBS_H */