Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull infiniband/rdma updates from Doug Ledford:
 "This is a fairly sizeable set of changes.  I've put them through a
  decent amount of testing prior to sending the pull request due to
  that.

  There are still a few fixups that I know are coming, but I wanted to
  go ahead and get the big, sizable chunk into your hands sooner rather
  than waiting for those last few fixups.

  Of note is the fact that this creates what is intended to be a
  temporary area in the drivers/staging tree specifically for some
  cleanups and additions that are coming for the RDMA stack.  We
  deprecated two drivers (ipath and amso1100) and are waiting to hear
  back if we can deprecate another one (ehca).  We also put Intel's new
  hfi1 driver into this area because it needs to be refactored and a
  transfer library created out of the factored out code, and then it and
  the qib driver and the soft-roce driver should all be modified to use
  that library.

  I expect drivers/staging/rdma to be around for three or four kernel
  releases and then to go away as all of the work is completed and final
  deletions of deprecated drivers are done.

  Summary of changes for 4.3:

   - Create drivers/staging/rdma
   - Move amso1100 driver to staging/rdma and schedule for deletion
   - Move ipath driver to staging/rdma and schedule for deletion
   - Add hfi1 driver to staging/rdma and set TODO for move to regular
     tree
   - Initial support for namespaces to be used on RDMA devices
   - Add RoCE GID table handling to the RDMA core caching code
   - Infrastructure to support handling of devices with differing read
     and write scatter gather capabilities
   - Various iSER updates
   - Kill off unsafe usage of global mr registrations
   - Update SRP driver
   - Misc mlx4 driver updates
   - Support for the mr_alloc verb
   - Support for a netlink interface between kernel and user space cache
     daemon to speed path record queries and route resolution
   - Initial support for safe hot removal of verbs devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (136 commits)
  IB/ipoib: Suppress warning for send only join failures
  IB/ipoib: Clean up send-only multicast joins
  IB/srp: Fix possible protection fault
  IB/core: Move SM class defines from ib_mad.h to ib_smi.h
  IB/core: Remove unnecessary defines from ib_mad.h
  IB/hfi1: Add PSM2 user space header to header_install
  IB/hfi1: Add CSRs for CONFIG_SDMA_VERBOSITY
  mlx5: Fix incorrect wc pkey_index assignment for GSI messages
  IB/mlx5: avoid destroying a NULL mr in reg_user_mr error flow
  IB/uverbs: reject invalid or unknown opcodes
  IB/cxgb4: Fix if statement in pick_local_ip6adddrs
  IB/sa: Fix rdma netlink message flags
  IB/ucma: HW Device hot-removal support
  IB/mlx4_ib: Disassociate support
  IB/uverbs: Enable device removal when there are active user space applications
  IB/uverbs: Explicitly pass ib_dev to uverbs commands
  IB/uverbs: Fix race between ib_uverbs_open and remove_one
  IB/uverbs: Fix reference counting usage of event files
  IB/core: Make ib_dealloc_pd return void
  IB/srp: Create an insecure all physical rkey only if needed
  ...
Committed by Linus Torvalds on 2015-09-09 08:33:31 -07:00
231 changed files with 64456 additions and 2756 deletions

drivers/infiniband/Kconfig

@@ -55,10 +55,8 @@ config INFINIBAND_ADDR_TRANS
default y
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/amso1100/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"

drivers/infiniband/core/Makefile

@@ -9,7 +9,8 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o

drivers/infiniband/core/cache.c

@@ -37,6 +37,8 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>
#include <rdma/ib_cache.h>
@@ -47,76 +49,621 @@ struct ib_pkey_cache {
u16 table[0];
};
struct ib_gid_cache {
int table_len;
union ib_gid table[0];
};
struct ib_update_work {
struct work_struct work;
struct ib_device *device;
u8 port_num;
};
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
static const struct ib_gid_attr zattr;
enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2,
};
enum gid_table_entry_props {
GID_TABLE_ENTRY_INVALID = 1UL << 0,
GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
};
enum gid_table_write_action {
GID_TABLE_WRITE_ACTION_ADD,
GID_TABLE_WRITE_ACTION_DEL,
/* MODIFY only updates the GID table. Currently only used by
* ib_cache_update.
*/
GID_TABLE_WRITE_ACTION_MODIFY
};
struct ib_gid_table_entry {
/* This lock protects an entry from being
* read and written simultaneously.
*/
rwlock_t lock;
unsigned long props;
union ib_gid gid;
struct ib_gid_attr attr;
void *context;
};
struct ib_gid_table {
int sz;
/* In RoCE, adding a GID to the table requires:
* (a) Find out if this GID already exists.
* (b) Find a free space.
* (c) Write the new GID
*
* Delete requires different set of operations:
* (a) Find the GID
* (b) Delete it.
*
* Add/delete should be carried out atomically.
* This is done by locking this mutex from multiple
* writers. We don't need this lock for IB, as the MAD
* layer replaces all entries. All data_vec entries
* are locked by this lock.
**/
struct mutex lock;
struct ib_gid_table_entry *data_vec;
};
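
The comment above describes a two-level locking scheme: a per-table mutex serializes writers so the find-free-slot-then-write sequence is atomic, while each entry carries its own rwlock so readers never sleep. A minimal, generic sketch of that pattern (illustrative names only, not part of this patch):

#include <linux/mutex.h>
#include <linux/spinlock.h>

/* demo_table/demo_entry are made-up types used only to show the pattern. */
struct demo_entry {
	rwlock_t lock;		/* guards val against concurrent readers */
	int val;
};

struct demo_table {
	struct mutex lock;	/* serializes whole add/delete operations */
	struct demo_entry entries[8];
};

static void demo_write(struct demo_table *t, int ix, int val)
{
	mutex_lock(&t->lock);			/* one writer at a time */
	write_lock_irq(&t->entries[ix].lock);	/* shut out readers of this slot */
	t->entries[ix].val = val;
	write_unlock_irq(&t->entries[ix].lock);
	mutex_unlock(&t->lock);
}
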
static int write_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
enum gid_table_write_action action,
bool default_gid)
{
int ret = 0;
struct net_device *old_net_dev;
unsigned long flags;
/* For devices where rdma_cap_roce_gid_table() holds, this function
* should be protected by a sleepable lock.
*/
write_lock_irqsave(&table->data_vec[ix].lock, flags);
if (rdma_cap_roce_gid_table(ib_dev, port)) {
table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
* RoCE providers and thus only updates the cache.
*/
if (action == GID_TABLE_WRITE_ACTION_ADD)
ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
&table->data_vec[ix].context);
else if (action == GID_TABLE_WRITE_ACTION_DEL)
ret = ib_dev->del_gid(ib_dev, port, ix,
&table->data_vec[ix].context);
write_lock_irqsave(&table->data_vec[ix].lock, flags);
}
old_net_dev = table->data_vec[ix].attr.ndev;
if (old_net_dev && old_net_dev != attr->ndev)
dev_put(old_net_dev);
/* if modify_gid failed, just delete the old gid */
if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
gid = &zgid;
attr = &zattr;
table->data_vec[ix].context = NULL;
}
if (default_gid)
table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
if (table->data_vec[ix].attr.ndev &&
table->data_vec[ix].attr.ndev != old_net_dev)
dev_hold(table->data_vec[ix].attr.ndev);
table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
struct ib_event event;
event.device = ib_dev;
event.element.port_num = port;
event.event = IB_EVENT_GID_CHANGE;
ib_dispatch_event(&event);
}
return ret;
}
static int add_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
bool default_gid) {
return write_gid(ib_dev, port, table, ix, gid, attr,
GID_TABLE_WRITE_ACTION_ADD, default_gid);
}
static int modify_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
bool default_gid) {
return write_gid(ib_dev, port, table, ix, gid, attr,
GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
}
static int del_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
bool default_gid) {
return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
GID_TABLE_WRITE_ACTION_DEL, default_gid);
}
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
unsigned long mask)
{
int i;
for (i = 0; i < table->sz; i++) {
unsigned long flags;
struct ib_gid_attr *attr = &table->data_vec[i].attr;
read_lock_irqsave(&table->data_vec[i].lock, flags);
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
goto next;
if (mask & GID_ATTR_FIND_MASK_GID &&
memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
goto next;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
attr->ndev != val->ndev)
goto next;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
!!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
default_gid)
goto next;
read_unlock_irqrestore(&table->data_vec[i].lock, flags);
return i;
next:
read_unlock_irqrestore(&table->data_vec[i].lock, flags);
}
return -1;
}
static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
addrconf_ifid_eui48(&gid->raw[8], dev);
}
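
make_default_gid() forms the default RoCE GID from the fe80::/64 link-local prefix plus an interface ID that addrconf_ifid_eui48() derives from the netdev's 48-bit MAC address. A rough standalone illustration of that EUI-48 to modified-EUI-64 mapping (eui48_to_default_gid() is a hypothetical helper, not kernel code):

#include <stdint.h>
#include <string.h>

static void eui48_to_default_gid(const uint8_t mac[6], uint8_t gid[16])
{
	/* fe80::/64 link-local prefix in the upper 8 bytes */
	static const uint8_t prefix[8] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0 };

	memcpy(gid, prefix, 8);
	gid[8]  = mac[0] ^ 0x02;	/* flip the universal/local bit */
	gid[9]  = mac[1];
	gid[10] = mac[2];
	gid[11] = 0xff;			/* insert ff:fe in the middle */
	gid[12] = 0xfe;
	gid[13] = mac[3];
	gid[14] = mac[4];
	gid[15] = mac[5];
}
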
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
int ret = 0;
struct net_device *idev;
table = ports_table[port - rdma_start_port(ib_dev)];
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
if (ib_dev->get_netdev) {
idev = ib_dev->get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
/* Adding default GIDs is not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
dev_put(idev);
}
mutex_lock(&table->lock);
ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_NETDEV);
if (ix >= 0)
goto out_unlock;
ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_DEFAULT);
if (ix < 0) {
ret = -ENOSPC;
goto out_unlock;
}
add_gid(ib_dev, port, table, ix, gid, attr, false);
out_unlock:
mutex_unlock(&table->lock);
return ret;
}
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_NETDEV |
GID_ATTR_FIND_MASK_DEFAULT);
if (ix < 0)
goto out_unlock;
del_gid(ib_dev, port, table, ix, false);
out_unlock:
mutex_unlock(&table->lock);
return 0;
}
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
for (ix = 0; ix < table->sz; ix++)
if (table->data_vec[ix].attr.ndev == ndev)
del_gid(ib_dev, port, table, ix, false);
mutex_unlock(&table->lock);
return 0;
}
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long flags;
table = ports_table[port - rdma_start_port(ib_dev)];
if (index < 0 || index >= table->sz)
return -EINVAL;
read_lock_irqsave(&table->data_vec[index].lock, flags);
if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
read_unlock_irqrestore(&table->data_vec[index].lock, flags);
return -EAGAIN;
}
memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
if (attr) {
memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
if (attr->ndev)
dev_hold(attr->ndev);
}
read_unlock_irqrestore(&table->data_vec[index].lock, flags);
return 0;
}
static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
const union ib_gid *gid,
const struct ib_gid_attr *val,
unsigned long mask,
u8 *port, u16 *index)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
u8 p;
int local_index;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
table = ports_table[p];
local_index = find_gid(table, gid, val, false, mask);
if (local_index >= 0) {
if (index)
*index = local_index;
if (port)
*port = p + rdma_start_port(ib_dev);
return 0;
}
}
return -ENOENT;
}
static int ib_cache_gid_find(struct ib_device *ib_dev,
const union ib_gid *gid,
struct net_device *ndev, u8 *port,
u16 *index)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID;
struct ib_gid_attr gid_attr_val = {.ndev = ndev};
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
mask, port, index);
}
int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
u8 port, struct net_device *ndev,
u16 *index)
{
int local_index;
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID;
struct ib_gid_attr val = {.ndev = ndev};
if (port < rdma_start_port(ib_dev) ||
port > rdma_end_port(ib_dev))
return -ENOENT;
table = ports_table[port - rdma_start_port(ib_dev)];
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
local_index = find_gid(table, gid, &val, false, mask);
if (local_index >= 0) {
if (index)
*index = local_index;
return 0;
}
return -ENOENT;
}
static struct ib_gid_table *alloc_gid_table(int sz)
{
unsigned int i;
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
if (!table)
return NULL;
table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
if (!table->data_vec)
goto err_free_table;
mutex_init(&table->lock);
table->sz = sz;
for (i = 0; i < sz; i++)
rwlock_init(&table->data_vec[i].lock);
return table;
err_free_table:
kfree(table);
return NULL;
}
static void release_gid_table(struct ib_gid_table *table)
{
if (table) {
kfree(table->data_vec);
kfree(table);
}
}
static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
int i;
if (!table)
return;
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
sizeof(table->data_vec[i].gid)))
del_gid(ib_dev, port, table, i,
table->data_vec[i].props &
GID_ATTR_FIND_MASK_DEFAULT);
}
}
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
enum ib_cache_gid_default_mode mode)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
struct ib_gid_table *table;
int ix;
union ib_gid current_gid;
struct ib_gid_attr current_gid_attr = {};
table = ports_table[port - rdma_start_port(ib_dev)];
make_default_gid(ndev, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
/* Couldn't find default GID location */
WARN_ON(ix < 0);
mutex_lock(&table->lock);
if (!__ib_cache_gid_get(ib_dev, port, ix,
&current_gid, &current_gid_attr) &&
mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
!memcmp(&gid, &current_gid, sizeof(gid)) &&
!memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
goto unlock;
if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
memcmp(&current_gid_attr, &zattr,
sizeof(current_gid_attr))) &&
del_gid(ib_dev, port, table, ix, true)) {
pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
ix, gid.raw);
goto unlock;
}
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
gid.raw);
unlock:
if (current_gid_attr.ndev)
dev_put(current_gid_attr.ndev);
mutex_unlock(&table->lock);
}
static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
if (rdma_protocol_roce(ib_dev, port)) {
struct ib_gid_table_entry *entry = &table->data_vec[0];
entry->props |= GID_TABLE_ENTRY_DEFAULT;
}
return 0;
}
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
struct ib_gid_table **table;
int err = 0;
table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
if (!table) {
pr_warn("failed to allocate ib gid cache for %s\n",
ib_dev->name);
return -ENOMEM;
}
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
table[port] =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
if (!table[port]) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
table[port]);
if (err)
goto rollback_table_setup;
}
ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
table[port]);
release_gid_table(table[port]);
}
kfree(table);
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
struct ib_gid_table **table = ib_dev->cache.gid_cache;
u8 port;
if (!table)
return;
for (port = 0; port < ib_dev->phys_port_cnt; port++)
release_gid_table(table[port]);
kfree(table);
ib_dev->cache.gid_cache = NULL;
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
struct ib_gid_table **table = ib_dev->cache.gid_cache;
u8 port;
if (!table)
return;
for (port = 0; port < ib_dev->phys_port_cnt; port++)
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
table[port]);
}
static int gid_table_setup_one(struct ib_device *ib_dev)
{
int err;
err = _gid_table_setup_one(ib_dev);
if (err)
return err;
err = roce_rescan_device(ib_dev);
if (err) {
gid_table_cleanup_one(ib_dev);
gid_table_release_one(ib_dev);
}
return err;
}
int ib_get_cached_gid(struct ib_device *device,
u8 port_num,
int index,
union ib_gid *gid)
{
struct ib_gid_cache *cache;
unsigned long flags;
int ret = 0;
if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.gid_cache[port_num - rdma_start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*gid = cache->table[index];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
return __ib_cache_gid_get(device, port_num, index, gid, NULL);
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
u8 *port_num,
u16 *index)
{
struct ib_gid_cache *cache;
unsigned long flags;
int p, i;
int ret = -ENOENT;
*port_num = -1;
if (index)
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) {
cache = device->cache.gid_cache[p];
for (i = 0; i < cache->table_len; ++i) {
if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
*port_num = p + rdma_start_port(device);
if (index)
*index = i;
ret = 0;
goto found;
}
}
}
found:
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
return ib_cache_gid_find(device, gid, NULL, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);
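
With both lookup helpers now backed by the per-port GID tables, a ULP-style caller could resolve a GID to a (port, index) pair and read the entry back roughly as below. This is a hedged sketch, not code from this series; example_gid_lookup() is hypothetical and error handling is trimmed.

static int example_gid_lookup(struct ib_device *dev,
			      const union ib_gid *gid,
			      union ib_gid *out)
{
	u8 port;
	u16 index;
	int ret;

	ret = ib_find_cached_gid(dev, gid, &port, &index);
	if (ret)
		return ret;	/* GID not present on any port */

	return ib_get_cached_gid(dev, port, index, out);
}
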
@@ -243,9 +790,21 @@ static void ib_cache_update(struct ib_device *device,
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
struct ib_gid_cache {
int table_len;
union ib_gid table[0];
} *gid_cache = NULL;
int i;
int ret;
struct ib_gid_table *table;
struct ib_gid_table **ports_table = device->cache.gid_cache;
bool use_roce_gid_table =
rdma_cap_roce_gid_table(device, port);
if (port < rdma_start_port(device) || port > rdma_end_port(device))
return;
table = ports_table[port - rdma_start_port(device)];
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -265,12 +824,14 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache->table_len = tprops->pkey_tbl_len;
gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
sizeof *gid_cache->table, GFP_KERNEL);
if (!gid_cache)
goto err;
if (!use_roce_gid_table) {
gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
sizeof(*gid_cache->table), GFP_KERNEL);
if (!gid_cache)
goto err;
gid_cache->table_len = tprops->gid_tbl_len;
}
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
@@ -281,29 +842,36 @@ static void ib_cache_update(struct ib_device *device,
}
}
for (i = 0; i < gid_cache->table_len; ++i) {
ret = ib_query_gid(device, port, i, gid_cache->table + i);
if (ret) {
printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
if (!use_roce_gid_table) {
for (i = 0; i < gid_cache->table_len; ++i) {
ret = ib_query_gid(device, port, i,
gid_cache->table + i);
if (ret) {
printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
}
}
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
old_gid_cache = device->cache.gid_cache [port - rdma_start_port(device)];
device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
device->cache.gid_cache [port - rdma_start_port(device)] = gid_cache;
if (!use_roce_gid_table) {
for (i = 0; i < gid_cache->table_len; i++) {
modify_gid(device, port, table, i, gid_cache->table + i,
&zattr, false);
}
}
device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
write_unlock_irq(&device->cache.lock);
kfree(gid_cache);
kfree(old_pkey_cache);
kfree(old_gid_cache);
kfree(tprops);
return;
@@ -344,85 +912,88 @@ static void ib_cache_event(struct ib_event_handler *handler,
}
}
static void ib_cache_setup_one(struct ib_device *device)
int ib_cache_setup_one(struct ib_device *device)
{
int p;
int err;
rwlock_init(&device->cache.lock);
device->cache.pkey_cache =
kmalloc(sizeof *device->cache.pkey_cache *
kzalloc(sizeof *device->cache.pkey_cache *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
device->cache.gid_cache =
kmalloc(sizeof *device->cache.gid_cache *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
(rdma_end_port(device) -
rdma_start_port(device) + 1),
GFP_KERNEL);
if (!device->cache.pkey_cache || !device->cache.gid_cache ||
if (!device->cache.pkey_cache ||
!device->cache.lmc_cache) {
printk(KERN_WARNING "Couldn't allocate cache "
"for %s\n", device->name);
goto err;
return -ENOMEM;
}
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) {
device->cache.pkey_cache[p] = NULL;
device->cache.gid_cache [p] = NULL;
err = gid_table_setup_one(device);
if (err)
/* Allocated memory will be cleaned in the release function */
return err;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device));
}
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
if (ib_register_event_handler(&device->cache.event_handler))
goto err_cache;
err = ib_register_event_handler(&device->cache.event_handler);
if (err)
goto err;
return;
err_cache:
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
kfree(device->cache.gid_cache[p]);
}
return 0;
err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
gid_table_cleanup_one(device);
return err;
}
static void ib_cache_cleanup_one(struct ib_device *device)
void ib_cache_release_one(struct ib_device *device)
{
int p;
ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
kfree(device->cache.gid_cache[p]);
}
/*
* The release function frees all the cache elements.
* This function should be called as part of freeing
* all the device's resources when the cache could no
* longer be accessed.
*/
if (device->cache.pkey_cache)
for (p = 0;
p <= rdma_end_port(device) - rdma_start_port(device); ++p)
kfree(device->cache.pkey_cache[p]);
gid_table_release_one(device);
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static struct ib_client cache_client = {
.name = "cache",
.add = ib_cache_setup_one,
.remove = ib_cache_cleanup_one
};
int __init ib_cache_setup(void)
void ib_cache_cleanup_one(struct ib_device *device)
{
return ib_register_client(&cache_client);
/* The cleanup function unregisters the event handler,
* waits for all in-progress workqueue elements and cleans
* up the GID cache. This function should be called after
* the device was removed from the devices list and all
* clients were removed, so the cache exists but is
* non-functional and shouldn't be updated anymore.
*/
ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
}
void __init ib_cache_setup(void)
{
roce_gid_mgmt_init();
}
void __exit ib_cache_cleanup(void)
{
ib_unregister_client(&cache_client);
roce_gid_mgmt_cleanup();
}

drivers/infiniband/core/cm.c

@@ -58,7 +58,7 @@ MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cm_client = {
.name = "cm",
@@ -213,13 +213,15 @@ struct cm_id_private {
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
atomic_t refcount;
/* Number of clients sharing this ib_cm_id. Only valid for listeners.
* Protected by the cm.lock spinlock. */
int listen_sharecount;
struct ib_mad_send_buf *msg;
struct cm_timewait_info *timewait_info;
/* todo: use alternate port on send failure */
struct cm_av av;
struct cm_av alt_av;
struct ib_cm_compare_data *compare_data;
void *private_data;
__be64 tid;
@@ -440,40 +442,6 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
return cm_id_priv;
}
static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask)
{
int i;
for (i = 0; i < IB_CM_COMPARE_SIZE; i++)
dst[i] = src[i] & mask[i];
}
static int cm_compare_data(struct ib_cm_compare_data *src_data,
struct ib_cm_compare_data *dst_data)
{
u32 src[IB_CM_COMPARE_SIZE];
u32 dst[IB_CM_COMPARE_SIZE];
if (!src_data || !dst_data)
return 0;
cm_mask_copy(src, src_data->data, dst_data->mask);
cm_mask_copy(dst, dst_data->data, src_data->mask);
return memcmp(src, dst, sizeof(src));
}
static int cm_compare_private_data(u32 *private_data,
struct ib_cm_compare_data *dst_data)
{
u32 src[IB_CM_COMPARE_SIZE];
if (!dst_data)
return 0;
cm_mask_copy(src, private_data, dst_data->mask);
return memcmp(src, dst_data->data, sizeof(src));
}
/*
* Trivial helpers to strip endian annotation and compare; the
* endianness doesn't actually matter since we just need a stable
@@ -506,18 +474,14 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
__be64 service_mask = cm_id_priv->id.service_mask;
int data_cmp;
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
data_cmp = cm_compare_data(cm_id_priv->compare_data,
cur_cm_id_priv->compare_data);
if ((cur_cm_id_priv->id.service_mask & service_id) ==
(service_mask & cur_cm_id_priv->id.service_id) &&
(cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
!data_cmp)
(cm_id_priv->id.device == cur_cm_id_priv->id.device))
return cur_cm_id_priv;
if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
@@ -528,8 +492,6 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
link = &(*link)->rb_left;
else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
link = &(*link)->rb_right;
else if (data_cmp < 0)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
}
@@ -539,20 +501,16 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
}
static struct cm_id_private * cm_find_listen(struct ib_device *device,
__be64 service_id,
u32 *private_data)
__be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
int data_cmp;
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
data_cmp = cm_compare_private_data(private_data,
cm_id_priv->compare_data);
if ((cm_id_priv->id.service_mask & service_id) ==
cm_id_priv->id.service_id &&
(cm_id_priv->id.device == device) && !data_cmp)
(cm_id_priv->id.device == device))
return cm_id_priv;
if (device < cm_id_priv->id.device)
@@ -563,8 +521,6 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
node = node->rb_left;
else if (be64_gt(service_id, cm_id_priv->id.service_id))
node = node->rb_right;
else if (data_cmp < 0)
node = node->rb_left;
else
node = node->rb_right;
}
@@ -859,9 +815,15 @@ retest:
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id->state) {
case IB_CM_LISTEN:
cm_id->state = IB_CM_IDLE;
spin_unlock_irq(&cm_id_priv->lock);
spin_lock_irq(&cm.lock);
if (--cm_id_priv->listen_sharecount > 0) {
/* The id is still shared. */
cm_deref_id(cm_id_priv);
spin_unlock_irq(&cm.lock);
return;
}
rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
spin_unlock_irq(&cm.lock);
break;
@@ -930,7 +892,6 @@ retest:
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
kfree(cm_id_priv->compare_data);
kfree(cm_id_priv->private_data);
kfree(cm_id_priv);
}
@@ -941,11 +902,23 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
}
EXPORT_SYMBOL(ib_destroy_cm_id);
int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
struct ib_cm_compare_data *compare_data)
/**
* __ib_cm_listen - Initiates listening on the specified service ID for
* connection and service ID resolution requests.
* @cm_id: Connection identifier associated with the listen request.
* @service_id: Service identifier matched against incoming connection
* and service ID resolution requests. The service ID should be specified
* in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
* @service_mask: Mask applied to service ID used to listen across a
* range of service IDs. If set to 0, the service ID is matched
* exactly. This parameter is ignored if %service_id is set to
* IB_CM_ASSIGN_SERVICE_ID.
*/
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
__be64 service_mask)
{
struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
unsigned long flags;
int ret = 0;
service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
@@ -958,20 +931,9 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
if (cm_id->state != IB_CM_IDLE)
return -EINVAL;
if (compare_data) {
cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
GFP_KERNEL);
if (!cm_id_priv->compare_data)
return -ENOMEM;
cm_mask_copy(cm_id_priv->compare_data->data,
compare_data->data, compare_data->mask);
memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
sizeof(compare_data->mask));
}
cm_id->state = IB_CM_LISTEN;
++cm_id_priv->listen_sharecount;
spin_lock_irqsave(&cm.lock, flags);
if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
cm_id->service_mask = ~cpu_to_be64(0);
@@ -980,18 +942,95 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
cm_id->service_mask = service_mask;
}
cur_cm_id_priv = cm_insert_listen(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
if (cur_cm_id_priv) {
cm_id->state = IB_CM_IDLE;
kfree(cm_id_priv->compare_data);
cm_id_priv->compare_data = NULL;
--cm_id_priv->listen_sharecount;
ret = -EBUSY;
}
return ret;
}
int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&cm.lock, flags);
ret = __ib_cm_listen(cm_id, service_id, service_mask);
spin_unlock_irqrestore(&cm.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
/**
* Create a new listening ib_cm_id and listen on the given service ID.
*
* If there's an existing ID listening on that same device and service ID,
* return it.
*
* @device: Device associated with the cm_id. All related communication will
* be associated with the specified device.
* @cm_handler: Callback invoked to notify the user of CM events.
* @service_id: Service identifier matched against incoming connection
* and service ID resolution requests. The service ID should be specified
* in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
*
* Callers should call ib_destroy_cm_id when done with the listener ID.
*/
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
ib_cm_handler cm_handler,
__be64 service_id)
{
struct cm_id_private *cm_id_priv;
struct ib_cm_id *cm_id;
unsigned long flags;
int err = 0;
/* Create an ID in advance, since the creation may sleep */
cm_id = ib_create_cm_id(device, cm_handler, NULL);
if (IS_ERR(cm_id))
return cm_id;
spin_lock_irqsave(&cm.lock, flags);
if (service_id == IB_CM_ASSIGN_SERVICE_ID)
goto new_id;
/* Find an existing ID */
cm_id_priv = cm_find_listen(device, service_id);
if (cm_id_priv) {
if (cm_id->cm_handler != cm_handler || cm_id->context) {
/* Sharing an ib_cm_id with different handlers is not
* supported */
spin_unlock_irqrestore(&cm.lock, flags);
return ERR_PTR(-EINVAL);
}
atomic_inc(&cm_id_priv->refcount);
++cm_id_priv->listen_sharecount;
spin_unlock_irqrestore(&cm.lock, flags);
ib_destroy_cm_id(cm_id);
cm_id = &cm_id_priv->id;
return cm_id;
}
new_id:
/* Use newly created ID */
err = __ib_cm_listen(cm_id, service_id, 0);
spin_unlock_irqrestore(&cm.lock, flags);
if (err) {
ib_destroy_cm_id(cm_id);
return ERR_PTR(err);
}
return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);
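
Unlike ib_cm_listen(), ib_cm_insert_listen() both creates a listener and transparently shares an existing one (via listen_sharecount) when the device, service ID and handler already match. A hedged sketch of how a consumer might use it; example_listen() is hypothetical, and the RDMA CM's reworked cma_ib_listen() further down is the real in-tree user.

static struct ib_cm_id *example_listen(struct ib_device *device,
				       ib_cm_handler handler, __be64 svc_id)
{
	struct ib_cm_id *id;

	id = ib_cm_insert_listen(device, handler, svc_id);
	if (IS_ERR(id))
		return id;	/* e.g. -EINVAL if an existing listener uses
				 * a different handler */

	/* ... connection requests now arrive via handler ... */

	/* ib_destroy_cm_id(id) later drops the shared listener reference. */
	return id;
}
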
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
enum cm_msg_sequence msg_seq)
{
@@ -1268,6 +1307,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
primary_path->packet_life_time =
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
primary_path->service_id = req_msg->service_id;
if (req_msg->alt_local_lid) {
memset(alt_path, 0, sizeof *alt_path);
@@ -1289,9 +1329,28 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
alt_path->packet_life_time =
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
alt_path->service_id = req_msg->service_id;
}
}
static u16 cm_get_bth_pkey(struct cm_work *work)
{
struct ib_device *ib_dev = work->port->cm_dev->ib_device;
u8 port_num = work->port->port_num;
u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
u16 pkey;
int ret;
ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
if (ret) {
dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
port_num, pkey_index, ret);
return 0;
}
return pkey;
}
static void cm_format_req_event(struct cm_work *work,
struct cm_id_private *cm_id_priv,
struct ib_cm_id *listen_id)
@@ -1302,6 +1361,7 @@ static void cm_format_req_event(struct cm_work *work,
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.req_rcvd;
param->listen_id = listen_id;
param->bth_pkey = cm_get_bth_pkey(work);
param->port = cm_id_priv->av.port->port_num;
param->primary_path = &work->path[0];
if (req_msg->alt_local_lid)
@@ -1484,8 +1544,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Find matching listen request. */
listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
req_msg->service_id,
req_msg->private_data);
req_msg->service_id);
if (!listen_cm_id_priv) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
spin_unlock_irq(&cm.lock);
@@ -2992,6 +3051,8 @@ static void cm_format_sidr_req_event(struct cm_work *work,
param = &work->cm_event.param.sidr_req_rcvd;
param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
param->listen_id = listen_id;
param->service_id = sidr_req_msg->service_id;
param->bth_pkey = cm_get_bth_pkey(work);
param->port = work->port->port_num;
work->cm_event.private_data = &sidr_req_msg->private_data;
}
@@ -3031,8 +3092,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
cur_cm_id_priv = cm_find_listen(cm_id->device,
sidr_req_msg->service_id,
sidr_req_msg->private_data);
sidr_req_msg->service_id);
if (!cur_cm_id_priv) {
spin_unlock_irq(&cm.lock);
cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
@@ -3886,9 +3946,9 @@ free:
kfree(cm_dev);
}
static void cm_remove_one(struct ib_device *ib_device)
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev;
struct cm_device *cm_dev = client_data;
struct cm_port *port;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
@@ -3896,7 +3956,6 @@ static void cm_remove_one(struct ib_device *ib_device)
unsigned long flags;
int i;
cm_dev = ib_get_client_data(ib_device, &cm_client);
if (!cm_dev)
return;

drivers/infiniband/core/cma.c

@@ -46,6 +46,8 @@
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
#include <net/ip6_route.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
@@ -94,7 +96,7 @@ const char *rdma_event_msg(enum rdma_cm_event_type event)
EXPORT_SYMBOL(rdma_event_msg);
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
.name = "cma",
@@ -113,6 +115,22 @@ static DEFINE_IDR(udp_ps);
static DEFINE_IDR(ipoib_ps);
static DEFINE_IDR(ib_ps);
static struct idr *cma_idr(enum rdma_port_space ps)
{
switch (ps) {
case RDMA_PS_TCP:
return &tcp_ps;
case RDMA_PS_UDP:
return &udp_ps;
case RDMA_PS_IPOIB:
return &ipoib_ps;
case RDMA_PS_IB:
return &ib_ps;
default:
return NULL;
}
}
struct cma_device {
struct list_head list;
struct ib_device *device;
@@ -122,11 +140,33 @@ struct cma_device {
};
struct rdma_bind_list {
struct idr *ps;
enum rdma_port_space ps;
struct hlist_head owners;
unsigned short port;
};
static int cma_ps_alloc(enum rdma_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct idr *idr = cma_idr(ps);
return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}
static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum)
{
struct idr *idr = cma_idr(ps);
return idr_find(idr, snum);
}
static void cma_ps_remove(enum rdma_port_space ps, int snum)
{
struct idr *idr = cma_idr(ps);
idr_remove(idr, snum);
}
enum {
CMA_OPTION_AFONLY,
};
@@ -225,6 +265,15 @@ struct cma_hdr {
#define CMA_VERSION 0x00
struct cma_req_info {
struct ib_device *device;
int port;
union ib_gid local_gid;
__be64 service_id;
u16 pkey;
bool has_gid:1;
};
static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
unsigned long flags;
@@ -262,7 +311,7 @@ static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
return old;
}
static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
return hdr->ip_version >> 4;
}
@@ -870,107 +919,397 @@ static inline int cma_any_port(struct sockaddr *addr)
return !cma_port(addr);
}
static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct rdma_cm_id *listen_id,
struct ib_sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
ib->sib_family = listen_ib->sib_family;
if (path) {
ib->sib_pkey = path->pkey;
ib->sib_flowinfo = path->flow_label;
memcpy(&ib->sib_addr, &path->sgid, 16);
} else {
ib->sib_pkey = listen_ib->sib_pkey;
ib->sib_flowinfo = listen_ib->sib_flowinfo;
ib->sib_addr = listen_ib->sib_addr;
if (src_addr) {
ib = (struct sockaddr_ib *)src_addr;
ib->sib_family = AF_IB;
if (path) {
ib->sib_pkey = path->pkey;
ib->sib_flowinfo = path->flow_label;
memcpy(&ib->sib_addr, &path->sgid, 16);
ib->sib_sid = path->service_id;
ib->sib_scope_id = 0;
} else {
ib->sib_pkey = listen_ib->sib_pkey;
ib->sib_flowinfo = listen_ib->sib_flowinfo;
ib->sib_addr = listen_ib->sib_addr;
ib->sib_sid = listen_ib->sib_sid;
ib->sib_scope_id = listen_ib->sib_scope_id;
}
ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
}
ib->sib_sid = listen_ib->sib_sid;
ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
ib->sib_scope_id = listen_ib->sib_scope_id;
if (path) {
ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
ib->sib_family = listen_ib->sib_family;
ib->sib_pkey = path->pkey;
ib->sib_flowinfo = path->flow_label;
memcpy(&ib->sib_addr, &path->dgid, 16);
if (dst_addr) {
ib = (struct sockaddr_ib *)dst_addr;
ib->sib_family = AF_IB;
if (path) {
ib->sib_pkey = path->pkey;
ib->sib_flowinfo = path->flow_label;
memcpy(&ib->sib_addr, &path->dgid, 16);
}
}
}
static __be16 ss_get_port(const struct sockaddr_storage *ss)
{
if (ss->ss_family == AF_INET)
return ((struct sockaddr_in *)ss)->sin_port;
else if (ss->ss_family == AF_INET6)
return ((struct sockaddr_in6 *)ss)->sin6_port;
BUG();
}
static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
struct cma_hdr *hdr)
static void cma_save_ip4_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct cma_hdr *hdr,
__be16 local_port)
{
struct sockaddr_in *ip4;
ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
ip4->sin_family = AF_INET;
ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr);
if (src_addr) {
ip4 = (struct sockaddr_in *)src_addr;
ip4->sin_family = AF_INET;
ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
ip4->sin_port = local_port;
}
ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
ip4->sin_family = AF_INET;
ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
ip4->sin_port = hdr->port;
if (dst_addr) {
ip4 = (struct sockaddr_in *)dst_addr;
ip4->sin_family = AF_INET;
ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
ip4->sin_port = hdr->port;
}
}
static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
struct cma_hdr *hdr)
static void cma_save_ip6_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct cma_hdr *hdr,
__be16 local_port)
{
struct sockaddr_in6 *ip6;
ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
ip6->sin6_family = AF_INET6;
ip6->sin6_addr = hdr->dst_addr.ip6;
ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr);
if (src_addr) {
ip6 = (struct sockaddr_in6 *)src_addr;
ip6->sin6_family = AF_INET6;
ip6->sin6_addr = hdr->dst_addr.ip6;
ip6->sin6_port = local_port;
}
ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
ip6->sin6_family = AF_INET6;
ip6->sin6_addr = hdr->src_addr.ip6;
ip6->sin6_port = hdr->port;
if (dst_addr) {
ip6 = (struct sockaddr_in6 *)dst_addr;
ip6->sin6_family = AF_INET6;
ip6->sin6_addr = hdr->src_addr.ip6;
ip6->sin6_port = hdr->port;
}
}
static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event)
static u16 cma_port_from_service_id(__be64 service_id)
{
return (u16)be64_to_cpu(service_id);
}
static int cma_save_ip_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct ib_cm_event *ib_event,
__be64 service_id)
{
struct cma_hdr *hdr;
if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
if (ib_event->event == IB_CM_REQ_RECEIVED)
cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
cma_save_ib_info(id, listen_id, NULL);
return 0;
}
__be16 port;
hdr = ib_event->private_data;
if (hdr->cma_version != CMA_VERSION)
return -EINVAL;
port = htons(cma_port_from_service_id(service_id));
switch (cma_get_ip_ver(hdr)) {
case 4:
cma_save_ip4_info(id, listen_id, hdr);
cma_save_ip4_info(src_addr, dst_addr, hdr, port);
break;
case 6:
cma_save_ip6_info(id, listen_id, hdr);
cma_save_ip6_info(src_addr, dst_addr, hdr, port);
break;
default:
return -EAFNOSUPPORT;
}
return 0;
}
static int cma_save_net_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
sa_family_t sa_family, __be64 service_id)
{
if (sa_family == AF_IB) {
if (ib_event->event == IB_CM_REQ_RECEIVED)
cma_save_ib_info(src_addr, dst_addr, listen_id,
ib_event->param.req_rcvd.primary_path);
else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
return 0;
}
return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}
static int cma_save_req_info(const struct ib_cm_event *ib_event,
struct cma_req_info *req)
{
const struct ib_cm_req_event_param *req_param =
&ib_event->param.req_rcvd;
const struct ib_cm_sidr_req_event_param *sidr_param =
&ib_event->param.sidr_req_rcvd;
switch (ib_event->event) {
case IB_CM_REQ_RECEIVED:
req->device = req_param->listen_id->device;
req->port = req_param->port;
memcpy(&req->local_gid, &req_param->primary_path->sgid,
sizeof(req->local_gid));
req->has_gid = true;
req->service_id = req_param->primary_path->service_id;
req->pkey = req_param->bth_pkey;
break;
case IB_CM_SIDR_REQ_RECEIVED:
req->device = sidr_param->listen_id->device;
req->port = sidr_param->port;
req->has_gid = false;
req->service_id = sidr_param->service_id;
req->pkey = sidr_param->bth_pkey;
break;
default:
return -EINVAL;
}
return 0;
}
static bool validate_ipv4_net_dev(struct net_device *net_dev,
const struct sockaddr_in *dst_addr,
const struct sockaddr_in *src_addr)
{
__be32 daddr = dst_addr->sin_addr.s_addr,
saddr = src_addr->sin_addr.s_addr;
struct fib_result res;
struct flowi4 fl4;
int err;
bool ret;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
ipv4_is_loopback(saddr))
return false;
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_iif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
rcu_read_lock();
err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
if (err)
return false;
ret = FIB_RES_DEV(res) == net_dev;
rcu_read_unlock();
return ret;
}
static bool validate_ipv6_net_dev(struct net_device *net_dev,
const struct sockaddr_in6 *dst_addr,
const struct sockaddr_in6 *src_addr)
{
#if IS_ENABLED(CONFIG_IPV6)
const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
strict);
bool ret;
if (!rt)
return false;
ret = rt->rt6i_idev->dev == net_dev;
ip6_rt_put(rt);
return ret;
#else
return false;
#endif
}
static bool validate_net_dev(struct net_device *net_dev,
const struct sockaddr *daddr,
const struct sockaddr *saddr)
{
const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;
switch (daddr->sa_family) {
case AF_INET:
return saddr->sa_family == AF_INET &&
validate_ipv4_net_dev(net_dev, daddr4, saddr4);
case AF_INET6:
return saddr->sa_family == AF_INET6 &&
validate_ipv6_net_dev(net_dev, daddr6, saddr6);
default:
return false;
}
}
static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
const struct cma_req_info *req)
{
struct sockaddr_storage listen_addr_storage, src_addr_storage;
struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
*src_addr = (struct sockaddr *)&src_addr_storage;
struct net_device *net_dev;
const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
int err;
err = cma_save_ip_info(listen_addr, src_addr, ib_event,
req->service_id);
if (err)
return ERR_PTR(err);
net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
gid, listen_addr);
if (!net_dev)
return ERR_PTR(-ENODEV);
if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
dev_put(net_dev);
return ERR_PTR(-EHOSTUNREACH);
}
return net_dev;
}
static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
{
return (be64_to_cpu(service_id) >> 16) & 0xffff;
}
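
Both helpers assume the service-ID layout that rdma_get_service_id() produces for the IP-based port spaces: the low 16 bits carry the port number and the next 16 bits carry the RDMA port space. A small standalone illustration in host byte order (not kernel code):

#include <stdint.h>

static uint64_t pack_service_id(uint16_t ps, uint16_t port)
{
	return ((uint64_t)ps << 16) | port;
}

static uint16_t port_from_service_id(uint64_t sid)
{
	return (uint16_t)(sid & 0xffff);
}

static uint16_t ps_from_service_id(uint64_t sid)
{
	return (uint16_t)((sid >> 16) & 0xffff);
}
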
static bool cma_match_private_data(struct rdma_id_private *id_priv,
const struct cma_hdr *hdr)
{
struct sockaddr *addr = cma_src_addr(id_priv);
__be32 ip4_addr;
struct in6_addr ip6_addr;
if (cma_any_addr(addr) && !id_priv->afonly)
return true;
switch (addr->sa_family) {
case AF_INET:
ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
if (cma_get_ip_ver(hdr) != 4)
return false;
if (!cma_any_addr(addr) &&
hdr->dst_addr.ip4.addr != ip4_addr)
return false;
break;
case AF_INET6:
ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
if (cma_get_ip_ver(hdr) != 6)
return false;
if (!cma_any_addr(addr) &&
memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
return false;
break;
case AF_IB:
return true;
default:
return false;
}
return true;
}
static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
const struct net_device *net_dev)
{
const struct rdma_addr *addr = &id_priv->id.route.addr;
if (!net_dev)
/* This request is an AF_IB request */
return addr->src_addr.ss_family == AF_IB;
return !addr->dev_addr.bound_dev_if ||
(net_eq(dev_net(net_dev), &init_net) &&
addr->dev_addr.bound_dev_if == net_dev->ifindex);
}
static struct rdma_id_private *cma_find_listener(
const struct rdma_bind_list *bind_list,
const struct ib_cm_id *cm_id,
const struct ib_cm_event *ib_event,
const struct cma_req_info *req,
const struct net_device *net_dev)
{
struct rdma_id_private *id_priv, *id_priv_dev;
if (!bind_list)
return ERR_PTR(-EINVAL);
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
if (cma_match_private_data(id_priv, ib_event->private_data)) {
if (id_priv->id.device == cm_id->device &&
cma_match_net_dev(id_priv, net_dev))
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
listen_list) {
if (id_priv_dev->id.device == cm_id->device &&
cma_match_net_dev(id_priv_dev, net_dev))
return id_priv_dev;
}
}
}
return ERR_PTR(-EINVAL);
}
static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
struct ib_cm_event *ib_event,
struct net_device **net_dev)
{
struct cma_req_info req;
struct rdma_bind_list *bind_list;
struct rdma_id_private *id_priv;
int err;
err = cma_save_req_info(ib_event, &req);
if (err)
return ERR_PTR(err);
*net_dev = cma_get_net_dev(ib_event, &req);
if (IS_ERR(*net_dev)) {
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
} else {
return ERR_CAST(*net_dev);
}
}
bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id),
cma_port_from_service_id(req.service_id));
id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
if (IS_ERR(id_priv)) {
dev_put(*net_dev);
*net_dev = NULL;
}
return id_priv;
}
static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
{
return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
@@ -1038,7 +1377,7 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_lock(&lock);
hlist_del(&id_priv->node);
if (hlist_empty(&bind_list->owners)) {
idr_remove(bind_list->ps, bind_list->port);
cma_ps_remove(bind_list->ps, bind_list->port);
kfree(bind_list);
}
mutex_unlock(&lock);
@@ -1216,11 +1555,15 @@ out:
}
static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event)
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
const __be64 service_id =
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
@@ -1229,7 +1572,9 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
return NULL;
id_priv = container_of(id, struct rdma_id_private, id);
if (cma_save_net_info(id, listen_id, ib_event))
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
goto err;
rt = &id->route;
@@ -1243,14 +1588,16 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
if (rt->num_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (cma_any_addr(cma_src_addr(id_priv))) {
rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
} else {
ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
if (net_dev) {
ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
if (ret)
goto err;
} else {
/* An AF_IB connection */
WARN_ON_ONCE(ss_family != AF_IB);
cma_translate_ib((struct sockaddr_ib *)cma_src_addr(id_priv),
&rt->addr.dev_addr);
}
rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
@@ -1263,10 +1610,12 @@ err:
}
static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event)
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
int ret;
id = rdma_create_id(listen_id->event_handler, listen_id->context,
@@ -1275,13 +1624,24 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
return NULL;
id_priv = container_of(id, struct rdma_id_private, id);
if (cma_save_net_info(id, listen_id, ib_event))
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
ib_event->param.sidr_req_rcvd.service_id))
goto err;
if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
if (net_dev) {
ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
if (ret)
goto err;
} else {
/* An AF_IB connection */
WARN_ON_ONCE(ss_family != AF_IB);
if (!cma_any_addr(cma_src_addr(id_priv)))
cma_translate_ib((struct sockaddr_ib *)
cma_src_addr(id_priv),
&id->route.addr.dev_addr);
}
id_priv->state = RDMA_CM_CONNECT;
@@ -1319,25 +1679,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
struct net_device *net_dev;
int offset, ret;
listen_id = cm_id->context;
if (!cma_check_req_qp_type(&listen_id->id, ib_event))
return -EINVAL;
listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
return -ECONNABORTED;
if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
}
if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) {
ret = -ECONNABORTED;
goto net_dev_put;
}
memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
conn_id = cma_new_udp_id(&listen_id->id, ib_event);
conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
} else {
conn_id = cma_new_conn_id(&listen_id->id, ib_event);
conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
ib_event->private_data, offset);
}
@@ -1375,6 +1743,8 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
if (net_dev)
dev_put(net_dev);
return 0;
err3:
@@ -1388,6 +1758,11 @@ err1:
mutex_unlock(&listen_id->handler_mutex);
if (conn_id)
rdma_destroy_id(&conn_id->id);
net_dev_put:
if (net_dev)
dev_put(net_dev);
return ret;
}
@@ -1400,42 +1775,6 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_get_service_id);
static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
struct ib_cm_compare_data *compare)
{
struct cma_hdr *cma_data, *cma_mask;
__be32 ip4_addr;
struct in6_addr ip6_addr;
memset(compare, 0, sizeof *compare);
cma_data = (void *) compare->data;
cma_mask = (void *) compare->mask;
switch (addr->sa_family) {
case AF_INET:
ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
cma_set_ip_ver(cma_data, 4);
cma_set_ip_ver(cma_mask, 0xF);
if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip4.addr = ip4_addr;
cma_mask->dst_addr.ip4.addr = htonl(~0);
}
break;
case AF_INET6:
ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
cma_set_ip_ver(cma_data, 6);
cma_set_ip_ver(cma_mask, 0xF);
if (!cma_any_addr(addr)) {
cma_data->dst_addr.ip6 = ip6_addr;
memset(&cma_mask->dst_addr.ip6, 0xFF,
sizeof cma_mask->dst_addr.ip6);
}
break;
default:
break;
}
}
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
@@ -1589,33 +1928,18 @@ out:
static int cma_ib_listen(struct rdma_id_private *id_priv)
{
struct ib_cm_compare_data compare_data;
struct sockaddr *addr;
struct ib_cm_id *id;
__be64 svc_id;
int ret;
id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.ib = id;
addr = cma_src_addr(id_priv);
svc_id = rdma_get_service_id(&id_priv->id, addr);
if (cma_any_addr(addr) && !id_priv->afonly)
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
}
id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.ib = id;
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
id_priv->cm_id.ib = NULL;
}
return ret;
return 0;
}
static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
@@ -2203,8 +2527,11 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
src_addr = (struct sockaddr *) &id->route.addr.src_addr;
src_addr->sa_family = dst_addr->sa_family;
if (dst_addr->sa_family == AF_INET6) {
((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
} else if (dst_addr->sa_family == AF_IB) {
((struct sockaddr_ib *) src_addr)->sib_pkey =
((struct sockaddr_ib *) dst_addr)->sib_pkey;
@@ -2325,8 +2652,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
hlist_add_head(&id_priv->node, &bind_list->owners);
}
static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
unsigned short snum)
static int cma_alloc_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv, unsigned short snum)
{
struct rdma_bind_list *bind_list;
int ret;
@@ -2335,7 +2662,7 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
if (!bind_list)
return -ENOMEM;
ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL);
ret = cma_ps_alloc(ps, bind_list, snum);
if (ret < 0)
goto err;
@@ -2348,7 +2675,8 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
static int cma_alloc_any_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
int low, high, remaining;
@@ -2359,7 +2687,7 @@ static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
rover = prandom_u32() % remaining + low;
retry:
if (last_used_port != rover &&
!idr_find(ps, (unsigned short) rover)) {
!cma_ps_find(ps, (unsigned short)rover)) {
int ret = cma_alloc_port(ps, id_priv, rover);
/*
* Remember previously used port number in order to avoid
@@ -2414,7 +2742,8 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
return 0;
}
static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
static int cma_use_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
unsigned short snum;
@@ -2424,7 +2753,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
bind_list = idr_find(ps, snum);
bind_list = cma_ps_find(ps, snum);
if (!bind_list) {
ret = cma_alloc_port(ps, id_priv, snum);
} else {
@@ -2447,25 +2776,24 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
return ret;
}
static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
static enum rdma_port_space cma_select_inet_ps(
struct rdma_id_private *id_priv)
{
switch (id_priv->id.ps) {
case RDMA_PS_TCP:
return &tcp_ps;
case RDMA_PS_UDP:
return &udp_ps;
case RDMA_PS_IPOIB:
return &ipoib_ps;
case RDMA_PS_IB:
return &ib_ps;
return id_priv->id.ps;
default:
return NULL;
return 0;
}
}
static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
{
struct idr *ps = NULL;
enum rdma_port_space ps = 0;
struct sockaddr_ib *sib;
u64 sid_ps, mask, sid;
@@ -2475,15 +2803,15 @@ static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
sid_ps = RDMA_IB_IP_PS_IB;
ps = &ib_ps;
ps = RDMA_PS_IB;
} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
(sid == (RDMA_IB_IP_PS_TCP & mask))) {
sid_ps = RDMA_IB_IP_PS_TCP;
ps = &tcp_ps;
ps = RDMA_PS_TCP;
} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
(sid == (RDMA_IB_IP_PS_UDP & mask))) {
sid_ps = RDMA_IB_IP_PS_UDP;
ps = &udp_ps;
ps = RDMA_PS_UDP;
}
if (ps) {
@@ -2496,7 +2824,7 @@ static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
static int cma_get_port(struct rdma_id_private *id_priv)
{
struct idr *ps;
enum rdma_port_space ps;
int ret;
if (cma_family(id_priv) != AF_IB)
@@ -3551,11 +3879,10 @@ static void cma_process_remove(struct cma_device *cma_dev)
wait_for_completion(&cma_dev->comp);
}
static void cma_remove_one(struct ib_device *device)
static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev;
struct cma_device *cma_dev = client_data;
cma_dev = ib_get_client_data(device, &cma_client);
if (!cma_dev)
return;


@@ -43,12 +43,58 @@ int ib_device_register_sysfs(struct ib_device *device,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
void ib_sysfs_cleanup(void);
int ib_cache_setup(void);
void ib_cache_setup(void);
void ib_cache_cleanup(void);
int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
struct ib_qp_attr *qp_attr, int *qp_attr_mask);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
void *filter_cookie,
roce_netdev_callback cb,
void *cookie);
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
void *filter_cookie,
roce_netdev_callback cb,
void *cookie);
int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
u8 port, struct net_device *ndev,
u16 *index);
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
};
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
enum ib_cache_gid_default_mode mode);
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr);
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr);
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev);
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
int roce_rescan_device(struct ib_device *ib_dev);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
#endif /* _CORE_PRIV_H */
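The roce_netdev_filter/roce_netdev_callback pair declared above drives the RoCE port enumeration helpers implemented later in this series in device.c. As a rough, core-internal usage sketch (the function names below are invented for illustration and are not part of these patches), a caller could count the RoCE ports whose associated netdev is currently up:

/* Illustration only: count RoCE ports of ib_dev whose netdev is running. */
static int up_netdev_filter(struct ib_device *device, u8 port,
			    struct net_device *ndev, void *cookie)
{
	return ndev && netif_running(ndev);	/* non-zero: invoke the callback */
}

static void count_port_cb(struct ib_device *device, u8 port,
			  struct net_device *ndev, void *cookie)
{
	(*(unsigned int *)cookie)++;
}

static unsigned int count_up_roce_ports(struct ib_device *ib_dev)
{
	unsigned int n = 0;

	ib_enum_roce_netdev(ib_dev, up_netdev_filter, NULL,
			    count_port_cb, &n);
	return n;
}

Because these declarations live in core_priv.h, only code inside ib_core can use them; drivers and ULPs keep going through the public verbs headers.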


@@ -38,7 +38,10 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include "core_priv.h"
@@ -50,22 +53,34 @@ struct ib_client_data {
struct list_head list;
struct ib_client *client;
void * data;
/* The device or client is going down. Do not call client or device
* callbacks other than remove(). */
bool going_down;
};
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
/* The device_list and client_list contain devices and clients after their
* registration has completed, and the devices and clients are removed
* during unregistration. */
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
/*
* device_mutex protects access to both device_list and client_list.
* There's no real point to using multiple locks or something fancier
* like an rwsem: we always access both lists, and we're always
* modifying one list or the other list. In any case this is not a
* hot path so there's no point in trying to optimize.
* device_mutex and lists_rwsem protect access to both device_list and
* client_list. device_mutex protects writer access by device and client
* registration / de-registration. lists_rwsem protects reader access to
* these lists. Iterators of these lists must lock it for read, while updates
* to the lists must be done with a write lock. A special case is when the
* device_mutex is locked. In this case locking the lists for read access is
* not necessary as the device_mutex implies it.
*
* lists_rwsem also protects access to the client data list.
*/
static DEFINE_MUTEX(device_mutex);
static DECLARE_RWSEM(lists_rwsem);
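Per the locking comment above, read-side iteration of device_list takes lists_rwsem for read, while the registration paths (which already hold device_mutex) take it for write before touching the lists. A minimal reader sketch, assuming it lives in this file next to the list definitions:

/* Illustration only: walk device_list safely from a read-side path. */
static unsigned int count_registered_devices(void)
{
	struct ib_device *dev;
	unsigned int n = 0;

	down_read(&lists_rwsem);
	list_for_each_entry(dev, &device_list, core_list)
		n++;
	up_read(&lists_rwsem);

	return n;
}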
static int ib_device_check_mandatory(struct ib_device *device)
{
@@ -152,6 +167,36 @@ static int alloc_name(char *name)
return 0;
}
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
ib_cache_release_one(dev);
kfree(dev->port_immutable);
kfree(dev);
}
static int ib_device_uevent(struct device *device,
struct kobj_uevent_env *env)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
if (add_uevent_var(env, "NAME=%s", dev->name))
return -ENOMEM;
/*
* It would be nice to pass the node GUID with the event...
*/
return 0;
}
static struct class ib_class = {
.name = "infiniband",
.dev_release = ib_device_release,
.dev_uevent = ib_device_uevent,
};
/**
* ib_alloc_device - allocate an IB device struct
* @size: size of structure to allocate
@@ -164,9 +209,27 @@ static int alloc_name(char *name)
*/
struct ib_device *ib_alloc_device(size_t size)
{
BUG_ON(size < sizeof (struct ib_device));
struct ib_device *device;
return kzalloc(size, GFP_KERNEL);
if (WARN_ON(size < sizeof(struct ib_device)))
return NULL;
device = kzalloc(size, GFP_KERNEL);
if (!device)
return NULL;
device->dev.class = &ib_class;
device_initialize(&device->dev);
dev_set_drvdata(&device->dev, device);
INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
return device;
}
EXPORT_SYMBOL(ib_alloc_device);
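For context, a low-level driver embeds struct ib_device at the start of its own device structure and sizes the allocation accordingly; a hedged sketch (the driver structure and names below are hypothetical, not from this series):

/* Hypothetical driver structure; ib_device must be the first member. */
struct my_hca {
	struct ib_device	ibdev;
	void __iomem		*regs;
};

static struct my_hca *my_hca_alloc(void)
{
	/* With this patch, ib_alloc_device() also initializes the embedded
	 * struct device, so freeing goes through ib_dealloc_device() ->
	 * kobject_put() -> ib_device_release().
	 */
	return (struct my_hca *)ib_alloc_device(sizeof(struct my_hca));
}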
@@ -178,13 +241,8 @@ EXPORT_SYMBOL(ib_alloc_device);
*/
void ib_dealloc_device(struct ib_device *device)
{
if (device->reg_state == IB_DEV_UNINITIALIZED) {
kfree(device);
return;
}
BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
device->reg_state != IB_DEV_UNINITIALIZED);
kobject_put(&device->dev.kobj);
}
EXPORT_SYMBOL(ib_dealloc_device);
@@ -203,10 +261,13 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
context->client = client;
context->data = NULL;
context->going_down = false;
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_add(&context->list, &device->client_data_list);
spin_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem);
return 0;
}
@@ -219,7 +280,7 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
static int read_port_immutable(struct ib_device *device)
{
int ret = -ENOMEM;
int ret;
u8 start_port = rdma_start_port(device);
u8 end_port = rdma_end_port(device);
u8 port;
@@ -235,26 +296,18 @@ static int read_port_immutable(struct ib_device *device)
* (end_port + 1),
GFP_KERNEL);
if (!device->port_immutable)
goto err;
return -ENOMEM;
for (port = start_port; port <= end_port; ++port) {
ret = device->get_port_immutable(device, port,
&device->port_immutable[port]);
if (ret)
goto err;
return ret;
if (verify_immutable(device, port)) {
ret = -EINVAL;
goto err;
}
if (verify_immutable(device, port))
return -EINVAL;
}
ret = 0;
goto out;
err:
kfree(device->port_immutable);
out:
return ret;
return 0;
}
/**
@@ -271,6 +324,7 @@ int ib_register_device(struct ib_device *device,
u8, struct kobject *))
{
int ret;
struct ib_client *client;
mutex_lock(&device_mutex);
@@ -285,11 +339,6 @@ int ib_register_device(struct ib_device *device,
goto out;
}
INIT_LIST_HEAD(&device->event_handler_list);
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
ret = read_port_immutable(device);
if (ret) {
printk(KERN_WARNING "Couldn't create per port immutable data %s\n",
@@ -297,27 +346,30 @@ int ib_register_device(struct ib_device *device,
goto out;
}
ret = ib_cache_setup_one(device);
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
goto out;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
kfree(device->port_immutable);
ib_cache_cleanup_one(device);
goto out;
}
list_add_tail(&device->core_list, &device_list);
device->reg_state = IB_DEV_REGISTERED;
{
struct ib_client *client;
list_for_each_entry(client, &client_list, list)
if (client->add && !add_client_context(device, client))
client->add(device);
list_for_each_entry(client, &client_list, list)
if (client->add && !add_client_context(device, client))
client->add(device);
}
out:
down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem);
out:
mutex_unlock(&device_mutex);
return ret;
}
@@ -331,26 +383,37 @@ EXPORT_SYMBOL(ib_register_device);
*/
void ib_unregister_device(struct ib_device *device)
{
struct ib_client *client;
struct ib_client_data *context, *tmp;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry_reverse(client, &client_list, list)
if (client->remove)
client->remove(device);
down_write(&lists_rwsem);
list_del(&device->core_list);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
context->going_down = true;
spin_unlock_irqrestore(&device->client_data_lock, flags);
downgrade_write(&lists_rwsem);
list_for_each_entry_safe(context, tmp, &device->client_data_list,
list) {
if (context->client->remove)
context->client->remove(device, context->data);
}
up_read(&lists_rwsem);
mutex_unlock(&device_mutex);
ib_device_unregister_sysfs(device);
ib_cache_cleanup_one(device);
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
kfree(context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem);
device->reg_state = IB_DEV_UNREGISTERED;
}
@@ -375,11 +438,14 @@ int ib_register_client(struct ib_client *client)
mutex_lock(&device_mutex);
list_add_tail(&client->list, &client_list);
list_for_each_entry(device, &device_list, core_list)
if (client->add && !add_client_context(device, client))
client->add(device);
down_write(&lists_rwsem);
list_add_tail(&client->list, &client_list);
up_write(&lists_rwsem);
mutex_unlock(&device_mutex);
return 0;
@@ -402,19 +468,41 @@ void ib_unregister_client(struct ib_client *client)
mutex_lock(&device_mutex);
list_for_each_entry(device, &device_list, core_list) {
if (client->remove)
client->remove(device);
down_write(&lists_rwsem);
list_del(&client->list);
up_write(&lists_rwsem);
list_for_each_entry(device, &device_list, core_list) {
struct ib_client_data *found_context = NULL;
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
if (context->client == client) {
list_del(&context->list);
kfree(context);
context->going_down = true;
found_context = context;
break;
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem);
if (client->remove)
client->remove(device, found_context ?
found_context->data : NULL);
if (!found_context) {
pr_warn("No client context found for %s/%s\n",
device->name, client->name);
continue;
}
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
list_del(&found_context->list);
kfree(found_context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem);
}
list_del(&client->list);
mutex_unlock(&device_mutex);
}
@@ -590,10 +678,79 @@ EXPORT_SYMBOL(ib_query_port);
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid)
{
if (rdma_cap_roce_gid_table(device, port_num))
return ib_get_cached_gid(device, port_num, index, gid);
return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);
/**
* ib_enum_roce_netdev - enumerate all RoCE ports
* @ib_dev: IB device we want to query
* @filter: Should we call the callback?
* @filter_cookie: Cookie passed to filter
* @cb: Callback to call for each matching RoCE port
* @cookie: Cookie passed back to the callback
*
* Enumerates all of the physical RoCE ports of ib_dev
* which are related to a netdevice and calls the callback() on each
* port for which the filter() function returns non-zero.
*/
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
void *filter_cookie,
roce_netdev_callback cb,
void *cookie)
{
u8 port;
for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
port++)
if (rdma_protocol_roce(ib_dev, port)) {
struct net_device *idev = NULL;
if (ib_dev->get_netdev)
idev = ib_dev->get_netdev(ib_dev, port);
if (idev &&
idev->reg_state >= NETREG_UNREGISTERED) {
dev_put(idev);
idev = NULL;
}
if (filter(ib_dev, port, idev, filter_cookie))
cb(ib_dev, port, idev, cookie);
if (idev)
dev_put(idev);
}
}
/**
* ib_enum_all_roce_netdevs - enumerate all RoCE devices
* @filter: Should we call the callback?
* @filter_cookie: Cookie passed to filter
* @cb: Callback to call for each matching RoCE port
* @cookie: Cookie passed back to the callback
*
* Enumerates all RoCE devices' physical ports which are related
* to netdevices and calls the callback() on each port for which
* the filter() function returns non-zero.
*/
void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
void *filter_cookie,
roce_netdev_callback cb,
void *cookie)
{
struct ib_device *dev;
down_read(&lists_rwsem);
list_for_each_entry(dev, &device_list, core_list)
ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
up_read(&lists_rwsem);
}
/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
@@ -673,6 +830,14 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
if (!ib_cache_gid_find_by_port(device, gid, port,
NULL, index)) {
*port_num = port;
return 0;
}
}
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid);
if (ret)
@@ -729,6 +894,51 @@ int ib_find_pkey(struct ib_device *device,
}
EXPORT_SYMBOL(ib_find_pkey);
/**
* ib_get_net_dev_by_params() - Return the appropriate net_dev
* for a received CM request
* @dev: An RDMA device on which the request has been received.
* @port: Port number on the RDMA device.
* @pkey: The Pkey the request came on.
* @gid: A GID that the net_dev uses to communicate.
* @addr: Contains the IP address that the request specified as its
* destination.
*/
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
u8 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr)
{
struct net_device *net_dev = NULL;
struct ib_client_data *context;
if (!rdma_protocol_ib(dev, port))
return NULL;
down_read(&lists_rwsem);
list_for_each_entry(context, &dev->client_data_list, list) {
struct ib_client *client = context->client;
if (context->going_down)
continue;
if (client->get_net_dev_by_params) {
net_dev = client->get_net_dev_by_params(dev, port, pkey,
gid, addr,
context->data);
if (net_dev)
break;
}
}
up_read(&lists_rwsem);
return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
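A client opts into this lookup by implementing the new get_net_dev_by_params hook in its struct ib_client. A hedged sketch of what such a client might look like (the ULP names below are invented; the real consumer in this series is IPoIB):

/* Sketch of a ULP client wiring up the new callback (names invented). */
static void my_add_one(struct ib_device *device)
{
}

static void my_remove_one(struct ib_device *device, void *client_data)
{
}

static struct net_device *my_get_net_dev(struct ib_device *dev, u8 port,
					 u16 pkey, const union ib_gid *gid,
					 const struct sockaddr *addr,
					 void *client_data)
{
	/* Return a net_device (with a reference held) that matches the
	 * pkey/gid/addr of the incoming CM request, or NULL. */
	return NULL;
}

static struct ib_client my_client = {
	.name			= "my_ulp",
	.add			= my_add_one,
	.remove			= my_remove_one,
	.get_net_dev_by_params	= my_get_net_dev,
};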
static int __init ib_core_init(void)
{
int ret;
@@ -737,7 +947,7 @@ static int __init ib_core_init(void)
if (!ib_wq)
return -ENOMEM;
ret = ib_sysfs_setup();
ret = class_register(&ib_class);
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
goto err;
@@ -749,19 +959,12 @@ static int __init ib_core_init(void)
goto err_sysfs;
}
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
goto err_nl;
}
ib_cache_setup();
return 0;
err_nl:
ibnl_cleanup();
err_sysfs:
ib_sysfs_cleanup();
class_unregister(&ib_class);
err:
destroy_workqueue(ib_wq);
@@ -772,7 +975,7 @@ static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ibnl_cleanup();
ib_sysfs_cleanup();
class_unregister(&ib_class);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}


@@ -338,13 +338,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error1;
}
mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(mad_agent_priv->agent.mr)) {
ret = ERR_PTR(-ENOMEM);
goto error2;
}
if (mad_reg_req) {
reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
if (!reg_req) {
@@ -429,8 +422,6 @@ error4:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
kfree(reg_req);
error3:
ib_dereg_mr(mad_agent_priv->agent.mr);
error2:
kfree(mad_agent_priv);
error1:
return ret;
@@ -590,7 +581,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
wait_for_completion(&mad_agent_priv->comp);
kfree(mad_agent_priv->reg_req);
ib_dereg_mr(mad_agent_priv->agent.mr);
kfree(mad_agent_priv);
}
@@ -1038,7 +1028,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->mad_agent_priv = mad_agent_priv;
mad_send_wr->sg_list[0].length = hdr_len;
mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
/* OPA MADs don't have to be the full 2048 bytes */
if (opa && base_version == OPA_MGMT_BASE_VERSION &&
@@ -1047,7 +1037,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
else
mad_send_wr->sg_list[1].length = mad_size - hdr_len;
mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
@@ -2885,7 +2875,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
/* Initialize common scatter list fields */
sg_list.lkey = (*qp_info->port_priv->mr).lkey;
sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
/* Initialize common receive WR fields */
recv_wr.next = NULL;
@@ -3201,13 +3191,6 @@ static int ib_mad_port_open(struct ib_device *device,
goto error4;
}
port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(port_priv->mr)) {
dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
ret = PTR_ERR(port_priv->mr);
goto error5;
}
if (has_smi) {
ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
if (ret)
@@ -3248,8 +3231,6 @@ error8:
error7:
destroy_mad_qp(&port_priv->qp_info[0]);
error6:
ib_dereg_mr(port_priv->mr);
error5:
ib_dealloc_pd(port_priv->pd);
error4:
ib_destroy_cq(port_priv->cq);
@@ -3284,7 +3265,6 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dereg_mr(port_priv->mr);
ib_dealloc_pd(port_priv->pd);
ib_destroy_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
@@ -3335,7 +3315,7 @@ error:
}
}
static void ib_mad_remove_device(struct ib_device *device)
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
{
int i;


@@ -199,7 +199,6 @@ struct ib_mad_port_private {
int port_num;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
spinlock_t reg_lock;
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];


@@ -43,7 +43,7 @@
#include "sa.h"
static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
.name = "ib_multicast",
@@ -840,13 +840,12 @@ static void mcast_add_one(struct ib_device *device)
ib_register_event_handler(&dev->event_handler);
}
static void mcast_remove_one(struct ib_device *device)
static void mcast_remove_one(struct ib_device *device, void *client_data)
{
struct mcast_device *dev;
struct mcast_device *dev = client_data;
struct mcast_port *port;
int i;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return;


@@ -49,6 +49,14 @@ static DEFINE_MUTEX(ibnl_mutex);
static struct sock *nls;
static LIST_HEAD(client_list);
int ibnl_chk_listeners(unsigned int group)
{
if (netlink_has_listeners(nls, group) == 0)
return -1;
return 0;
}
EXPORT_SYMBOL(ibnl_chk_listeners);
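Note the return convention: 0 means at least one listener is subscribed to the group and -1 means none, so callers end up testing it with a somewhat counter-intuitive negation (ib_sa does exactly that in send_mad() later in this series). A tiny wrapper sketch, assuming the LS multicast group:

/* Illustration only: true when a user-space LS daemon is listening. */
static bool ls_daemon_present(void)
{
	return ibnl_chk_listeners(RDMA_NL_GROUP_LS) == 0;
}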
int ibnl_add_client(int index, int nops,
const struct ibnl_client_cbs cb_table[])
{
@@ -151,6 +159,23 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
!client->cb_table[op].dump)
return -EINVAL;
/*
* For a response or a local service set_timeout request,
* there is no need to use netlink_dump_start.
*/
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
(index == RDMA_NL_LS &&
op == RDMA_NL_LS_OP_SET_TIMEOUT)) {
struct netlink_callback cb = {
.skb = skb,
.nlh = nlh,
.dump = client->cb_table[op].dump,
.module = client->cb_table[op].module,
};
return cb.dump(skb, &cb);
}
{
struct netlink_dump_control c = {
.dump = client->cb_table[op].dump,
@@ -165,9 +190,39 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
}
static void ibnl_rcv_reply_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
int msglen;
/*
* Process responses until there are no more messages or until the first
* request is seen. Generally speaking, it is not recommended to mix
* responses with requests.
*/
while (skb->len >= nlmsg_total_size(0)) {
nlh = nlmsg_hdr(skb);
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
return;
/* Handle response only */
if (nlh->nlmsg_flags & NLM_F_REQUEST)
return;
ibnl_rcv_msg(skb, nlh);
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
}
static void ibnl_rcv(struct sk_buff *skb)
{
mutex_lock(&ibnl_mutex);
ibnl_rcv_reply_skb(skb);
netlink_rcv_skb(skb, &ibnl_rcv_msg);
mutex_unlock(&ibnl_mutex);
}


@@ -0,0 +1,728 @@
/*
* Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "core_priv.h"
#include <linux/in.h>
#include <linux/in6.h>
/* For in6_dev_get/in6_dev_put */
#include <net/addrconf.h>
#include <net/bonding.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
enum gid_op_type {
GID_DEL = 0,
GID_ADD
};
struct update_gid_event_work {
struct work_struct work;
union ib_gid gid;
struct ib_gid_attr gid_attr;
enum gid_op_type gid_op;
};
#define ROCE_NETDEV_CALLBACK_SZ 3
struct netdev_event_work_cmd {
roce_netdev_callback cb;
roce_netdev_filter filter;
struct net_device *ndev;
struct net_device *filter_ndev;
};
struct netdev_event_work {
struct work_struct work;
struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ];
};
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
u8 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
switch (gid_op) {
case GID_ADD:
ib_cache_gid_add(ib_dev, port, gid, gid_attr);
break;
case GID_DEL:
ib_cache_gid_del(ib_dev, port, gid, gid_attr);
break;
}
}
enum bonding_slave_state {
BONDING_SLAVE_STATE_ACTIVE = 1UL << 0,
BONDING_SLAVE_STATE_INACTIVE = 1UL << 1,
/* No primary slave or the device isn't a slave in bonding */
BONDING_SLAVE_STATE_NA = 1UL << 2,
};
static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev,
struct net_device *upper)
{
if (upper && netif_is_bond_master(upper)) {
struct net_device *pdev =
bond_option_active_slave_get_rcu(netdev_priv(upper));
if (pdev)
return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE :
BONDING_SLAVE_STATE_INACTIVE;
}
return BONDING_SLAVE_STATE_NA;
}
static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
{
struct net_device *_upper = NULL;
struct list_head *iter;
netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
if (_upper == upper)
break;
return _upper == upper;
}
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
int res;
if (!rdma_ndev)
return 0;
rcu_read_lock();
real_dev = rdma_vlan_dev_real_dev(event_ndev);
if (!real_dev)
real_dev = event_ndev;
res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
rcu_read_unlock();
return res;
}
static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *master_dev;
int res;
if (!rdma_ndev)
return 0;
rcu_read_lock();
master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) ==
BONDING_SLAVE_STATE_INACTIVE;
rcu_read_unlock();
return res;
}
static int pass_all_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
return 1;
}
static int upper_device_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = (struct net_device *)cookie;
int res;
if (!rdma_ndev)
return 0;
if (rdma_ndev == event_ndev)
return 1;
rcu_read_lock();
res = is_upper_dev_rcu(rdma_ndev, event_ndev);
rcu_read_unlock();
return res;
}
static void update_gid_ip(enum gid_op_type gid_op,
struct ib_device *ib_dev,
u8 port, struct net_device *ndev,
struct sockaddr *addr)
{
union ib_gid gid;
struct ib_gid_attr gid_attr;
rdma_ip2gid(addr, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}
static void enum_netdev_default_gids(struct ib_device *ib_dev,
u8 port, struct net_device *event_ndev,
struct net_device *rdma_ndev)
{
rcu_read_lock();
if (!rdma_ndev ||
((rdma_ndev != event_ndev &&
!is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
is_eth_active_slave_of_bonding_rcu(rdma_ndev,
netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
BONDING_SLAVE_STATE_INACTIVE)) {
rcu_read_unlock();
return;
}
rcu_read_unlock();
ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
IB_CACHE_GID_DEFAULT_MODE_SET);
}
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
u8 port,
struct net_device *event_ndev,
struct net_device *rdma_ndev)
{
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
if (!rdma_ndev)
return;
if (!real_dev)
real_dev = event_ndev;
rcu_read_lock();
if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
BONDING_SLAVE_STATE_INACTIVE) {
rcu_read_unlock();
ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
IB_CACHE_GID_DEFAULT_MODE_DELETE);
} else {
rcu_read_unlock();
}
}
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
u8 port, struct net_device *ndev)
{
struct in_device *in_dev;
if (ndev->reg_state >= NETREG_UNREGISTERING)
return;
in_dev = in_dev_get(ndev);
if (!in_dev)
return;
for_ifa(in_dev) {
struct sockaddr_in ip;
ip.sin_family = AF_INET;
ip.sin_addr.s_addr = ifa->ifa_address;
update_gid_ip(GID_ADD, ib_dev, port, ndev,
(struct sockaddr *)&ip);
}
endfor_ifa(in_dev);
in_dev_put(in_dev);
}
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
u8 port, struct net_device *ndev)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *in6_dev;
struct sin6_list {
struct list_head list;
struct sockaddr_in6 sin6;
};
struct sin6_list *sin6_iter;
struct sin6_list *sin6_temp;
struct ib_gid_attr gid_attr = {.ndev = ndev};
LIST_HEAD(sin6_list);
if (ndev->reg_state >= NETREG_UNREGISTERING)
return;
in6_dev = in6_dev_get(ndev);
if (!in6_dev)
return;
read_lock_bh(&in6_dev->lock);
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n");
continue;
}
entry->sin6.sin6_family = AF_INET6;
entry->sin6.sin6_addr = ifp->addr;
list_add_tail(&entry->list, &sin6_list);
}
read_unlock_bh(&in6_dev->lock);
in6_dev_put(in6_dev);
list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) {
union ib_gid gid;
rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid);
update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr);
list_del(&sin6_iter->list);
kfree(sin6_iter);
}
}
static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
enum_netdev_ipv4_ips(ib_dev, port, ndev);
if (IS_ENABLED(CONFIG_IPV6))
enum_netdev_ipv6_ips(ib_dev, port, ndev);
}
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = (struct net_device *)cookie;
enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
_add_netdev_ips(ib_dev, port, event_ndev);
}
static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = (struct net_device *)cookie;
ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
u8 port,
struct net_device *rdma_ndev,
void *cookie)
{
struct net *net;
struct net_device *ndev;
/* Lock the rtnl to make sure the netdevs do not move under
* our feet
*/
rtnl_lock();
for_each_net(net)
for_each_netdev(net, ndev)
if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
rtnl_unlock();
}
/* This function will rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices. */
int roce_rescan_device(struct ib_device *ib_dev)
{
ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
enum_all_gids_of_dev_cb, NULL);
return 0;
}
static void callback_for_addr_gid_device_scan(struct ib_device *device,
u8 port,
struct net_device *rdma_ndev,
void *cookie)
{
struct update_gid_event_work *parsed = cookie;
return update_gid(parsed->gid_op, device,
port, &parsed->gid,
&parsed->gid_attr);
}
static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
void *cookie,
void (*handle_netdev)(struct ib_device *ib_dev,
u8 port,
struct net_device *ndev))
{
struct net_device *ndev = (struct net_device *)cookie;
struct upper_list {
struct list_head list;
struct net_device *upper;
};
struct net_device *upper;
struct list_head *iter;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
rcu_read_lock();
netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) {
struct upper_list *entry = kmalloc(sizeof(*entry),
GFP_ATOMIC);
if (!entry) {
pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
continue;
}
list_add_tail(&entry->list, &upper_list);
dev_hold(upper);
entry->upper = upper;
}
rcu_read_unlock();
handle_netdev(ib_dev, port, ndev);
list_for_each_entry_safe(upper_iter, upper_temp, &upper_list,
list) {
handle_netdev(ib_dev, port, upper_iter->upper);
dev_put(upper_iter->upper);
list_del(&upper_iter->list);
kfree(upper_iter);
}
}
static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *event_ndev)
{
ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
}
static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
}
static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}
static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev,
void *cookie)
{
struct net_device *master_ndev;
rcu_read_lock();
master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
if (master_ndev)
dev_hold(master_ndev);
rcu_read_unlock();
if (master_ndev) {
bond_delete_netdev_default_gids(ib_dev, port, master_ndev,
rdma_ndev);
dev_put(master_ndev);
}
}
static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = (struct net_device *)cookie;
bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
}
/* The following functions operate on all IB devices. netdevice_event and
* addr_event execute ib_enum_all_roce_netdevs through a work item.
* ib_enum_all_roce_netdevs iterates through all IB devices.
*/
static void netdevice_event_work_handler(struct work_struct *_work)
{
struct netdev_event_work *work =
container_of(_work, struct netdev_event_work, work);
unsigned int i;
for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) {
ib_enum_all_roce_netdevs(work->cmds[i].filter,
work->cmds[i].filter_ndev,
work->cmds[i].cb,
work->cmds[i].ndev);
dev_put(work->cmds[i].ndev);
dev_put(work->cmds[i].filter_ndev);
}
kfree(work);
}
static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
struct net_device *ndev)
{
unsigned int i;
struct netdev_event_work *ndev_work =
kmalloc(sizeof(*ndev_work), GFP_KERNEL);
if (!ndev_work) {
pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n");
return NOTIFY_DONE;
}
memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
if (!ndev_work->cmds[i].ndev)
ndev_work->cmds[i].ndev = ndev;
if (!ndev_work->cmds[i].filter_ndev)
ndev_work->cmds[i].filter_ndev = ndev;
dev_hold(ndev_work->cmds[i].ndev);
dev_hold(ndev_work->cmds[i].filter_ndev);
}
INIT_WORK(&ndev_work->work, netdevice_event_work_handler);
queue_work(ib_wq, &ndev_work->work);
return NOTIFY_DONE;
}
static const struct netdev_event_work_cmd add_cmd = {
.cb = add_netdev_ips, .filter = is_eth_port_of_netdev};
static const struct netdev_event_work_cmd add_cmd_upper_ips = {
.cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev};
static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info,
struct netdev_event_work_cmd *cmds)
{
static const struct netdev_event_work_cmd upper_ips_del_cmd = {
.cb = del_netdev_upper_ips, .filter = upper_device_filter};
static const struct netdev_event_work_cmd bonding_default_del_cmd = {
.cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave};
if (changeupper_info->linking == false) {
cmds[0] = upper_ips_del_cmd;
cmds[0].ndev = changeupper_info->upper_dev;
cmds[1] = add_cmd;
} else {
cmds[0] = bonding_default_del_cmd;
cmds[0].ndev = changeupper_info->upper_dev;
cmds[1] = add_cmd_upper_ips;
cmds[1].ndev = changeupper_info->upper_dev;
cmds[1].filter_ndev = changeupper_info->upper_dev;
}
}
static int netdevice_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
static const struct netdev_event_work_cmd del_cmd = {
.cb = del_netdev_ips, .filter = pass_all_filter};
static const struct netdev_event_work_cmd bonding_default_del_cmd_join = {
.cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave};
static const struct netdev_event_work_cmd default_del_cmd = {
.cb = del_netdev_default_ips, .filter = pass_all_filter};
static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
.cb = del_netdev_upper_ips, .filter = upper_device_filter};
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} };
if (ndev->type != ARPHRD_ETHER)
return NOTIFY_DONE;
switch (event) {
case NETDEV_REGISTER:
case NETDEV_UP:
cmds[0] = bonding_default_del_cmd_join;
cmds[1] = add_cmd;
break;
case NETDEV_UNREGISTER:
if (ndev->reg_state < NETREG_UNREGISTERED)
cmds[0] = del_cmd;
else
return NOTIFY_DONE;
break;
case NETDEV_CHANGEADDR:
cmds[0] = default_del_cmd;
cmds[1] = add_cmd;
break;
case NETDEV_CHANGEUPPER:
netdevice_event_changeupper(
container_of(ptr, struct netdev_notifier_changeupper_info, info),
cmds);
break;
case NETDEV_BONDING_FAILOVER:
cmds[0] = bonding_event_ips_del_cmd;
cmds[1] = bonding_default_del_cmd_join;
cmds[2] = add_cmd_upper_ips;
break;
default:
return NOTIFY_DONE;
}
return netdevice_queue_work(cmds, ndev);
}
static void update_gid_event_work_handler(struct work_struct *_work)
{
struct update_gid_event_work *work =
container_of(_work, struct update_gid_event_work, work);
ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev,
callback_for_addr_gid_device_scan, work);
dev_put(work->gid_attr.ndev);
kfree(work);
}
static int addr_event(struct notifier_block *this, unsigned long event,
struct sockaddr *sa, struct net_device *ndev)
{
struct update_gid_event_work *work;
enum gid_op_type gid_op;
if (ndev->type != ARPHRD_ETHER)
return NOTIFY_DONE;
switch (event) {
case NETDEV_UP:
gid_op = GID_ADD;
break;
case NETDEV_DOWN:
gid_op = GID_DEL;
break;
default:
return NOTIFY_DONE;
}
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
return NOTIFY_DONE;
}
INIT_WORK(&work->work, update_gid_event_work_handler);
rdma_ip2gid(sa, &work->gid);
work->gid_op = gid_op;
memset(&work->gid_attr, 0, sizeof(work->gid_attr));
dev_hold(ndev);
work->gid_attr.ndev = ndev;
queue_work(ib_wq, &work->work);
return NOTIFY_DONE;
}
static int inetaddr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct sockaddr_in in;
struct net_device *ndev;
struct in_ifaddr *ifa = ptr;
in.sin_family = AF_INET;
in.sin_addr.s_addr = ifa->ifa_address;
ndev = ifa->ifa_dev->dev;
return addr_event(this, event, (struct sockaddr *)&in, ndev);
}
static int inet6addr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct sockaddr_in6 in6;
struct net_device *ndev;
struct inet6_ifaddr *ifa6 = ptr;
in6.sin6_family = AF_INET6;
in6.sin6_addr = ifa6->addr;
ndev = ifa6->idev->dev;
return addr_event(this, event, (struct sockaddr *)&in6, ndev);
}
static struct notifier_block nb_netdevice = {
.notifier_call = netdevice_event
};
static struct notifier_block nb_inetaddr = {
.notifier_call = inetaddr_event
};
static struct notifier_block nb_inet6addr = {
.notifier_call = inet6addr_event
};
int __init roce_gid_mgmt_init(void)
{
register_inetaddr_notifier(&nb_inetaddr);
if (IS_ENABLED(CONFIG_IPV6))
register_inet6addr_notifier(&nb_inet6addr);
/* We rely on the netdevice notifier to enumerate all
* existing devices in the system. Register this notifier
* last to make sure we will not miss any IP add/del
* callbacks.
*/
register_netdevice_notifier(&nb_netdevice);
return 0;
}
void __exit roce_gid_mgmt_cleanup(void)
{
if (IS_ENABLED(CONFIG_IPV6))
unregister_inet6addr_notifier(&nb_inet6addr);
unregister_inetaddr_notifier(&nb_inetaddr);
unregister_netdevice_notifier(&nb_netdevice);
/* Ensure all gid deletion tasks complete before we go down,
* to avoid any reference to free'd memory. By the time
* ib-core is removed, all physical devices have been removed,
* so no issue with remaining hardware contexts.
*/
}


@@ -45,12 +45,21 @@
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
#include "sa.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
struct ib_ah *ah;
struct kref ref;
@@ -80,8 +89,16 @@ struct ib_sa_query {
struct ib_mad_send_buf *mad_buf;
struct ib_sa_sm_ah *sm_ah;
int id;
u32 flags;
struct list_head list; /* Local svc request list */
u32 seq; /* Local svc request sequence number */
unsigned long timeout; /* Local svc timeout */
u8 path_use; /* How will the pathrecord be used */
};
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
struct ib_sa_service_query {
void (*callback)(int, struct ib_sa_service_rec *, void *);
void *context;
@@ -106,8 +123,28 @@ struct ib_sa_mcmember_query {
struct ib_sa_query sa_query;
};
static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
static struct workqueue_struct *ib_nl_wq;
static struct delayed_work ib_nl_timed_work;
static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_PATH_RECORD] = {.type = NLA_BINARY,
.len = sizeof(struct ib_path_rec_data)},
[LS_NLA_TYPE_TIMEOUT] = {.type = NLA_U32},
[LS_NLA_TYPE_SERVICE_ID] = {.type = NLA_U64},
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
.len = sizeof(struct rdma_nla_ls_gid)},
[LS_NLA_TYPE_SGID] = {.type = NLA_BINARY,
.len = sizeof(struct rdma_nla_ls_gid)},
[LS_NLA_TYPE_TCLASS] = {.type = NLA_U8},
[LS_NLA_TYPE_PKEY] = {.type = NLA_U16},
[LS_NLA_TYPE_QOS_CLASS] = {.type = NLA_U16},
};
static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);
static struct ib_client sa_client = {
.name = "sa",
@@ -381,6 +418,427 @@ static const struct ib_field guidinfo_rec_table[] = {
.size_bits = 512 },
};
static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
{
query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
}
static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
{
return (query->flags & IB_SA_CANCEL);
}
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
struct ib_sa_query *query)
{
struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
struct ib_sa_mad *mad = query->mad_buf->mad;
ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
u16 val16;
u64 val64;
struct rdma_ls_resolve_header *header;
query->mad_buf->context[1] = NULL;
/* Construct the family header first */
header = (struct rdma_ls_resolve_header *)
skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
memcpy(header->device_name, query->port->agent->device->name,
LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
sa_rec->reversible != 0)
query->path_use = LS_RESOLVE_PATH_USE_GMP;
else
query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
header->path_use = query->path_use;
/* Now build the attributes */
if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
val64 = be64_to_cpu(sa_rec->service_id);
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
sizeof(val64), &val64);
}
if (comp_mask & IB_SA_PATH_REC_DGID)
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
sizeof(sa_rec->dgid), &sa_rec->dgid);
if (comp_mask & IB_SA_PATH_REC_SGID)
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
sizeof(sa_rec->sgid), &sa_rec->sgid);
if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
if (comp_mask & IB_SA_PATH_REC_PKEY) {
val16 = be16_to_cpu(sa_rec->pkey);
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
sizeof(val16), &val16);
}
if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
val16 = be16_to_cpu(sa_rec->qos_class);
nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
sizeof(val16), &val16);
}
}
static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
{
int len = 0;
if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
len += nla_total_size(sizeof(u64));
if (comp_mask & IB_SA_PATH_REC_DGID)
len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
if (comp_mask & IB_SA_PATH_REC_SGID)
len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
len += nla_total_size(sizeof(u8));
if (comp_mask & IB_SA_PATH_REC_PKEY)
len += nla_total_size(sizeof(u16));
if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
len += nla_total_size(sizeof(u16));
/*
* Make sure that at least some of the required comp_mask bits are
* set.
*/
if (WARN_ON(len == 0))
return len;
/* Add the family header */
len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
return len;
}
static int ib_nl_send_msg(struct ib_sa_query *query)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
int ret = 0;
struct ib_sa_mad *mad;
int len;
mad = query->mad_buf->mad;
len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
if (len <= 0)
return -EMSGSIZE;
skb = nlmsg_new(len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
/* Put nlmsg header only for now */
data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
if (!data) {
kfree_skb(skb);
return -EMSGSIZE;
}
/* Add attributes */
ib_nl_set_path_rec_attrs(skb, query);
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
if (!ret)
ret = len;
else
ret = 0;
return ret;
}
static int ib_nl_make_request(struct ib_sa_query *query)
{
unsigned long flags;
unsigned long delay;
int ret;
INIT_LIST_HEAD(&query->list);
query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
spin_lock_irqsave(&ib_nl_request_lock, flags);
ret = ib_nl_send_msg(query);
if (ret <= 0) {
ret = -EIO;
goto request_out;
} else {
ret = 0;
}
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
request_out:
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
return ret;
}
static int ib_nl_cancel_request(struct ib_sa_query *query)
{
unsigned long flags;
struct ib_sa_query *wait_query;
int found = 0;
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_for_each_entry(wait_query, &ib_nl_request_list, list) {
/* Let the timeout take care of the callback */
if (query == wait_query) {
query->flags |= IB_SA_CANCEL;
query->timeout = jiffies;
list_move(&query->list, &ib_nl_request_list);
found = 1;
mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
break;
}
}
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
return found;
}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc);
static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
const struct nlmsghdr *nlh)
{
struct ib_mad_send_wc mad_send_wc;
struct ib_sa_mad *mad = NULL;
const struct nlattr *head, *curr;
struct ib_path_rec_data *rec;
int len, rem;
u32 mask = 0;
int status = -EIO;
if (query->callback) {
head = (const struct nlattr *) nlmsg_data(nlh);
len = nlmsg_len(nlh);
switch (query->path_use) {
case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
break;
case LS_RESOLVE_PATH_USE_ALL:
case LS_RESOLVE_PATH_USE_GMP:
default:
mask = IB_PATH_PRIMARY | IB_PATH_GMP |
IB_PATH_BIDIRECTIONAL;
break;
}
nla_for_each_attr(curr, head, len, rem) {
if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
rec = nla_data(curr);
/*
* Get the first one. In the future, we may
* need to get up to 6 pathrecords.
*/
if ((rec->flags & mask) == mask) {
mad = query->mad_buf->mad;
mad->mad_hdr.method |=
IB_MGMT_METHOD_RESP;
memcpy(mad->data, rec->path_rec,
sizeof(rec->path_rec));
status = 0;
break;
}
}
}
query->callback(query, status, mad);
}
mad_send_wc.send_buf = query->mad_buf;
mad_send_wc.status = IB_WC_SUCCESS;
send_handler(query->mad_buf->mad_agent, &mad_send_wc);
}
static void ib_nl_request_timeout(struct work_struct *work)
{
unsigned long flags;
struct ib_sa_query *query;
unsigned long delay;
struct ib_mad_send_wc mad_send_wc;
int ret;
spin_lock_irqsave(&ib_nl_request_lock, flags);
while (!list_empty(&ib_nl_request_list)) {
query = list_entry(ib_nl_request_list.next,
struct ib_sa_query, list);
if (time_after(query->timeout, jiffies)) {
delay = query->timeout - jiffies;
if ((long)delay <= 0)
delay = 1;
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
break;
}
list_del(&query->list);
ib_sa_disable_local_svc(query);
/* Hold the lock to protect against query cancellation */
if (ib_sa_query_cancelled(query))
ret = -1;
else
ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
mad_send_wc.send_buf = query->mad_buf;
mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
send_handler(query->port->agent, &mad_send_wc);
spin_lock_irqsave(&ib_nl_request_lock, flags);
}
}
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
static int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta;
const struct nlattr *attr;
unsigned long flags;
struct ib_sa_query *query;
long delay = 0;
struct nlattr *tb[LS_NLA_TYPE_MAX];
int ret;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
nlmsg_len(nlh), ib_nl_policy);
attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
if (ret || !attr)
goto settimeout_out;
timeout = *(int *) nla_data(attr);
if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
delta = timeout - sa_local_svc_timeout_ms;
if (delta < 0)
abs_delta = -delta;
else
abs_delta = delta;
if (delta != 0) {
spin_lock_irqsave(&ib_nl_request_lock, flags);
sa_local_svc_timeout_ms = timeout;
list_for_each_entry(query, &ib_nl_request_list, list) {
if (delta < 0 && abs_delta > query->timeout)
query->timeout = 0;
else
query->timeout += delta;
/* Get the new delay from the first entry */
if (!delay) {
delay = query->timeout - jiffies;
if (delay <= 0)
delay = 1;
}
}
if (delay)
mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
(unsigned long)delay);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
settimeout_out:
return skb->len;
}
static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
{
struct nlattr *tb[LS_NLA_TYPE_MAX];
int ret;
if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
return 0;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
nlmsg_len(nlh), ib_nl_policy);
if (ret)
return 0;
return 1;
}
static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags;
struct ib_sa_query *query;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
int found = 0;
int ret;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_for_each_entry(query, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
if (nlh->nlmsg_seq == query->seq) {
found = !ib_sa_query_cancelled(query);
if (found)
list_del(&query->list);
break;
}
}
if (!found) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}
send_buf = query->mad_buf;
if (!ib_nl_is_good_resolve_resp(nlh)) {
/* if the result is a failure, send out the packet via IB */
ib_sa_disable_local_svc(query);
ret = ib_post_send_mad(query->mad_buf, NULL);
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
if (ret) {
mad_send_wc.send_buf = send_buf;
mad_send_wc.status = IB_WC_GENERAL_ERR;
send_handler(query->port->agent, &mad_send_wc);
}
} else {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
ib_nl_process_good_resolve_rsp(query, nlh);
}
resp_out:
return skb->len;
}
static struct ibnl_client_cbs ib_sa_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.dump = ib_nl_handle_resolve_resp,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
.dump = ib_nl_handle_set_timeout,
.module = THIS_MODULE },
};
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -502,7 +960,13 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
mad_buf = query->mad_buf;
spin_unlock_irqrestore(&idr_lock, flags);
ib_cancel_mad(agent, mad_buf);
/*
* If the query is still on the netlink request list, schedule
* it to be cancelled by the timeout routine. Otherwise, it has been
* sent to the MAD layer and has to be cancelled from there.
*/
if (!ib_nl_cancel_request(query))
ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
@@ -639,6 +1103,14 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
query->mad_buf->context[0] = query;
query->id = id;
if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query))
return id;
}
ib_sa_disable_local_svc(query);
}
ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
spin_lock_irqsave(&idr_lock, flags);
@@ -740,7 +1212,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kmalloc(sizeof *query, gfp_mask);
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
@@ -767,6 +1239,9 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
*sa_query = &query->sa_query;
query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
query->sa_query.mad_buf->context[1] = rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
@@ -862,7 +1337,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
method != IB_SA_METHOD_DELETE)
return -EINVAL;
query = kmalloc(sizeof *query, gfp_mask);
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
@@ -954,7 +1429,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kmalloc(sizeof *query, gfp_mask);
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
@@ -1051,7 +1526,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kmalloc(sizeof *query, gfp_mask);
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
@@ -1221,9 +1696,9 @@ free:
return;
}
static void ib_sa_remove_one(struct ib_device *device)
static void ib_sa_remove_one(struct ib_device *device, void *client_data)
{
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_device *sa_dev = client_data;
int i;
if (!sa_dev)
@@ -1251,6 +1726,8 @@ static int __init ib_sa_init(void)
get_random_bytes(&tid, sizeof tid);
atomic_set(&ib_nl_sa_request_seq, 0);
ret = ib_register_client(&sa_client);
if (ret) {
printk(KERN_ERR "Couldn't register ib_sa client\n");
@@ -1263,7 +1740,25 @@ static int __init ib_sa_init(void)
goto err2;
}
ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq");
if (!ib_nl_wq) {
ret = -ENOMEM;
goto err3;
}
if (ibnl_add_client(RDMA_NL_LS, RDMA_NL_LS_NUM_OPS,
ib_sa_cb_table)) {
pr_err("Failed to add netlink callback\n");
ret = -EINVAL;
goto err4;
}
INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
return 0;
err4:
destroy_workqueue(ib_nl_wq);
err3:
mcast_cleanup();
err2:
ib_unregister_client(&sa_client);
err1:
@@ -1272,6 +1767,10 @@ err1:
static void __exit ib_sa_cleanup(void)
{
ibnl_remove_client(RDMA_NL_LS);
cancel_delayed_work(&ib_nl_timed_work);
flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq);
mcast_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);


@@ -457,29 +457,6 @@ static struct kobj_type port_type = {
.default_attrs = port_default_attrs
};
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
kfree(dev->port_immutable);
kfree(dev);
}
static int ib_device_uevent(struct device *device,
struct kobj_uevent_env *env)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
if (add_uevent_var(env, "NAME=%s", dev->name))
return -ENOMEM;
/*
* It would be nice to pass the node GUID with the event...
*/
return 0;
}
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
struct port_attribute *, char *buf),
@@ -702,12 +679,6 @@ static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_desc
};
static struct class ib_class = {
.name = "infiniband",
.dev_release = ib_device_release,
.dev_uevent = ib_device_uevent,
};
/* Show a given an attribute in the statistics group */
static ssize_t show_protocol_stat(const struct device *device,
struct device_attribute *attr, char *buf,
@@ -846,14 +817,12 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
class_dev->class = &ib_class;
class_dev->parent = device->dma_device;
dev_set_name(class_dev, "%s", device->name);
dev_set_drvdata(class_dev, device);
device->dev.parent = device->dma_device;
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
INIT_LIST_HEAD(&device->port_list);
ret = device_register(class_dev);
ret = device_add(class_dev);
if (ret)
goto err;
@@ -916,13 +885,3 @@ void ib_device_unregister_sysfs(struct ib_device *device)
device_unregister(&device->dev);
}
int ib_sysfs_setup(void)
{
return class_register(&ib_class);
}
void ib_sysfs_cleanup(void)
{
class_unregister(&ib_class);
}


@@ -109,7 +109,7 @@ enum {
#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
static void ib_ucm_add_one(struct ib_device *device);
static void ib_ucm_remove_one(struct ib_device *device);
static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
static struct ib_client ucm_client = {
.name = "ucm",
@@ -658,8 +658,7 @@ static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
if (result)
goto out;
result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask,
NULL);
result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask);
out:
ib_ucm_ctx_put(ctx);
return result;
@@ -1310,9 +1309,9 @@ err:
return;
}
static void ib_ucm_remove_one(struct ib_device *device)
static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
{
struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
struct ib_ucm_device *ucm_dev = client_data;
if (!ucm_dev)
return;


@@ -74,6 +74,7 @@ struct ucma_file {
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
struct workqueue_struct *close_wq;
};
struct ucma_context {
@@ -89,6 +90,13 @@ struct ucma_context {
struct list_head list;
struct list_head mc_list;
/* mark that device is in process of destroying the internal HW
* resources, protected by the global mut
*/
int closing;
/* sync between removal event and id destroy, protected by file mut */
int destroying;
struct work_struct close_work;
};
struct ucma_multicast {
@@ -107,6 +115,7 @@ struct ucma_event {
struct list_head list;
struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
struct work_struct close_work;
};
static DEFINE_MUTEX(mut);
@@ -132,8 +141,12 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
mutex_lock(&mut);
ctx = _ucma_find_context(id, file);
if (!IS_ERR(ctx))
atomic_inc(&ctx->ref);
if (!IS_ERR(ctx)) {
if (ctx->closing)
ctx = ERR_PTR(-EIO);
else
atomic_inc(&ctx->ref);
}
mutex_unlock(&mut);
return ctx;
}
@@ -144,6 +157,28 @@ static void ucma_put_ctx(struct ucma_context *ctx)
complete(&ctx->comp);
}
static void ucma_close_event_id(struct work_struct *work)
{
struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
rdma_destroy_id(uevent_close->cm_id);
kfree(uevent_close);
}
static void ucma_close_id(struct work_struct *work)
{
struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
/* Once all inflight tasks have finished, we close all underlying
 * resources. The context stays alive until it is explicitly destroyed
 * by its creator.
*/
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
struct ucma_context *ctx;
@@ -152,6 +187,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
if (!ctx)
return NULL;
INIT_WORK(&ctx->close_work, ucma_close_id);
atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
@@ -242,6 +278,44 @@ static void ucma_set_event_context(struct ucma_context *ctx,
}
}
/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
struct ucma_context *ctx = cm_id->context;
struct ucma_event *con_req_eve;
int event_found = 0;
if (ctx->destroying)
return;
/* Only the context that owns this cm_id may queue it to be closed.
* Otherwise the cm_id is an inflight one that sits on this context's
* event list, pending detachment and reattachment to its new context
* as part of ucma_get_event, and is handled separately below.
*/
if (ctx->cm_id == cm_id) {
mutex_lock(&mut);
ctx->closing = 1;
mutex_unlock(&mut);
queue_work(ctx->file->close_wq, &ctx->close_work);
return;
}
list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
if (con_req_eve->cm_id == cm_id &&
con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
list_del(&con_req_eve->list);
INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
queue_work(ctx->file->close_wq, &con_req_eve->close_work);
event_found = 1;
break;
}
}
if (!event_found)
printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n");
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
@@ -276,14 +350,21 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
* We ignore events for new connections until userspace has set
* their context. This can only happen if an error occurs on a
* new connection before the user accepts it. This is okay,
* since the accept will just fail later.
* since the accept will just fail later. However, we do need
* to release the underlying HW resources in case of a device
* removal event.
*/
if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
ucma_removal_event_handler(cm_id);
kfree(uevent);
goto out;
}
list_add_tail(&uevent->list, &ctx->file->event_list);
wake_up_interruptible(&ctx->file->poll_wait);
if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
ucma_removal_event_handler(cm_id);
out:
mutex_unlock(&ctx->file->mut);
return ret;
@@ -442,9 +523,15 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
}
/*
* We cannot hold file->mut when calling rdma_destroy_id() or we can
* deadlock. We also acquire file->mut in ucma_event_handler(), and
* rdma_destroy_id() will wait until all callbacks have completed.
* ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
* this point, no new events will be reported from the hardware. However, we
* still need to cleanup the UCMA context for this ID. Specifically, there
* might be events that have not yet been consumed by the user space software.
* These might include pending connect requests which we have not completed
* processing. We cannot call rdma_destroy_id while holding the lock of the
* context (file->mut), as it might cause a deadlock. We therefore extract all
* relevant events from the context pending events list while holding the
* mutex. After that we release them as needed.
*/
static int ucma_free_ctx(struct ucma_context *ctx)
{
@@ -452,8 +539,6 @@ static int ucma_free_ctx(struct ucma_context *ctx)
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
ucma_cleanup_multicast(ctx);
@@ -501,10 +586,24 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
resp.events_reported = ucma_free_ctx(ctx);
mutex_lock(&ctx->file->mut);
ctx->destroying = 1;
mutex_unlock(&ctx->file->mut);
flush_workqueue(ctx->file->close_wq);
/* At this point it's guaranteed that there is no inflight
* closing task */
mutex_lock(&mut);
if (!ctx->closing) {
mutex_unlock(&mut);
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
rdma_destroy_id(ctx->cm_id);
} else {
mutex_unlock(&mut);
}
resp.events_reported = ucma_free_ctx(ctx);
if (copy_to_user((void __user *)(unsigned long)cmd.response,
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1321,10 +1420,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
mc = ERR_PTR(-ENOENT);
else if (mc->ctx->file != file)
mc = ERR_PTR(-EINVAL);
else {
else if (!atomic_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
else
idr_remove(&multicast_idr, mc->id);
atomic_inc(&mc->ctx->ref);
}
mutex_unlock(&mut);
if (IS_ERR(mc)) {
@@ -1529,6 +1628,7 @@ static int ucma_open(struct inode *inode, struct file *filp)
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
mutex_init(&file->mut);
file->close_wq = create_singlethread_workqueue("ucma_close_id");
filp->private_data = file;
file->filp = filp;
@@ -1543,16 +1643,34 @@ static int ucma_close(struct inode *inode, struct file *filp)
mutex_lock(&file->mut);
list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
ctx->destroying = 1;
mutex_unlock(&file->mut);
mutex_lock(&mut);
idr_remove(&ctx_idr, ctx->id);
mutex_unlock(&mut);
flush_workqueue(file->close_wq);
/* At this point the ctx has been marked as destroying and the
* workqueue has been flushed, so we are safe from any inflight
* handlers that might queue another closing task.
*/
mutex_lock(&mut);
if (!ctx->closing) {
mutex_unlock(&mut);
/* rdma_destroy_id ensures that no event handlers are
* inflight for that id before releasing it.
*/
rdma_destroy_id(ctx->cm_id);
} else {
mutex_unlock(&mut);
}
ucma_free_ctx(ctx);
mutex_lock(&file->mut);
}
mutex_unlock(&file->mut);
destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
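
The ucma changes above hang off a per-file close workqueue: a removal event queues a work item that releases the underlying CM ID, and the destroy/close paths flush that queue before freeing anything. For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of the same idea; all demo_* names are invented and not part of the patch.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_ctx {
	struct work_struct close_work;
	struct workqueue_struct *close_wq;
};

static void demo_close_work(struct work_struct *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, close_work);

	/* Release HW/SW resources here, outside the caller's locks. */
	pr_info("demo: closing ctx %p\n", ctx);
}

static struct demo_ctx *demo_ctx_alloc(void)
{
	struct demo_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return NULL;

	ctx->close_wq = create_singlethread_workqueue("demo_close");
	if (!ctx->close_wq) {
		kfree(ctx);
		return NULL;
	}
	INIT_WORK(&ctx->close_work, demo_close_work);
	return ctx;
}

/* Called from an event handler that must not block on the teardown. */
static void demo_ctx_schedule_close(struct demo_ctx *ctx)
{
	queue_work(ctx->close_wq, &ctx->close_work);
}

static void demo_ctx_destroy(struct demo_ctx *ctx)
{
	/* Make sure any queued close work has run before freeing. */
	flush_workqueue(ctx->close_wq);
	destroy_workqueue(ctx->close_wq);
	kfree(ctx);
}

queue_work() from the event handler is what moves the actual release out of the handler's locking context, mirroring ucma_removal_event_handler() and ucma_close_id() above.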


@@ -133,7 +133,7 @@ static DEFINE_SPINLOCK(port_lock);
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
static void ib_umad_release_dev(struct kobject *kobj)
{
@@ -1322,9 +1322,9 @@ free:
kobject_put(&umad_dev->kobj);
}
static void ib_umad_remove_one(struct ib_device *device)
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
{
struct ib_umad_device *umad_dev = ib_get_client_data(device, &umad_client);
struct ib_umad_device *umad_dev = client_data;
int i;
if (!umad_dev)

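
Several hunks in this series (ucm, user_mad, uverbs, sa_query) apply the same mechanical conversion: an ib_client's remove callback now receives its client data directly, so ib_get_client_data() is no longer needed on the removal path. A hypothetical client using the new signature might look roughly like this; the exdrv_* names are invented for illustration only.

#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

static void exdrv_add_one(struct ib_device *device);
static void exdrv_remove_one(struct ib_device *device, void *client_data);

static struct ib_client exdrv_client = {
	.name   = "exdrv",
	.add    = exdrv_add_one,
	.remove = exdrv_remove_one,
};

struct exdrv_device {
	struct ib_device *ib_dev;
};

static void exdrv_add_one(struct ib_device *device)
{
	struct exdrv_device *xdev = kzalloc(sizeof(*xdev), GFP_KERNEL);

	if (!xdev)
		return;

	xdev->ib_dev = device;
	/* Whatever is stored here comes back as client_data on removal. */
	ib_set_client_data(device, &exdrv_client, xdev);
}

static void exdrv_remove_one(struct ib_device *device, void *client_data)
{
	/* New signature: no ib_get_client_data() lookup required. */
	struct exdrv_device *xdev = client_data;

	if (!xdev)
		return;
	kfree(xdev);
}

static int __init exdrv_init(void)
{
	return ib_register_client(&exdrv_client);
}

static void __exit exdrv_exit(void)
{
	ib_unregister_client(&exdrv_client);
}

module_init(exdrv_init);
module_exit(exdrv_exit);
MODULE_LICENSE("GPL");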

@@ -85,15 +85,20 @@
*/
struct ib_uverbs_device {
struct kref ref;
atomic_t refcount;
int num_comp_vectors;
struct completion comp;
struct device *dev;
struct ib_device *ib_dev;
struct ib_device __rcu *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
struct kobject kobj;
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
struct list_head uverbs_events_file_list;
};
struct ib_uverbs_event_file {
@@ -105,6 +110,7 @@ struct ib_uverbs_event_file {
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
struct list_head list;
};
struct ib_uverbs_file {
@@ -114,6 +120,8 @@ struct ib_uverbs_file {
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
struct list_head list;
int is_closed;
};
struct ib_uverbs_event {
@@ -177,7 +185,9 @@ extern struct idr ib_uverbs_rule_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev,
int is_async);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
@@ -212,6 +222,7 @@ struct ib_uverbs_flow_spec {
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
struct ib_device *ib_dev, \
const char __user *buf, int in_len, \
int out_len)
@@ -253,6 +264,7 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
#define IB_UVERBS_DECLARE_EX_CMD(name) \
int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
struct ib_device *ib_dev, \
struct ib_udata *ucore, \
struct ib_udata *uhw)


@@ -282,13 +282,13 @@ static void put_xrcd_read(struct ib_uobject *uobj)
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
struct ib_udata udata;
struct ib_device *ibdev = file->device->ib_dev;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_device_attr dev_attr;
#endif
@@ -313,13 +313,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
ucontext = ibdev->alloc_ucontext(ibdev, &udata);
ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto err;
}
ucontext->device = ibdev;
ucontext->device = ib_dev;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
INIT_LIST_HEAD(&ucontext->mw_list);
@@ -340,7 +340,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->odp_mrs_count = 0;
INIT_LIST_HEAD(&ucontext->no_private_counters);
ret = ib_query_device(ibdev, &dev_attr);
ret = ib_query_device(ib_dev, &dev_attr);
if (ret)
goto err_free;
if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
@@ -355,7 +355,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
filp = ib_uverbs_alloc_event_file(file, 1);
filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -367,16 +367,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_file;
}
file->async_file = filp->private_data;
INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
ib_uverbs_event_handler);
ret = ib_register_event_handler(&file->event_handler);
if (ret)
goto err_file;
kref_get(&file->async_file->ref);
kref_get(&file->ref);
file->ucontext = ucontext;
fd_install(resp.async_fd, filp);
@@ -386,6 +376,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
return in_len;
err_file:
ib_uverbs_free_async_event_file(file);
fput(filp);
err_fd:
@@ -393,7 +384,7 @@ err_fd:
err_free:
put_pid(ucontext->tgid);
ibdev->dealloc_ucontext(ucontext);
ib_dev->dealloc_ucontext(ucontext);
err:
mutex_unlock(&file->mutex);
@@ -401,11 +392,12 @@ err:
}
static void copy_query_dev_fields(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_uverbs_query_device_resp *resp,
struct ib_device_attr *attr)
{
resp->fw_ver = attr->fw_ver;
resp->node_guid = file->device->ib_dev->node_guid;
resp->node_guid = ib_dev->node_guid;
resp->sys_image_guid = attr->sys_image_guid;
resp->max_mr_size = attr->max_mr_size;
resp->page_size_cap = attr->page_size_cap;
@@ -443,10 +435,11 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
resp->max_srq_sge = attr->max_srq_sge;
resp->max_pkeys = attr->max_pkeys;
resp->local_ca_ack_delay = attr->local_ca_ack_delay;
resp->phys_port_cnt = file->device->ib_dev->phys_port_cnt;
resp->phys_port_cnt = ib_dev->phys_port_cnt;
}
ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -461,12 +454,12 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
ret = ib_query_device(file->device->ib_dev, &attr);
ret = ib_query_device(ib_dev, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
copy_query_dev_fields(file, &resp, &attr);
copy_query_dev_fields(file, ib_dev, &resp, &attr);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -476,6 +469,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -490,7 +484,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr);
ret = ib_query_port(ib_dev, cmd.port_num, &attr);
if (ret)
return ret;
@@ -515,7 +509,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.active_width = attr.active_width;
resp.active_speed = attr.active_speed;
resp.phys_state = attr.phys_state;
resp.link_layer = rdma_port_get_link_layer(file->device->ib_dev,
resp.link_layer = rdma_port_get_link_layer(ib_dev,
cmd.port_num);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -526,6 +520,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -553,15 +548,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
down_write(&uobj->mutex);
pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
file->ucontext, &udata);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto err;
}
pd->device = file->device->ib_dev;
pd->device = ib_dev;
pd->uobject = uobj;
pd->local_mr = NULL;
atomic_set(&pd->usecnt, 0);
uobj->object = pd;
@@ -600,11 +595,13 @@ err:
}
ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
struct ib_uverbs_dealloc_pd cmd;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -613,15 +610,20 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
if (!uobj)
return -EINVAL;
pd = uobj->object;
ret = ib_dealloc_pd(uobj->object);
if (!ret)
uobj->live = 0;
put_uobj_write(uobj);
if (atomic_read(&pd->usecnt)) {
ret = -EBUSY;
goto err_put;
}
ret = pd->device->dealloc_pd(uobj->object);
WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
if (ret)
return ret;
goto err_put;
uobj->live = 0;
put_uobj_write(uobj);
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
@@ -632,6 +634,10 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
put_uobj(uobj);
return in_len;
err_put:
put_uobj_write(uobj);
return ret;
}
struct xrcd_table_entry {
@@ -720,6 +726,7 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
}
ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -778,15 +785,14 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
down_write(&obj->uobject.mutex);
if (!xrcd) {
xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
file->ucontext, &udata);
xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
}
xrcd->inode = inode;
xrcd->device = file->device->ib_dev;
xrcd->device = ib_dev;
atomic_set(&xrcd->usecnt, 0);
mutex_init(&xrcd->tgt_qp_mutex);
INIT_LIST_HEAD(&xrcd->tgt_qp_list);
@@ -857,6 +863,7 @@ err_tree_mutex_unlock:
}
ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -934,6 +941,7 @@ void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
}
ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1043,6 +1051,7 @@ err_free:
}
ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1136,6 +1145,7 @@ put_uobjs:
}
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1174,8 +1184,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_alloc_mw cmd;
struct ib_uverbs_alloc_mw_resp resp;
@@ -1256,8 +1267,9 @@ err_free:
}
ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
struct ib_mw *mw;
@@ -1294,6 +1306,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1313,7 +1326,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
return ret;
resp.fd = ret;
filp = ib_uverbs_alloc_event_file(file, 0);
filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
if (IS_ERR(filp)) {
put_unused_fd(resp.fd);
return PTR_ERR(filp);
@@ -1331,6 +1344,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
}
static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw,
struct ib_uverbs_ex_create_cq *cmd,
@@ -1379,14 +1393,14 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
cq = file->device->ib_dev->create_cq(file->device->ib_dev, &attr,
cq = ib_dev->create_cq(ib_dev, &attr,
file->ucontext, uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
}
cq->device = file->device->ib_dev;
cq->device = ib_dev;
cq->uobject = &obj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
@@ -1447,6 +1461,7 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1475,7 +1490,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
obj = create_cq(file, &ucore, &uhw, &cmd_ex,
obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
offsetof(typeof(cmd_ex), comp_channel) +
sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
NULL);
@@ -1498,6 +1513,7 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
}
int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -1523,7 +1539,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
sizeof(resp.response_length)))
return -ENOSPC;
obj = create_cq(file, ucore, uhw, &cmd,
obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
min(ucore->inlen, sizeof(cmd)),
ib_uverbs_ex_create_cq_cb, NULL);
@@ -1534,6 +1550,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1597,6 +1614,7 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
}
ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1648,6 +1666,7 @@ out_put:
}
ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1670,6 +1689,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1722,6 +1742,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -1917,6 +1938,7 @@ err_put:
}
ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_open_qp cmd;
@@ -2011,6 +2033,7 @@ err_put:
}
ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2125,6 +2148,7 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
}
ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2221,6 +2245,7 @@ out:
}
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2279,6 +2304,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2346,6 +2372,12 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
next->send_flags = user_wr->send_flags;
if (is_ud) {
if (next->opcode != IB_WR_SEND &&
next->opcode != IB_WR_SEND_WITH_IMM) {
ret = -EINVAL;
goto out_put;
}
next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
file->ucontext);
if (!next->wr.ud.ah) {
@@ -2385,9 +2417,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
user_wr->wr.atomic.compare_add;
next->wr.atomic.swap = user_wr->wr.atomic.swap;
next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
case IB_WR_SEND:
break;
default:
break;
ret = -EINVAL;
goto out_put;
}
}
@@ -2523,6 +2557,7 @@ err:
}
ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2572,6 +2607,7 @@ out:
}
ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2621,6 +2657,7 @@ out:
}
ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2713,6 +2750,7 @@ err:
}
ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
@@ -2749,6 +2787,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2796,6 +2835,7 @@ out_put:
}
ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -2876,6 +2916,7 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3036,6 +3077,7 @@ err_free_attr:
}
int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
@@ -3078,6 +3120,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
}
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
@@ -3211,6 +3254,7 @@ err:
}
ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -3238,7 +3282,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
ret = __uverbs_create_xsrq(file, &xcmd, &udata);
ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
if (ret)
return ret;
@@ -3246,6 +3290,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_create_xsrq cmd;
@@ -3263,7 +3308,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
ret = __uverbs_create_xsrq(file, &cmd, &udata);
ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
if (ret)
return ret;
@@ -3271,6 +3316,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -3301,6 +3347,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
int in_len, int out_len)
{
@@ -3341,6 +3388,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
}
ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len)
{
@@ -3398,16 +3446,15 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
}
int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_query_device_resp resp;
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr;
struct ib_device *device;
int err;
device = file->device->ib_dev;
if (ucore->inlen < sizeof(cmd))
return -EINVAL;
@@ -3428,11 +3475,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
memset(&attr, 0, sizeof(attr));
err = device->query_device(device, &attr, uhw);
err = ib_dev->query_device(ib_dev, &attr, uhw);
if (err)
return err;
copy_query_dev_fields(file, &resp.base, &attr);
copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
resp.comp_mask = 0;
if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))


@@ -79,6 +79,7 @@ static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
int out_len) = {
[IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
@@ -119,6 +120,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
};
static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
struct ib_udata *uhw) = {
[IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
@@ -128,16 +130,21 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
};
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
static void ib_uverbs_release_dev(struct kref *ref)
static void ib_uverbs_release_dev(struct kobject *kobj)
{
struct ib_uverbs_device *dev =
container_of(ref, struct ib_uverbs_device, ref);
container_of(kobj, struct ib_uverbs_device, kobj);
complete(&dev->comp);
cleanup_srcu_struct(&dev->disassociate_srcu);
kfree(dev);
}
static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
static void ib_uverbs_release_event_file(struct kref *ref)
{
struct ib_uverbs_event_file *file =
@@ -201,9 +208,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
{
struct ib_uobject *uobj, *tmp;
if (!context)
return 0;
context->closing = 1;
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
@@ -303,13 +307,27 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
return context->device->dealloc_ucontext(context);
}
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
complete(&dev->comp);
}
static void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
struct ib_device *ib_dev;
int srcu_key;
module_put(file->device->ib_dev->owner);
kref_put(&file->device->ref, ib_uverbs_release_dev);
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
if (ib_dev && !ib_dev->disassociate_ucontext)
module_put(ib_dev->owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
kfree(file);
}
@@ -331,9 +349,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
!list_empty(&file->event_list)))
(!list_empty(&file->event_list) ||
/* The barriers built into wait_event_interruptible()
* and wake_up() guarantee this will see the null set
* without using RCU
*/
!file->uverbs_file->device->ib_dev)))
return -ERESTARTSYS;
/* If device was disassociated and no event exists set an error */
if (list_empty(&file->event_list) &&
!file->uverbs_file->device->ib_dev)
return -EIO;
spin_lock_irq(&file->lock);
}
@@ -396,8 +424,11 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *entry, *tmp;
int closed_already = 0;
mutex_lock(&file->uverbs_file->device->lists_mutex);
spin_lock_irq(&file->lock);
closed_already = file->is_closed;
file->is_closed = 1;
list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
if (entry->counter)
@@ -405,11 +436,15 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
kfree(entry);
}
spin_unlock_irq(&file->lock);
if (file->is_async) {
ib_unregister_event_handler(&file->uverbs_file->event_handler);
kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
if (!closed_already) {
list_del(&file->list);
if (file->is_async)
ib_unregister_event_handler(&file->uverbs_file->
event_handler);
}
mutex_unlock(&file->uverbs_file->device->lists_mutex);
kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
kref_put(&file->ref, ib_uverbs_release_event_file);
return 0;
@@ -541,13 +576,21 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
NULL, NULL);
}
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
file->async_file = NULL;
}
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev,
int is_async)
{
struct ib_uverbs_event_file *ev_file;
struct file *filp;
int ret;
ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
if (!ev_file)
return ERR_PTR(-ENOMEM);
@@ -556,15 +599,46 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
INIT_LIST_HEAD(&ev_file->event_list);
init_waitqueue_head(&ev_file->poll_wait);
ev_file->uverbs_file = uverbs_file;
kref_get(&ev_file->uverbs_file->ref);
ev_file->async_queue = NULL;
ev_file->is_async = is_async;
ev_file->is_closed = 0;
filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
ev_file, O_RDONLY);
if (IS_ERR(filp))
kfree(ev_file);
goto err_put_refs;
mutex_lock(&uverbs_file->device->lists_mutex);
list_add_tail(&ev_file->list,
&uverbs_file->device->uverbs_events_file_list);
mutex_unlock(&uverbs_file->device->lists_mutex);
if (is_async) {
WARN_ON(uverbs_file->async_file);
uverbs_file->async_file = ev_file;
kref_get(&uverbs_file->async_file->ref);
INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
ib_dev,
ib_uverbs_event_handler);
ret = ib_register_event_handler(&uverbs_file->event_handler);
if (ret)
goto err_put_file;
/* At this point the async file has been fully set up */
ev_file->is_async = 1;
}
return filp;
err_put_file:
fput(filp);
kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
uverbs_file->async_file = NULL;
return ERR_PTR(ret);
err_put_refs:
kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
kref_put(&ev_file->ref, ib_uverbs_release_event_file);
return filp;
}
@@ -601,8 +675,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
__u32 flags;
int srcu_key;
ssize_t ret;
if (count < sizeof hdr)
return -EINVAL;
@@ -610,6 +687,14 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
if (!ib_dev) {
ret = -EIO;
goto out;
}
flags = (hdr.command &
IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
@@ -617,26 +702,36 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
__u32 command;
if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
IB_USER_VERBS_CMD_COMMAND_MASK))
return -EINVAL;
IB_USER_VERBS_CMD_COMMAND_MASK)) {
ret = -EINVAL;
goto out;
}
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[command])
return -EINVAL;
!uverbs_cmd_table[command]) {
ret = -EINVAL;
goto out;
}
if (!file->ucontext &&
command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
command != IB_USER_VERBS_CMD_GET_CONTEXT) {
ret = -EINVAL;
goto out;
}
if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
return -ENOSYS;
if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) {
ret = -ENOSYS;
goto out;
}
if (hdr.in_words * 4 != count)
return -EINVAL;
if (hdr.in_words * 4 != count) {
ret = -EINVAL;
goto out;
}
return uverbs_cmd_table[command](file,
ret = uverbs_cmd_table[command](file, ib_dev,
buf + sizeof(hdr),
hdr.in_words * 4,
hdr.out_words * 4);
@@ -647,51 +742,72 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_udata ucore;
struct ib_udata uhw;
int err;
size_t written_count = count;
if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
IB_USER_VERBS_CMD_COMMAND_MASK))
return -EINVAL;
IB_USER_VERBS_CMD_COMMAND_MASK)) {
ret = -EINVAL;
goto out;
}
command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
!uverbs_ex_cmd_table[command])
return -ENOSYS;
!uverbs_ex_cmd_table[command]) {
ret = -ENOSYS;
goto out;
}
if (!file->ucontext)
return -EINVAL;
if (!file->ucontext) {
ret = -EINVAL;
goto out;
}
if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
return -ENOSYS;
if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) {
ret = -ENOSYS;
goto out;
}
if (count < (sizeof(hdr) + sizeof(ex_hdr)))
return -EINVAL;
if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
ret = -EINVAL;
goto out;
}
if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
return -EFAULT;
if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
ret = -EFAULT;
goto out;
}
count -= sizeof(hdr) + sizeof(ex_hdr);
buf += sizeof(hdr) + sizeof(ex_hdr);
if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
return -EINVAL;
if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
ret = -EINVAL;
goto out;
}
if (ex_hdr.cmd_hdr_reserved)
return -EINVAL;
if (ex_hdr.cmd_hdr_reserved) {
ret = -EINVAL;
goto out;
}
if (ex_hdr.response) {
if (!hdr.out_words && !ex_hdr.provider_out_words)
return -EINVAL;
if (!hdr.out_words && !ex_hdr.provider_out_words) {
ret = -EINVAL;
goto out;
}
if (!access_ok(VERIFY_WRITE,
(void __user *) (unsigned long) ex_hdr.response,
(hdr.out_words + ex_hdr.provider_out_words) * 8))
return -EFAULT;
(hdr.out_words + ex_hdr.provider_out_words) * 8)) {
ret = -EFAULT;
goto out;
}
} else {
if (hdr.out_words || ex_hdr.provider_out_words)
return -EINVAL;
if (hdr.out_words || ex_hdr.provider_out_words) {
ret = -EINVAL;
goto out;
}
}
INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
@@ -703,27 +819,43 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
ex_hdr.provider_in_words * 8,
ex_hdr.provider_out_words * 8);
err = uverbs_ex_cmd_table[command](file,
ret = uverbs_ex_cmd_table[command](file,
ib_dev,
&ucore,
&uhw);
if (err)
return err;
return written_count;
if (!ret)
ret = written_count;
} else {
ret = -ENOSYS;
}
return -ENOSYS;
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_device *ib_dev;
int ret = 0;
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
if (!ib_dev) {
ret = -EIO;
goto out;
}
if (!file->ucontext)
return -ENODEV;
ret = -ENODEV;
else
return file->device->ib_dev->mmap(file->ucontext, vma);
ret = ib_dev->mmap(file->ucontext, vma);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
}
/*
@@ -740,23 +872,43 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
struct ib_uverbs_device *dev;
struct ib_uverbs_file *file;
struct ib_device *ib_dev;
int ret;
int module_dependent;
int srcu_key;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
if (dev)
kref_get(&dev->ref);
else
if (!atomic_inc_not_zero(&dev->refcount))
return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) {
ret = -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex);
ib_dev = srcu_dereference(dev->ib_dev,
&dev->disassociate_srcu);
if (!ib_dev) {
ret = -EIO;
goto err;
}
file = kmalloc(sizeof *file, GFP_KERNEL);
/* If the IB device supports disassociating a ucontext, there is no hard
* dependency between the uverbs device and its low-level device.
*/
module_dependent = !(ib_dev->disassociate_ucontext);
if (module_dependent) {
if (!try_module_get(ib_dev->owner)) {
ret = -ENODEV;
goto err;
}
}
file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file) {
ret = -ENOMEM;
goto err_module;
if (module_dependent)
goto err_module;
goto err;
}
file->device = dev;
@@ -766,27 +918,47 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_init(&file->mutex);
filp->private_data = file;
kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return nonseekable_open(inode, filp);
err_module:
module_put(dev->ib_dev->owner);
module_put(ib_dev->owner);
err:
kref_put(&dev->ref, ib_uverbs_release_dev);
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
if (atomic_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
return ret;
}
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_device *dev = file->device;
struct ib_ucontext *ucontext = NULL;
ib_uverbs_cleanup_ucontext(file, file->ucontext);
mutex_lock(&file->device->lists_mutex);
ucontext = file->ucontext;
file->ucontext = NULL;
if (!file->is_closed) {
list_del(&file->list);
file->is_closed = 1;
}
mutex_unlock(&file->device->lists_mutex);
if (ucontext)
ib_uverbs_cleanup_ucontext(file, ucontext);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
kobject_put(&dev->kobj);
return 0;
}
@@ -817,12 +989,21 @@ static struct ib_client uverbs_client = {
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
char *buf)
{
int ret = -ENODEV;
int srcu_key;
struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
return sprintf(buf, "%s\n", dev->ib_dev->name);
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
ret = sprintf(buf, "%s\n", ib_dev->name);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -830,11 +1011,19 @@ static ssize_t show_dev_abi_version(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_uverbs_device *dev = dev_get_drvdata(device);
int ret = -ENODEV;
int srcu_key;
struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
return ret;
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
@@ -874,6 +1063,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
int devnum;
dev_t base;
struct ib_uverbs_device *uverbs_dev;
int ret;
if (!device->alloc_ucontext)
return;
@@ -882,10 +1072,20 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!uverbs_dev)
return;
kref_init(&uverbs_dev->ref);
ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
if (ret) {
kfree(uverbs_dev);
return;
}
atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -906,12 +1106,13 @@ static void ib_uverbs_add_one(struct ib_device *device)
}
spin_unlock(&map_lock);
uverbs_dev->ib_dev = device;
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
@@ -942,15 +1143,79 @@ err_cdev:
clear_bit(devnum, overflow_map);
err:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
kobject_put(&uverbs_dev->kobj);
return;
}
static void ib_uverbs_remove_one(struct ib_device *device)
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
struct ib_uverbs_file *file;
struct ib_uverbs_event_file *event_file;
struct ib_event event;
/* Wait for commands that are already running to terminate */
synchronize_srcu(&uverbs_dev->disassociate_srcu);
event.event = IB_EVENT_DEVICE_FATAL;
event.element.port_num = 0;
event.device = ib_dev;
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
struct ib_ucontext *ucontext;
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
file->is_closed = 1;
ucontext = file->ucontext;
list_del(&file->list);
file->ucontext = NULL;
kref_get(&file->ref);
mutex_unlock(&uverbs_dev->lists_mutex);
/* We must release the mutex before going ahead and calling
* disassociate_ucontext. disassociate_ucontext might end up
* indirectly calling uverbs_close, for example due to freeing
* the resources (e.g. mmput()).
*/
ib_uverbs_event_handler(&file->event_handler, &event);
if (ucontext) {
ib_dev->disassociate_ucontext(ucontext);
ib_uverbs_cleanup_ucontext(file, ucontext);
}
mutex_lock(&uverbs_dev->lists_mutex);
kref_put(&file->ref, ib_uverbs_release_file);
}
while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
event_file = list_first_entry(&uverbs_dev->
uverbs_events_file_list,
struct ib_uverbs_event_file,
list);
spin_lock_irq(&event_file->lock);
event_file->is_closed = 1;
spin_unlock_irq(&event_file->lock);
list_del(&event_file->list);
if (event_file->is_async) {
ib_unregister_event_handler(&event_file->uverbs_file->
event_handler);
event_file->uverbs_file->event_handler.device = NULL;
}
wake_up_interruptible(&event_file->poll_wait);
kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
}
mutex_unlock(&uverbs_dev->lists_mutex);
}
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
struct ib_uverbs_device *uverbs_dev = client_data;
int wait_clients = 1;
if (!uverbs_dev)
return;
@@ -964,9 +1229,28 @@ static void ib_uverbs_remove_one(struct ib_device *device)
else
clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
* Userspace will see an EIO errno for all future access.
* Upon returning, the ib_device may be freed internally and is no
* longer valid.
* The uverbs_device is still available until all clients close
* their files; then the uverbs device ref count will drop to zero
* and its resources will be freed.
* Note: at this point no more files can be opened since the cdev
* has been deleted, but active clients can still issue commands
* and close their open files.
*/
rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
ib_uverbs_free_hw_resources(uverbs_dev, device);
wait_clients = 0;
}
if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
kobject_put(&uverbs_dev->kobj);
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)

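
The disassociation path above is only taken for devices whose driver provides the new disassociate_ucontext hook; otherwise removal still waits for every user file to close. As a rough sketch of how a provider would opt in (the exhw_* names are invented, and everything except the callback wiring is elided):

#include <rdma/ib_verbs.h>

struct exhw_dev {
	struct ib_device ib_dev;
	/* ... provider-private state ... */
};

static void exhw_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	/*
	 * Revoke user mappings and release the HW objects behind this
	 * ucontext without the owning process's cooperation, so that
	 * ib_uverbs_free_hw_resources() can return immediately.
	 */
}

static int exhw_register(struct exhw_dev *dev)
{
	struct ib_device *ibdev = &dev->ib_dev;

	/* Opting in to hot removal of the verbs device. */
	ibdev->disassociate_ucontext = exhw_disassociate_ucontext;

	/* ... the rest of the usual ib_device method setup ... */
	return ib_register_device(ibdev, NULL);
}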

@@ -213,28 +213,79 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
/* Protection domains */
/**
* ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
*
* A protection domain object provides an association between QPs, shared
* receive queues, address handles, memory regions, and memory windows.
*
* Every PD has a local_dma_lkey which can be used as the lkey value for local
* memory operations.
*/
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
struct ib_device_attr devattr;
int rc;
rc = ib_query_device(device, &devattr);
if (rc)
return ERR_PTR(rc);
pd = device->alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
return pd;
if (!IS_ERR(pd)) {
pd->device = device;
pd->uobject = NULL;
atomic_set(&pd->usecnt, 0);
pd->device = device;
pd->uobject = NULL;
pd->local_mr = NULL;
atomic_set(&pd->usecnt, 0);
if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else {
struct ib_mr *mr;
mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(mr)) {
ib_dealloc_pd(pd);
return (struct ib_pd *)mr;
}
pd->local_mr = mr;
pd->local_dma_lkey = pd->local_mr->lkey;
}
return pd;
}
EXPORT_SYMBOL(ib_alloc_pd);
int ib_dealloc_pd(struct ib_pd *pd)
/**
* ib_dealloc_pd - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
*
* It is an error to call this function while any resources in the pd still
* exist. The caller is responsible for synchronously destroying them and
* guaranteeing that no new allocations will happen.
*/
void ib_dealloc_pd(struct ib_pd *pd)
{
if (atomic_read(&pd->usecnt))
return -EBUSY;
int ret;
return pd->device->dealloc_pd(pd);
if (pd->local_mr) {
ret = ib_dereg_mr(pd->local_mr);
WARN_ON(ret);
pd->local_mr = NULL;
}
/* uverbs manipulates usecnt with proper locking, while the kabi
requires the caller to guarantee we can't race here. */
WARN_ON(atomic_read(&pd->usecnt));
/* Making dealloc_pd a void return is a WIP, no driver should return
an error here. */
ret = pd->device->dealloc_pd(pd);
WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
EXPORT_SYMBOL(ib_dealloc_pd);
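To make the behavioural change concrete, here is a minimal, hypothetical in-kernel consumer of the updated PD API; the my_ulp_* names are invented for illustration, and only the ib_alloc_pd()/ib_dealloc_pd() signatures and the local_dma_lkey field shown in the diff above are relied upon.

#include <linux/err.h>
#include <linux/printk.h>
#include <rdma/ib_verbs.h>

static int my_ulp_create_pd(struct ib_device *device, struct ib_pd **pd_out)
{
	struct ib_pd *pd = ib_alloc_pd(device);

	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* Every PD now carries a usable lkey for local DMA, whether it comes
	 * from IB_DEVICE_LOCAL_DMA_LKEY or from the PD's internal local_mr. */
	pr_debug("local_dma_lkey = 0x%x\n", pd->local_dma_lkey);

	*pd_out = pd;
	return 0;
}

static void my_ulp_destroy_pd(struct ib_pd *pd)
{
	/* All QPs, CQs, MRs, etc. on this PD must already be destroyed. */
	ib_dealloc_pd(pd);	/* now returns void */
}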
@@ -1168,16 +1219,28 @@ int ib_dereg_mr(struct ib_mr *mr)
}
EXPORT_SYMBOL(ib_dereg_mr);
struct ib_mr *ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr)
/**
* ib_alloc_mr() - Allocates a memory region
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
*
* Notes:
* Memory registration page/sg lists must not exceed max_num_sg.
* For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
* max_num_sg * used_page_size.
*
*/
struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct ib_mr *mr;
if (!pd->device->create_mr)
if (!pd->device->alloc_mr)
return ERR_PTR(-ENOSYS);
mr = pd->device->create_mr(pd, mr_init_attr);
mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
@@ -1188,45 +1251,7 @@ struct ib_mr *ib_create_mr(struct ib_pd *pd,
return mr;
}
EXPORT_SYMBOL(ib_create_mr);
int ib_destroy_mr(struct ib_mr *mr)
{
struct ib_pd *pd;
int ret;
if (atomic_read(&mr->usecnt))
return -EBUSY;
pd = mr->pd;
ret = mr->device->destroy_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_destroy_mr);
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
if (!pd->device->alloc_fast_reg_mr)
return ERR_PTR(-ENOSYS);
mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
EXPORT_SYMBOL(ib_alloc_mr);
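For comparison with the removed ib_create_mr()/ib_alloc_fast_reg_mr() paths, a hypothetical caller of the new verb could look like the following; the helper name and the max_num_sg value of 32 are purely illustrative.

static struct ib_mr *my_ulp_alloc_reg_mr(struct ib_pd *pd)
{
	/* One registration may cover at most 32 SG entries here, and for
	 * IB_MR_TYPE_MEM_REG the total length is bounded by
	 * max_num_sg * used_page_size. */
	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);

	if (IS_ERR(mr))
		return mr;

	/* ... build the page list and post a fast-registration WR ... */
	return mr;	/* release later with ib_dereg_mr(mr) */
}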
struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
int max_page_list_len)

View File

@@ -1,8 +1,6 @@
obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
obj-$(CONFIG_INFINIBAND_QIB) += qib/
obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/

View File

@@ -1,6 +0,0 @@
ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o

View File

@@ -1,15 +0,0 @@
config INFINIBAND_AMSO1100
tristate "Ammasso 1100 HCA support"
depends on PCI && INET
---help---
This is a low-level driver for the Ammasso 1100 host
channel adapter (HCA).
config INFINIBAND_AMSO1100_DEBUG
bool "Verbose debugging output"
depends on INFINIBAND_AMSO1100
default n
---help---
This option causes the amso1100 driver to produce a bunch of
debug messages. Select this if you are developing the driver
or trying to diagnose a problem.

File diff suppressed because it is too large

View File

@@ -1,547 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __C2_H
#define __C2_H
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/idr.h>
#include "c2_provider.h"
#include "c2_mq.h"
#include "c2_status.h"
#define DRV_NAME "c2"
#define DRV_VERSION "1.1"
#define PFX DRV_NAME ": "
#define BAR_0 0
#define BAR_2 2
#define BAR_4 4
#define RX_BUF_SIZE (1536 + 8)
#define ETH_JUMBO_MTU 9000
#define C2_MAGIC "CEPHEUS"
#define C2_VERSION 4
#define C2_IVN (18 & 0x7fffffff)
#define C2_REG0_SIZE (16 * 1024)
#define C2_REG2_SIZE (2 * 1024 * 1024)
#define C2_REG4_SIZE (256 * 1024 * 1024)
#define C2_NUM_TX_DESC 341
#define C2_NUM_RX_DESC 256
#define C2_PCI_REGS_OFFSET (0x10000)
#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
#define C2_RXP_HRXDQ_SIZE (4096)
#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
#define C2_TXP_HTXDQ_SIZE (4096)
#define C2_TX_TIMEOUT (6*HZ)
/* CEPHEUS */
static const u8 c2_magic[] = {
0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
};
enum adapter_pci_regs {
C2_REGS_MAGIC = 0x0000,
C2_REGS_VERS = 0x0008,
C2_REGS_IVN = 0x000C,
C2_REGS_PCI_WINSIZE = 0x0010,
C2_REGS_Q0_QSIZE = 0x0014,
C2_REGS_Q0_MSGSIZE = 0x0018,
C2_REGS_Q0_POOLSTART = 0x001C,
C2_REGS_Q0_SHARED = 0x0020,
C2_REGS_Q1_QSIZE = 0x0024,
C2_REGS_Q1_MSGSIZE = 0x0028,
C2_REGS_Q1_SHARED = 0x0030,
C2_REGS_Q2_QSIZE = 0x0034,
C2_REGS_Q2_MSGSIZE = 0x0038,
C2_REGS_Q2_SHARED = 0x0040,
C2_REGS_ENADDR = 0x004C,
C2_REGS_RDMA_ENADDR = 0x0054,
C2_REGS_HRX_CUR = 0x006C,
};
struct c2_adapter_pci_regs {
char reg_magic[8];
u32 version;
u32 ivn;
u32 pci_window_size;
u32 q0_q_size;
u32 q0_msg_size;
u32 q0_pool_start;
u32 q0_shared;
u32 q1_q_size;
u32 q1_msg_size;
u32 q1_pool_start;
u32 q1_shared;
u32 q2_q_size;
u32 q2_msg_size;
u32 q2_pool_start;
u32 q2_shared;
u32 log_start;
u32 log_size;
u8 host_enaddr[8];
u8 rdma_enaddr[8];
u32 crash_entry;
u32 crash_ready[2];
u32 fw_txd_cur;
u32 fw_hrxd_cur;
u32 fw_rxd_cur;
};
enum pci_regs {
C2_HISR = 0x0000,
C2_DISR = 0x0004,
C2_HIMR = 0x0008,
C2_DIMR = 0x000C,
C2_NISR0 = 0x0010,
C2_NISR1 = 0x0014,
C2_NIMR0 = 0x0018,
C2_NIMR1 = 0x001C,
C2_IDIS = 0x0020,
};
enum {
C2_PCI_HRX_INT = 1 << 8,
C2_PCI_HTX_INT = 1 << 17,
C2_PCI_HRX_QUI = 1 << 31,
};
/*
* Cepheus registers in BAR0.
*/
struct c2_pci_regs {
u32 hostisr;
u32 dmaisr;
u32 hostimr;
u32 dmaimr;
u32 netisr0;
u32 netisr1;
u32 netimr0;
u32 netimr1;
u32 int_disable;
};
/* TXP flags */
enum c2_txp_flags {
TXP_HTXD_DONE = 0,
TXP_HTXD_READY = 1 << 0,
TXP_HTXD_UNINIT = 1 << 1,
};
/* RXP flags */
enum c2_rxp_flags {
RXP_HRXD_UNINIT = 0,
RXP_HRXD_READY = 1 << 0,
RXP_HRXD_DONE = 1 << 1,
};
/* RXP status */
enum c2_rxp_status {
RXP_HRXD_ZERO = 0,
RXP_HRXD_OK = 1 << 0,
RXP_HRXD_BUF_OV = 1 << 1,
};
/* TXP descriptor fields */
enum txp_desc {
C2_TXP_FLAGS = 0x0000,
C2_TXP_LEN = 0x0002,
C2_TXP_ADDR = 0x0004,
};
/* RXP descriptor fields */
enum rxp_desc {
C2_RXP_FLAGS = 0x0000,
C2_RXP_STATUS = 0x0002,
C2_RXP_COUNT = 0x0004,
C2_RXP_LEN = 0x0006,
C2_RXP_ADDR = 0x0008,
};
struct c2_txp_desc {
u16 flags;
u16 len;
u64 addr;
} __attribute__ ((packed));
struct c2_rxp_desc {
u16 flags;
u16 status;
u16 count;
u16 len;
u64 addr;
} __attribute__ ((packed));
struct c2_rxp_hdr {
u16 flags;
u16 status;
u16 len;
u16 rsvd;
} __attribute__ ((packed));
struct c2_tx_desc {
u32 len;
u32 status;
dma_addr_t next_offset;
};
struct c2_rx_desc {
u32 len;
u32 status;
dma_addr_t next_offset;
};
struct c2_alloc {
u32 last;
u32 max;
spinlock_t lock;
unsigned long *table;
};
struct c2_array {
struct {
void **page;
int used;
} *page_list;
};
/*
* The MQ shared pointer pool is organized as a linked list of
* chunks. Each chunk contains a linked list of free shared pointers
* that can be allocated to a given user mode client.
*
*/
struct sp_chunk {
struct sp_chunk *next;
dma_addr_t dma_addr;
DEFINE_DMA_UNMAP_ADDR(mapping);
u16 head;
u16 shared_ptr[0];
};
struct c2_pd_table {
u32 last;
u32 max;
spinlock_t lock;
unsigned long *table;
};
struct c2_qp_table {
struct idr idr;
spinlock_t lock;
};
struct c2_element {
struct c2_element *next;
void *ht_desc; /* host descriptor */
void __iomem *hw_desc; /* hardware descriptor */
struct sk_buff *skb;
dma_addr_t mapaddr;
u32 maplen;
};
struct c2_ring {
struct c2_element *to_clean;
struct c2_element *to_use;
struct c2_element *start;
unsigned long count;
};
struct c2_dev {
struct ib_device ibdev;
void __iomem *regs;
void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
void __iomem *mmio_rxp_ring;
spinlock_t lock;
struct pci_dev *pcidev;
struct net_device *netdev;
struct net_device *pseudo_netdev;
unsigned int cur_tx;
unsigned int cur_rx;
u32 adapter_handle;
int device_cap_flags;
void __iomem *kva; /* KVA device memory */
unsigned long pa; /* PA device memory */
void **qptr_array;
struct kmem_cache *host_msg_cache;
struct list_head cca_link; /* adapter list */
struct list_head eh_wakeup_list; /* event wakeup list */
wait_queue_head_t req_vq_wo;
/* Cached RNIC properties */
struct ib_device_attr props;
struct c2_pd_table pd_table;
struct c2_qp_table qp_table;
int ports; /* num of GigE ports */
int devnum;
spinlock_t vqlock; /* sync vbs req MQ */
/* Verbs Queues */
struct c2_mq req_vq; /* Verbs Request MQ */
struct c2_mq rep_vq; /* Verbs Reply MQ */
struct c2_mq aeq; /* Async Events MQ */
/* Kernel client MQs */
struct sp_chunk *kern_mqsp_pool;
/* Device updates these values when posting messages to a host
* target queue */
u16 req_vq_shared;
u16 rep_vq_shared;
u16 aeq_shared;
u16 irq_claimed;
/*
* Shared host target pages for user-accessible MQs.
*/
int hthead; /* index of first free entry */
void *htpages; /* kernel vaddr */
int htlen; /* length of htpages memory */
void *htuva; /* user mapped vaddr */
spinlock_t htlock; /* serialize allocation */
u64 adapter_hint_uva; /* access to the activity FIFO */
// spinlock_t aeq_lock;
// spinlock_t rnic_lock;
__be16 *hint_count;
dma_addr_t hint_count_dma;
u16 hints_read;
int init; /* TRUE if it's ready */
char ae_cache_name[16];
char vq_cache_name[16];
};
struct c2_port {
u32 msg_enable;
struct c2_dev *c2dev;
struct net_device *netdev;
spinlock_t tx_lock;
u32 tx_avail;
struct c2_ring tx_ring;
struct c2_ring rx_ring;
void *mem; /* PCI memory for host rings */
dma_addr_t dma;
unsigned long mem_size;
u32 rx_buf_size;
};
/*
* Activity FIFO registers in BAR0.
*/
#define PCI_BAR0_HOST_HINT 0x100
#define PCI_BAR0_ADAPTER_HINT 0x2000
/*
* Ammasso PCI vendor id and Cepheus PCI device id.
*/
#define CQ_ARMED 0x01
#define CQ_WAIT_FOR_DMA 0x80
/*
* The format of a hint is as follows:
* Lower 16 bits are the count of hints for the queue.
* Next 15 bits are the qp_index
* Upper most bit depends on who reads it:
* If read by producer, then it means Full (1) or Not-Full (0)
* If read by consumer, then it means Empty (1) or Not-Empty (0)
*/
#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
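Although the amso1100 driver is on its way out, the hint layout described above is easy to verify with a small worked example (the values are arbitrary):

u32 hint = C2_HINT_MAKE(5, 3);		/* (5 << 16) | 3             == 0x00050003 */
u32 idx  = C2_HINT_GET_INDEX(hint);	/* (hint & 0x7FFF0000) >> 16 == 5 */
u32 cnt  = C2_HINT_GET_COUNT(hint);	/* hint & 0x0000FFFF         == 3 */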
/*
* The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
* struct.
*/
#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
#ifndef readq
static inline u64 readq(const void __iomem * addr)
{
u64 ret = readl(addr + 4);
ret <<= 32;
ret |= readl(addr);
return ret;
}
#endif
#ifndef writeq
static inline void __raw_writeq(u64 val, void __iomem * addr)
{
__raw_writel((u32) (val), addr);
__raw_writel((u32) (val >> 32), (addr + 4));
}
#endif
#define C2_SET_CUR_RX(c2dev, cur_rx) \
__raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
#define C2_GET_CUR_RX(c2dev) \
be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
{
return container_of(ibdev, struct c2_dev, ibdev);
}
static inline int c2_errno(void *reply)
{
switch (c2_wr_get_result(reply)) {
case C2_OK:
return 0;
case CCERR_NO_BUFS:
case CCERR_INSUFFICIENT_RESOURCES:
case CCERR_ZERO_RDMA_READ_RESOURCES:
return -ENOMEM;
case CCERR_MR_IN_USE:
case CCERR_QP_IN_USE:
return -EBUSY;
case CCERR_ADDR_IN_USE:
return -EADDRINUSE;
case CCERR_ADDR_NOT_AVAIL:
return -EADDRNOTAVAIL;
case CCERR_CONN_RESET:
return -ECONNRESET;
case CCERR_NOT_IMPLEMENTED:
case CCERR_INVALID_WQE:
return -ENOSYS;
case CCERR_QP_NOT_PRIVILEGED:
return -EPERM;
case CCERR_STACK_ERROR:
return -EPROTO;
case CCERR_ACCESS_VIOLATION:
case CCERR_BASE_AND_BOUNDS_VIOLATION:
return -EFAULT;
case CCERR_STAG_STATE_NOT_INVALID:
case CCERR_INVALID_ADDRESS:
case CCERR_INVALID_CQ:
case CCERR_INVALID_EP:
case CCERR_INVALID_MODIFIER:
case CCERR_INVALID_MTU:
case CCERR_INVALID_PD_ID:
case CCERR_INVALID_QP:
case CCERR_INVALID_RNIC:
case CCERR_INVALID_STAG:
return -EINVAL;
default:
return -EAGAIN;
}
}
/* Device */
extern int c2_register_device(struct c2_dev *c2dev);
extern void c2_unregister_device(struct c2_dev *c2dev);
extern int c2_rnic_init(struct c2_dev *c2dev);
extern void c2_rnic_term(struct c2_dev *c2dev);
extern void c2_rnic_interrupt(struct c2_dev *c2dev);
extern int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
extern int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
/* QPs */
extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
struct ib_qp_attr *attr, int attr_mask);
extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
int ord, int ird);
extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
struct ib_send_wr **bad_wr);
extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
struct ib_recv_wr **bad_wr);
extern void c2_init_qp_table(struct c2_dev *c2dev);
extern void c2_cleanup_qp_table(struct c2_dev *c2dev);
extern void c2_set_qp_state(struct c2_qp *, int);
extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
/* PDs */
extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
extern int c2_init_pd_table(struct c2_dev *c2dev);
extern void c2_cleanup_pd_table(struct c2_dev *c2dev);
/* CQs */
extern int c2_init_cq(struct c2_dev *c2dev, int entries,
struct c2_ucontext *ctx, struct c2_cq *cq);
extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
/* CM */
extern int c2_llp_connect(struct iw_cm_id *cm_id,
struct iw_cm_conn_param *iw_param);
extern int c2_llp_accept(struct iw_cm_id *cm_id,
struct iw_cm_conn_param *iw_param);
extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
u8 pdata_len);
extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
/* MM */
extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
int page_size, int pbl_depth, u32 length,
u32 off, u64 *va, enum c2_acf acf,
struct c2_mr *mr);
extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
/* AE */
extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
/* MQSP Allocator */
extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
struct sp_chunk **root);
extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
extern __be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
dma_addr_t *dma_addr, gfp_t gfp_mask);
extern void c2_free_mqsp(__be16* mqsp);
#endif

View File

@@ -1,327 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "c2.h"
#include <rdma/iw_cm.h>
#include "c2_status.h"
#include "c2_ae.h"
static int c2_convert_cm_status(u32 c2_status)
{
switch (c2_status) {
case C2_CONN_STATUS_SUCCESS:
return 0;
case C2_CONN_STATUS_REJECTED:
return -ENETRESET;
case C2_CONN_STATUS_REFUSED:
return -ECONNREFUSED;
case C2_CONN_STATUS_TIMEDOUT:
return -ETIMEDOUT;
case C2_CONN_STATUS_NETUNREACH:
return -ENETUNREACH;
case C2_CONN_STATUS_HOSTUNREACH:
return -EHOSTUNREACH;
case C2_CONN_STATUS_INVALID_RNIC:
return -EINVAL;
case C2_CONN_STATUS_INVALID_QP:
return -EINVAL;
case C2_CONN_STATUS_INVALID_QP_STATE:
return -EINVAL;
case C2_CONN_STATUS_ADDR_NOT_AVAIL:
return -EADDRNOTAVAIL;
default:
printk(KERN_ERR PFX
"%s - Unable to convert CM status: %d\n",
__func__, c2_status);
return -EIO;
}
}
static const char* to_event_str(int event)
{
static const char* event_str[] = {
"CCAE_REMOTE_SHUTDOWN",
"CCAE_ACTIVE_CONNECT_RESULTS",
"CCAE_CONNECTION_REQUEST",
"CCAE_LLP_CLOSE_COMPLETE",
"CCAE_TERMINATE_MESSAGE_RECEIVED",
"CCAE_LLP_CONNECTION_RESET",
"CCAE_LLP_CONNECTION_LOST",
"CCAE_LLP_SEGMENT_SIZE_INVALID",
"CCAE_LLP_INVALID_CRC",
"CCAE_LLP_BAD_FPDU",
"CCAE_INVALID_DDP_VERSION",
"CCAE_INVALID_RDMA_VERSION",
"CCAE_UNEXPECTED_OPCODE",
"CCAE_INVALID_DDP_QUEUE_NUMBER",
"CCAE_RDMA_READ_NOT_ENABLED",
"CCAE_RDMA_WRITE_NOT_ENABLED",
"CCAE_RDMA_READ_TOO_SMALL",
"CCAE_NO_L_BIT",
"CCAE_TAGGED_INVALID_STAG",
"CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
"CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
"CCAE_TAGGED_INVALID_PD",
"CCAE_WRAP_ERROR",
"CCAE_BAD_CLOSE",
"CCAE_BAD_LLP_CLOSE",
"CCAE_INVALID_MSN_RANGE",
"CCAE_INVALID_MSN_GAP",
"CCAE_IRRQ_OVERFLOW",
"CCAE_IRRQ_MSN_GAP",
"CCAE_IRRQ_MSN_RANGE",
"CCAE_IRRQ_INVALID_STAG",
"CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
"CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
"CCAE_IRRQ_INVALID_PD",
"CCAE_IRRQ_WRAP_ERROR",
"CCAE_CQ_SQ_COMPLETION_OVERFLOW",
"CCAE_CQ_RQ_COMPLETION_ERROR",
"CCAE_QP_SRQ_WQE_ERROR",
"CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
"CCAE_CQ_OVERFLOW",
"CCAE_CQ_OPERATION_ERROR",
"CCAE_SRQ_LIMIT_REACHED",
"CCAE_QP_RQ_LIMIT_REACHED",
"CCAE_SRQ_CATASTROPHIC_ERROR",
"CCAE_RNIC_CATASTROPHIC_ERROR"
};
if (event < CCAE_REMOTE_SHUTDOWN ||
event > CCAE_RNIC_CATASTROPHIC_ERROR)
return "<invalid event>";
event -= CCAE_REMOTE_SHUTDOWN;
return event_str[event];
}
static const char *to_qp_state_str(int state)
{
switch (state) {
case C2_QP_STATE_IDLE:
return "C2_QP_STATE_IDLE";
case C2_QP_STATE_CONNECTING:
return "C2_QP_STATE_CONNECTING";
case C2_QP_STATE_RTS:
return "C2_QP_STATE_RTS";
case C2_QP_STATE_CLOSING:
return "C2_QP_STATE_CLOSING";
case C2_QP_STATE_TERMINATE:
return "C2_QP_STATE_TERMINATE";
case C2_QP_STATE_ERROR:
return "C2_QP_STATE_ERROR";
default:
return "<invalid QP state>";
}
}
void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
{
struct c2_mq *mq = c2dev->qptr_array[mq_index];
union c2wr *wr;
void *resource_user_context;
struct iw_cm_event cm_event;
struct ib_event ib_event;
enum c2_resource_indicator resource_indicator;
enum c2_event_id event_id;
unsigned long flags;
int status;
struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
/*
* retrieve the message
*/
wr = c2_mq_consume(mq);
if (!wr)
return;
memset(&ib_event, 0, sizeof(ib_event));
memset(&cm_event, 0, sizeof(cm_event));
event_id = c2_wr_get_id(wr);
resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
resource_user_context =
(void *) (unsigned long) wr->ae.ae_generic.user_context;
status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
pr_debug("event received c2_dev=%p, event_id=%d, "
"resource_indicator=%d, user_context=%p, status = %d\n",
c2dev, event_id, resource_indicator, resource_user_context,
status);
switch (resource_indicator) {
case C2_RES_IND_QP:{
struct c2_qp *qp = (struct c2_qp *)resource_user_context;
struct iw_cm_id *cm_id = qp->cm_id;
struct c2wr_ae_active_connect_results *res;
if (!cm_id) {
pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
qp);
goto ignore_it;
}
pr_debug("%s: event = %s, user_context=%llx, "
"resource_type=%x, "
"resource=%x, qp_state=%s\n",
__func__,
to_event_str(event_id),
(unsigned long long) wr->ae.ae_generic.user_context,
be32_to_cpu(wr->ae.ae_generic.resource_type),
be32_to_cpu(wr->ae.ae_generic.resource),
to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
switch (event_id) {
case CCAE_ACTIVE_CONNECT_RESULTS:
res = &wr->ae.ae_active_connect_results;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
laddr->sin_addr.s_addr = res->laddr;
raddr->sin_addr.s_addr = res->raddr;
laddr->sin_port = res->lport;
raddr->sin_port = res->rport;
if (status == 0) {
cm_event.private_data_len =
be32_to_cpu(res->private_data_length);
cm_event.private_data = res->private_data;
} else {
spin_lock_irqsave(&qp->lock, flags);
if (qp->cm_id) {
qp->cm_id->rem_ref(qp->cm_id);
qp->cm_id = NULL;
}
spin_unlock_irqrestore(&qp->lock, flags);
cm_event.private_data_len = 0;
cm_event.private_data = NULL;
}
if (cm_id->event_handler)
cm_id->event_handler(cm_id, &cm_event);
break;
case CCAE_TERMINATE_MESSAGE_RECEIVED:
case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
ib_event.device = &c2dev->ibdev;
ib_event.element.qp = &qp->ibqp;
ib_event.event = IB_EVENT_QP_REQ_ERR;
if (qp->ibqp.event_handler)
qp->ibqp.event_handler(&ib_event,
qp->ibqp.
qp_context);
break;
case CCAE_BAD_CLOSE:
case CCAE_LLP_CLOSE_COMPLETE:
case CCAE_LLP_CONNECTION_RESET:
case CCAE_LLP_CONNECTION_LOST:
BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
spin_lock_irqsave(&qp->lock, flags);
if (qp->cm_id) {
qp->cm_id->rem_ref(qp->cm_id);
qp->cm_id = NULL;
}
spin_unlock_irqrestore(&qp->lock, flags);
cm_event.event = IW_CM_EVENT_CLOSE;
cm_event.status = 0;
if (cm_id->event_handler)
cm_id->event_handler(cm_id, &cm_event);
break;
default:
BUG_ON(1);
pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
"CM_ID=%p\n",
__func__, __LINE__,
event_id, qp, cm_id);
break;
}
break;
}
case C2_RES_IND_EP:{
struct c2wr_ae_connection_request *req =
&wr->ae.ae_connection_request;
struct iw_cm_id *cm_id =
(struct iw_cm_id *)resource_user_context;
pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
if (event_id != CCAE_CONNECTION_REQUEST) {
pr_debug("%s: Invalid event_id: %d\n",
__func__, event_id);
break;
}
cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
laddr->sin_addr.s_addr = req->laddr;
raddr->sin_addr.s_addr = req->raddr;
laddr->sin_port = req->lport;
raddr->sin_port = req->rport;
cm_event.private_data_len =
be32_to_cpu(req->private_data_length);
cm_event.private_data = req->private_data;
/*
* Until ird/ord negotiation via MPAv2 support is added, send
* max supported values
*/
cm_event.ird = cm_event.ord = 128;
if (cm_id->event_handler)
cm_id->event_handler(cm_id, &cm_event);
break;
}
case C2_RES_IND_CQ:{
struct c2_cq *cq =
(struct c2_cq *) resource_user_context;
pr_debug("IB_EVENT_CQ_ERR\n");
ib_event.device = &c2dev->ibdev;
ib_event.element.cq = &cq->ibcq;
ib_event.event = IB_EVENT_CQ_ERR;
if (cq->ibcq.event_handler)
cq->ibcq.event_handler(&ib_event,
cq->ibcq.cq_context);
break;
}
default:
printk("Bad resource indicator = %d\n",
resource_indicator);
break;
}
ignore_it:
c2_mq_free(mq);
}

View File

@@ -1,108 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _C2_AE_H_
#define _C2_AE_H_
/*
* WARNING: If you change this file, also bump C2_IVN_BASE
* in common/include/clustercore/c2_ivn.h.
*/
/*
* Asynchronous Event Identifiers
*
* These start at 0x80 only so it's obvious from inspection that
* they are not work-request statuses. This isn't critical.
*
* NOTE: these event id's must fit in eight bits.
*/
enum c2_event_id {
CCAE_REMOTE_SHUTDOWN = 0x80,
CCAE_ACTIVE_CONNECT_RESULTS,
CCAE_CONNECTION_REQUEST,
CCAE_LLP_CLOSE_COMPLETE,
CCAE_TERMINATE_MESSAGE_RECEIVED,
CCAE_LLP_CONNECTION_RESET,
CCAE_LLP_CONNECTION_LOST,
CCAE_LLP_SEGMENT_SIZE_INVALID,
CCAE_LLP_INVALID_CRC,
CCAE_LLP_BAD_FPDU,
CCAE_INVALID_DDP_VERSION,
CCAE_INVALID_RDMA_VERSION,
CCAE_UNEXPECTED_OPCODE,
CCAE_INVALID_DDP_QUEUE_NUMBER,
CCAE_RDMA_READ_NOT_ENABLED,
CCAE_RDMA_WRITE_NOT_ENABLED,
CCAE_RDMA_READ_TOO_SMALL,
CCAE_NO_L_BIT,
CCAE_TAGGED_INVALID_STAG,
CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
CCAE_TAGGED_INVALID_PD,
CCAE_WRAP_ERROR,
CCAE_BAD_CLOSE,
CCAE_BAD_LLP_CLOSE,
CCAE_INVALID_MSN_RANGE,
CCAE_INVALID_MSN_GAP,
CCAE_IRRQ_OVERFLOW,
CCAE_IRRQ_MSN_GAP,
CCAE_IRRQ_MSN_RANGE,
CCAE_IRRQ_INVALID_STAG,
CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
CCAE_IRRQ_INVALID_PD,
CCAE_IRRQ_WRAP_ERROR,
CCAE_CQ_SQ_COMPLETION_OVERFLOW,
CCAE_CQ_RQ_COMPLETION_ERROR,
CCAE_QP_SRQ_WQE_ERROR,
CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
CCAE_CQ_OVERFLOW,
CCAE_CQ_OPERATION_ERROR,
CCAE_SRQ_LIMIT_REACHED,
CCAE_QP_RQ_LIMIT_REACHED,
CCAE_SRQ_CATASTROPHIC_ERROR,
CCAE_RNIC_CATASTROPHIC_ERROR
/* WARNING If you add more id's, make sure their values fit in eight bits. */
};
/*
* Resource Indicators and Identifiers
*/
enum c2_resource_indicator {
C2_RES_IND_QP = 1,
C2_RES_IND_EP,
C2_RES_IND_CQ,
C2_RES_IND_SRQ,
};
#endif /* _C2_AE_H_ */

View File

@@ -1,142 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/bitmap.h>
#include "c2.h"
static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
struct sp_chunk **head)
{
int i;
struct sp_chunk *new_head;
dma_addr_t dma_addr;
new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
&dma_addr, gfp_mask);
if (new_head == NULL)
return -ENOMEM;
new_head->dma_addr = dma_addr;
dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
new_head->next = NULL;
new_head->head = 0;
/* build list where each index is the next free slot */
for (i = 0;
i < (PAGE_SIZE - sizeof(struct sp_chunk) -
sizeof(u16)) / sizeof(u16) - 1;
i++) {
new_head->shared_ptr[i] = i + 1;
}
/* terminate list */
new_head->shared_ptr[i] = 0xFFFF;
*head = new_head;
return 0;
}
int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
struct sp_chunk **root)
{
return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
}
void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
{
struct sp_chunk *next;
while (root) {
next = root->next;
dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
dma_unmap_addr(root, mapping));
root = next;
}
}
__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
dma_addr_t *dma_addr, gfp_t gfp_mask)
{
u16 mqsp;
while (head) {
mqsp = head->head;
if (mqsp != 0xFFFF) {
head->head = head->shared_ptr[mqsp];
break;
} else if (head->next == NULL) {
if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
0) {
head = head->next;
mqsp = head->head;
head->head = head->shared_ptr[mqsp];
break;
} else
return NULL;
} else
head = head->next;
}
if (head) {
*dma_addr = head->dma_addr +
((unsigned long) &(head->shared_ptr[mqsp]) -
(unsigned long) head);
pr_debug("%s addr %p dma_addr %llx\n", __func__,
&(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
return (__force __be16 *) &(head->shared_ptr[mqsp]);
}
return NULL;
}
void c2_free_mqsp(__be16 *mqsp)
{
struct sp_chunk *head;
u16 idx;
/* The chunk containing this ptr begins at the page boundary */
head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
/* Link head to new mqsp */
*mqsp = (__force __be16) head->head;
/* Compute the shared_ptr index */
idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
/* Point this index at the head */
head->shared_ptr[idx] = head->head;
/* Point head at this index */
head->head = idx;
}

View File

@@ -1,461 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/slab.h>
#include "c2.h"
#include "c2_wr.h"
#include "c2_vq.h"
#include <rdma/iw_cm.h>
int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
struct c2_dev *c2dev = to_c2dev(cm_id->device);
struct ib_qp *ibqp;
struct c2_qp *qp;
struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
struct c2_vq_req *vq_req;
int err;
struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
if (cm_id->remote_addr.ss_family != AF_INET)
return -ENOSYS;
ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
if (!ibqp)
return -EINVAL;
qp = to_c2qp(ibqp);
/* Associate QP <--> CM_ID */
cm_id->provider_data = qp;
cm_id->add_ref(cm_id);
qp->cm_id = cm_id;
/*
* only support the max private_data length
*/
if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
err = -EINVAL;
goto bail0;
}
/*
* Set the rdma read limits
*/
err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
if (err)
goto bail0;
/*
* Create and send a WR_QP_CONNECT...
*/
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
goto bail0;
}
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
err = -ENOMEM;
goto bail1;
}
c2_wr_set_id(wr, CCWR_QP_CONNECT);
wr->hdr.context = 0;
wr->rnic_handle = c2dev->adapter_handle;
wr->qp_handle = qp->adapter_handle;
wr->remote_addr = raddr->sin_addr.s_addr;
wr->remote_port = raddr->sin_port;
/*
* Move any private data from the caller's buf into
* the WR.
*/
if (iw_param->private_data) {
wr->private_data_length =
cpu_to_be32(iw_param->private_data_len);
memcpy(&wr->private_data[0], iw_param->private_data,
iw_param->private_data_len);
} else
wr->private_data_length = 0;
/*
* Send WR to adapter. NOTE: There is no synch reply from
* the adapter.
*/
err = vq_send_wr(c2dev, (union c2wr *) wr);
vq_req_free(c2dev, vq_req);
bail1:
kfree(wr);
bail0:
if (err) {
/*
* If we fail, release reference on QP and
* disassociate QP from CM_ID
*/
cm_id->provider_data = NULL;
qp->cm_id = NULL;
cm_id->rem_ref(cm_id);
}
return err;
}
int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
{
struct c2_dev *c2dev;
struct c2wr_ep_listen_create_req wr;
struct c2wr_ep_listen_create_rep *reply;
struct c2_vq_req *vq_req;
int err;
struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
if (cm_id->local_addr.ss_family != AF_INET)
return -ENOSYS;
c2dev = to_c2dev(cm_id->device);
if (c2dev == NULL)
return -EINVAL;
/*
* Allocate verbs request.
*/
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
/*
* Build the WR
*/
c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
wr.hdr.context = (u64) (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
wr.local_addr = laddr->sin_addr.s_addr;
wr.local_port = laddr->sin_port;
wr.backlog = cpu_to_be32(backlog);
wr.user_context = (u64) (unsigned long) cm_id;
/*
* Reference the request struct. Dereferenced in the int handler.
*/
vq_req_get(c2dev, vq_req);
/*
* Send WR to adapter
*/
err = vq_send_wr(c2dev, (union c2wr *) & wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail0;
}
/*
* Wait for reply from adapter
*/
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail0;
/*
* Process reply
*/
reply =
(struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail1;
}
if ((err = c2_errno(reply)) != 0)
goto bail1;
/*
* Keep the adapter handle. Used in subsequent destroy
*/
cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
/*
* free vq stuff
*/
vq_repbuf_free(c2dev, reply);
vq_req_free(c2dev, vq_req);
return 0;
bail1:
vq_repbuf_free(c2dev, reply);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
int c2_llp_service_destroy(struct iw_cm_id *cm_id)
{
struct c2_dev *c2dev;
struct c2wr_ep_listen_destroy_req wr;
struct c2wr_ep_listen_destroy_rep *reply;
struct c2_vq_req *vq_req;
int err;
c2dev = to_c2dev(cm_id->device);
if (c2dev == NULL)
return -EINVAL;
/*
* Allocate verbs request.
*/
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
/*
* Build the WR
*/
c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
wr.hdr.context = (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
/*
* reference the request struct. dereferenced in the int handler.
*/
vq_req_get(c2dev, vq_req);
/*
* Send WR to adapter
*/
err = vq_send_wr(c2dev, (union c2wr *) & wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail0;
}
/*
* Wait for reply from adapter
*/
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail0;
/*
* Process reply
*/
reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail0;
}
if ((err = c2_errno(reply)) != 0)
goto bail1;
bail1:
vq_repbuf_free(c2dev, reply);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
struct c2_dev *c2dev = to_c2dev(cm_id->device);
struct c2_qp *qp;
struct ib_qp *ibqp;
struct c2wr_cr_accept_req *wr; /* variable length WR */
struct c2_vq_req *vq_req;
struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */
int err;
ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
if (!ibqp)
return -EINVAL;
qp = to_c2qp(ibqp);
/* Set the RDMA read limits */
err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
if (err)
goto bail0;
/* Allocate verbs request. */
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
err = -ENOMEM;
goto bail0;
}
vq_req->qp = qp;
vq_req->cm_id = cm_id;
vq_req->event = IW_CM_EVENT_ESTABLISHED;
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
goto bail1;
}
/* Build the WR */
c2_wr_set_id(wr, CCWR_CR_ACCEPT);
wr->hdr.context = (unsigned long) vq_req;
wr->rnic_handle = c2dev->adapter_handle;
wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
wr->qp_handle = qp->adapter_handle;
/* Replace the cr_handle with the QP after accept */
cm_id->provider_data = qp;
cm_id->add_ref(cm_id);
qp->cm_id = cm_id;
cm_id->provider_data = qp;
/* Validate private_data length */
if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
err = -EINVAL;
goto bail1;
}
if (iw_param->private_data) {
wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
memcpy(&wr->private_data[0],
iw_param->private_data, iw_param->private_data_len);
} else
wr->private_data_length = 0;
/* Reference the request struct. Dereferenced in the int handler. */
vq_req_get(c2dev, vq_req);
/* Send WR to adapter */
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail1;
}
/* Wait for reply from adapter */
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail1;
/* Check that reply is present */
reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail1;
}
err = c2_errno(reply);
vq_repbuf_free(c2dev, reply);
if (!err)
c2_set_qp_state(qp, C2_QP_STATE_RTS);
bail1:
kfree(wr);
vq_req_free(c2dev, vq_req);
bail0:
if (err) {
/*
* If we fail, release reference on QP and
* disassociate QP from CM_ID
*/
cm_id->provider_data = NULL;
qp->cm_id = NULL;
cm_id->rem_ref(cm_id);
}
return err;
}
int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
struct c2_dev *c2dev;
struct c2wr_cr_reject_req wr;
struct c2_vq_req *vq_req;
struct c2wr_cr_reject_rep *reply;
int err;
c2dev = to_c2dev(cm_id->device);
/*
* Allocate verbs request.
*/
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
/*
* Build the WR
*/
c2_wr_set_id(&wr, CCWR_CR_REJECT);
wr.hdr.context = (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
/*
* reference the request struct. dereferenced in the int handler.
*/
vq_req_get(c2dev, vq_req);
/*
* Send WR to adapter
*/
err = vq_send_wr(c2dev, (union c2wr *) & wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail0;
}
/*
* Wait for reply from adapter
*/
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail0;
/*
* Process reply
*/
reply = (struct c2wr_cr_reject_rep *) (unsigned long)
vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail0;
}
err = c2_errno(reply);
/*
* free vq stuff
*/
vq_repbuf_free(c2dev, reply);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}

View File

@@ -1,440 +0,0 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/gfp.h>
#include "c2.h"
#include "c2_vq.h"
#include "c2_status.h"
#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
{
struct c2_cq *cq;
unsigned long flags;
spin_lock_irqsave(&c2dev->lock, flags);
cq = c2dev->qptr_array[cqn];
if (!cq) {
spin_unlock_irqrestore(&c2dev->lock, flags);
return NULL;
}
atomic_inc(&cq->refcount);
spin_unlock_irqrestore(&c2dev->lock, flags);
return cq;
}
static void c2_cq_put(struct c2_cq *cq)
{
if (atomic_dec_and_test(&cq->refcount))
wake_up(&cq->wait);
}
void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
{
struct c2_cq *cq;
cq = c2_cq_get(c2dev, mq_index);
if (!cq) {
printk("discarding events on destroyed CQN=%d\n", mq_index);
return;
}
(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
c2_cq_put(cq);
}
void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
{
struct c2_cq *cq;
struct c2_mq *q;
cq = c2_cq_get(c2dev, mq_index);
if (!cq)
return;
spin_lock_irq(&cq->lock);
q = &cq->mq;
if (q && !c2_mq_empty(q)) {
u16 priv = q->priv;
struct c2wr_ce *msg;
while (priv != be16_to_cpu(*q->shared)) {
msg = (struct c2wr_ce *)
(q->msg_pool.host + priv * q->msg_size);
if (msg->qp_user_context == (u64) (unsigned long) qp) {
msg->qp_user_context = (u64) 0;
}
priv = (priv + 1) % q->q_size;
}
}
spin_unlock_irq(&cq->lock);
c2_cq_put(cq);
}
static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
{
switch (status) {
case C2_OK:
return IB_WC_SUCCESS;
case CCERR_FLUSHED:
return IB_WC_WR_FLUSH_ERR;
case CCERR_BASE_AND_BOUNDS_VIOLATION:
return IB_WC_LOC_PROT_ERR;
case CCERR_ACCESS_VIOLATION:
return IB_WC_LOC_ACCESS_ERR;
case CCERR_TOTAL_LENGTH_TOO_BIG:
return IB_WC_LOC_LEN_ERR;
case CCERR_INVALID_WINDOW:
return IB_WC_MW_BIND_ERR;
default:
return IB_WC_GENERAL_ERR;
}
}
static inline int c2_poll_one(struct c2_dev *c2dev,
struct c2_cq *cq, struct ib_wc *entry)
{
struct c2wr_ce *ce;
struct c2_qp *qp;
int is_recv = 0;
ce = c2_mq_consume(&cq->mq);
if (!ce) {
return -EAGAIN;
}
/*
* if the qp returned is null then this qp has already
* been freed and we are unable to process the completion.
* try pulling the next message
*/
while ((qp =
(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
c2_mq_free(&cq->mq);
ce = c2_mq_consume(&cq->mq);
if (!ce)
return -EAGAIN;
}
entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
entry->wr_id = ce->hdr.context;
entry->qp = &qp->ibqp;
entry->wc_flags = 0;
entry->slid = 0;
entry->sl = 0;
entry->src_qp = 0;
entry->dlid_path_bits = 0;
entry->pkey_index = 0;
switch (c2_wr_get_id(ce)) {
case C2_WR_TYPE_SEND:
entry->opcode = IB_WC_SEND;
break;
case C2_WR_TYPE_RDMA_WRITE:
entry->opcode = IB_WC_RDMA_WRITE;
break;
case C2_WR_TYPE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
break;
case C2_WR_TYPE_BIND_MW:
entry->opcode = IB_WC_BIND_MW;
break;
case C2_WR_TYPE_RECV:
entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
entry->opcode = IB_WC_RECV;
is_recv = 1;
break;
default:
break;
}
/* consume the WQEs */
if (is_recv)
c2_mq_lconsume(&qp->rq_mq, 1);
else
c2_mq_lconsume(&qp->sq_mq,
be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
/* free the message */
c2_mq_free(&cq->mq);
return 0;
}
int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct c2_dev *c2dev = to_c2dev(ibcq->device);
struct c2_cq *cq = to_c2cq(ibcq);
unsigned long flags;
int npolled, err;
spin_lock_irqsave(&cq->lock, flags);
for (npolled = 0; npolled < num_entries; ++npolled) {
err = c2_poll_one(c2dev, cq, entry + npolled);
if (err)
break;
}
spin_unlock_irqrestore(&cq->lock, flags);
return npolled;
}
int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
struct c2_mq_shared __iomem *shared;
struct c2_cq *cq;
unsigned long flags;
int ret = 0;
cq = to_c2cq(ibcq);
shared = cq->mq.peer;
if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
else
return -EINVAL;
writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
/*
* Now read back shared->armed to make the PCI
* write synchronous. This is necessary for
* correct cq notification semantics.
*/
readb(&shared->armed);
if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
spin_lock_irqsave(&cq->lock, flags);
ret = !c2_mq_empty(&cq->mq);
spin_unlock_irqrestore(&cq->lock, flags);
}
return ret;
}
static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
{
dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
mq->msg_pool.host, dma_unmap_addr(mq, mapping));
}
static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
size_t q_size, size_t msg_size)
{
u8 *pool_start;
if (q_size > SIZE_MAX / msg_size)
return -EINVAL;
pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
&mq->host_dma, GFP_KERNEL);
if (!pool_start)
return -ENOMEM;
c2_mq_rep_init(mq,
0, /* index (currently unknown) */
q_size,
msg_size,
pool_start,
NULL, /* peer (currently unknown) */
C2_MQ_HOST_TARGET);
dma_unmap_addr_set(mq, mapping, mq->host_dma);
return 0;
}
int c2_init_cq(struct c2_dev *c2dev, int entries,
struct c2_ucontext *ctx, struct c2_cq *cq)
{
struct c2wr_cq_create_req wr;
struct c2wr_cq_create_rep *reply;
unsigned long peer_pa;
struct c2_vq_req *vq_req;
int err;
might_sleep();
cq->ibcq.cqe = entries - 1;
cq->is_kernel = !ctx;
/* Allocate a shared pointer */
cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
&cq->mq.shared_dma, GFP_KERNEL);
if (!cq->mq.shared)
return -ENOMEM;
/* Allocate pages for the message pool */
err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
if (err)
goto bail0;
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
err = -ENOMEM;
goto bail1;
}
memset(&wr, 0, sizeof(wr));
c2_wr_set_id(&wr, CCWR_CQ_CREATE);
wr.hdr.context = (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
wr.msg_size = cpu_to_be32(cq->mq.msg_size);
wr.depth = cpu_to_be32(cq->mq.q_size);
wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
wr.user_context = (u64) (unsigned long) (cq);
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, (union c2wr *) & wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail2;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail2;
reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply) {
err = -ENOMEM;
goto bail2;
}
if ((err = c2_errno(reply)) != 0)
goto bail3;
cq->adapter_handle = reply->cq_handle;
cq->mq.index = be32_to_cpu(reply->mq_index);
peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
if (!cq->mq.peer) {
err = -ENOMEM;
goto bail3;
}
vq_repbuf_free(c2dev, reply);
vq_req_free(c2dev, vq_req);
spin_lock_init(&cq->lock);
atomic_set(&cq->refcount, 1);
init_waitqueue_head(&cq->wait);
/*
* Use the MQ index allocated by the adapter to
* store the CQ in the qptr_array
*/
cq->cqn = cq->mq.index;
c2dev->qptr_array[cq->cqn] = cq;
return 0;
bail3:
vq_repbuf_free(c2dev, reply);
bail2:
vq_req_free(c2dev, vq_req);
bail1:
c2_free_cq_buf(c2dev, &cq->mq);
bail0:
c2_free_mqsp(cq->mq.shared);
return err;
}
void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
{
int err;
struct c2_vq_req *vq_req;
struct c2wr_cq_destroy_req wr;
struct c2wr_cq_destroy_rep *reply;
might_sleep();
/* Clear CQ from the qptr array */
spin_lock_irq(&c2dev->lock);
c2dev->qptr_array[cq->mq.index] = NULL;
atomic_dec(&cq->refcount);
spin_unlock_irq(&c2dev->lock);
wait_event(cq->wait, !atomic_read(&cq->refcount));
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
goto bail0;
}
memset(&wr, 0, sizeof(wr));
c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
wr.hdr.context = (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
wr.cq_handle = cq->adapter_handle;
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, (union c2wr *) & wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail1;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail1;
reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
if (reply)
vq_repbuf_free(c2dev, reply);
bail1:
vq_req_free(c2dev, vq_req);
bail0:
if (cq->is_kernel) {
c2_free_cq_buf(c2dev, &cq->mq);
}
return;
}

View File

@@ -1,219 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "c2.h"
#include <rdma/iw_cm.h>
#include "c2_vq.h"
static void handle_mq(struct c2_dev *c2dev, u32 index);
static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
/*
* Handle RNIC interrupts
*/
void c2_rnic_interrupt(struct c2_dev *c2dev)
{
unsigned int mq_index;
while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
if (mq_index & 0x80000000) {
break;
}
c2dev->hints_read++;
handle_mq(c2dev, mq_index);
}
}
/*
* Top level MQ handler
*/
static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
{
if (c2dev->qptr_array[mq_index] == NULL) {
pr_debug("handle_mq: stray activity for mq_index=%d\n",
mq_index);
return;
}
switch (mq_index) {
case (0):
/*
* An index of 0 in the activity queue
* indicates the req vq now has messages
* available...
*
* Wake up any waiters waiting on req VQ
* message availability.
*/
wake_up(&c2dev->req_vq_wo);
break;
case (1):
handle_vq(c2dev, mq_index);
break;
case (2):
/* We have to purge the VQ in case there are pending
* accept reply requests that would result in the
* generation of an ESTABLISHED event. If we don't
* generate these first, a CLOSE event could end up
* being delivered before the ESTABLISHED event.
*/
handle_vq(c2dev, 1);
c2_ae_event(c2dev, mq_index);
break;
default:
/* There is no event synchronization between CQ events
* and AE or CM events. In fact, CQE could be
* delivered for all of the I/O up to and including the
* FLUSH for a peer disconnect prior to the ESTABLISHED
* event being delivered to the app. The reason for this
* is that CM events are delivered on a thread, while AE
* and CM events are delivered on interrupt context.
*/
c2_cq_event(c2dev, mq_index);
break;
}
return;
}
/*
* Handles verbs WR replies.
*/
static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
{
void *adapter_msg, *reply_msg;
struct c2wr_hdr *host_msg;
struct c2wr_hdr tmp;
struct c2_mq *reply_vq;
struct c2_vq_req *req;
struct iw_cm_event cm_event;
int err;
reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
/*
* get next msg from mq_index into adapter_msg.
* don't free it yet.
*/
adapter_msg = c2_mq_consume(reply_vq);
if (adapter_msg == NULL) {
return;
}
host_msg = vq_repbuf_alloc(c2dev);
/*
* If we can't get a host buffer, then we'll still
* wakeup the waiter, we just won't give him the msg.
* It is assumed the waiter will deal with this...
*/
if (!host_msg) {
pr_debug("handle_vq: no repbufs!\n");
/*
* just copy the WR header into a local variable.
* this allows us to still demux on the context
*/
host_msg = &tmp;
memcpy(host_msg, adapter_msg, sizeof(tmp));
reply_msg = NULL;
} else {
memcpy(host_msg, adapter_msg, reply_vq->msg_size);
reply_msg = host_msg;
}
/*
* consume the msg from the MQ
*/
c2_mq_free(reply_vq);
/*
* wakeup the waiter.
*/
req = (struct c2_vq_req *) (unsigned long) host_msg->context;
if (req == NULL) {
/*
* We should never get here, as the adapter should
* never send us a reply that we're not expecting.
*/
if (reply_msg != NULL)
vq_repbuf_free(c2dev, host_msg);
pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
return;
}
if (reply_msg)
err = c2_errno(reply_msg);
else
err = -ENOMEM;
if (!err) switch (req->event) {
case IW_CM_EVENT_ESTABLISHED:
c2_set_qp_state(req->qp,
C2_QP_STATE_RTS);
/*
* Until ird/ord negotiation via MPAv2 support is added, send
* max supported values
*/
cm_event.ird = cm_event.ord = 128;
case IW_CM_EVENT_CLOSE:
/*
* Move the QP to RTS if this is
* the established event
*/
cm_event.event = req->event;
cm_event.status = 0;
cm_event.local_addr = req->cm_id->local_addr;
cm_event.remote_addr = req->cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
req->cm_id->event_handler(req->cm_id, &cm_event);
break;
default:
break;
}
req->reply_msg = (u64) (unsigned long) (reply_msg);
atomic_set(&req->reply_ready, 1);
wake_up(&req->wait_object);
/*
* If the request was cancelled, then this put will
* free the vq_req memory...and reply_msg!!!
*/
vq_req_put(c2dev, req);
}
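/*
 * Illustrative sketch (not part of the driver): the caller-side half of
 * the round trip that handle_vq() completes.  It mirrors the pattern
 * used elsewhere in this driver (c2_add_addr(), c2_rnic_query(), ...).
 * The caller is assumed to have allocated vq_req with vq_req_alloc(),
 * built *wr with its hdr.context set to (unsigned long) vq_req, and to
 * call vq_req_free() once this returns.
 */
#if 0
static int vq_round_trip_example(struct c2_dev *c2dev,
				 struct c2_vq_req *vq_req, union c2wr *wr)
{
	void *reply;
	int err;

	/* take a reference; handle_vq() drops it again via vq_req_put() */
	vq_req_get(c2dev, vq_req);
	err = vq_send_wr(c2dev, wr);
	if (err) {
		vq_req_put(c2dev, vq_req);
		return err;
	}

	/* sleep until handle_vq() stores the reply and wakes us */
	err = vq_wait_for_reply(c2dev, vq_req);
	if (err)
		return err;

	reply = (void *) (unsigned long) vq_req->reply_msg;
	if (!reply)
		return -ENOMEM;		/* no host repbuf was available */

	err = c2_errno(reply);
	vq_repbuf_free(c2dev, reply);
	return err;
}
#endif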


@@ -1,377 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include "c2.h"
#include "c2_vq.h"
#define PBL_VIRT 1
#define PBL_PHYS 2
/*
* Send all the PBL messages to convey the remainder of the PBL
* Wait for the adapter's reply on the last one.
* This is indicated by setting the MEM_PBL_COMPLETE in the flags.
*
* NOTE: vq_req is _not_ freed by this function. The VQ Host
* Reply buffer _is_ freed by this function.
*/
static int
send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
unsigned long va, u32 pbl_depth,
struct c2_vq_req *vq_req, int pbl_type)
{
u32 pbe_count; /* number of PBEs that fit in one PBL msg */
u32 count; /* number of PBEs in this PBL msg */
struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */
struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */
int err, pbl_virt, pbl_index, i;
switch (pbl_type) {
case PBL_VIRT:
pbl_virt = 1;
break;
case PBL_PHYS:
pbl_virt = 0;
break;
default:
return -EINVAL;
}
pbe_count = (c2dev->req_vq.msg_size -
sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
return -ENOMEM;
}
c2_wr_set_id(wr, CCWR_NSMR_PBL);
/*
* Only the last PBL message will generate a reply from the verbs,
* so we set the context to 0 indicating there is no kernel verbs
* handler blocked awaiting this reply.
*/
wr->hdr.context = 0;
wr->rnic_handle = c2dev->adapter_handle;
wr->stag_index = stag_index; /* already swapped */
wr->flags = 0;
pbl_index = 0;
while (pbl_depth) {
count = min(pbe_count, pbl_depth);
wr->addrs_length = cpu_to_be32(count);
/*
* If this is the last message, reference the vq request
* struct since we will wait for a reply, and mark this
* PBL msg as the last one.
*/
if (count == pbl_depth) {
/*
* reference the request struct. dereferenced in the
* int handler.
*/
vq_req_get(c2dev, vq_req);
wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
/*
* This is the last PBL message.
* Set the context to our VQ Request Object so we can
* wait for the reply.
*/
wr->hdr.context = (unsigned long) vq_req;
}
/*
* If pbl_virt is set then va is a virtual address
* that describes a virtually contiguous memory
* allocation. The wr needs the start of each virtual page
* to be converted to the corresponding physical address
* of the page. If pbl_virt is not set then va is an array
* of physical addresses and there is no conversion to do.
* Just fill in the wr with what is in the array.
*/
for (i = 0; i < count; i++) {
if (pbl_virt) {
va += PAGE_SIZE;
} else {
wr->paddrs[i] =
cpu_to_be64(((u64 *)va)[pbl_index + i]);
}
}
/*
* Send WR to adapter
*/
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
if (count <= pbe_count) {
vq_req_put(c2dev, vq_req);
}
goto bail0;
}
pbl_depth -= count;
pbl_index += count;
}
/*
* Now wait for the reply...
*/
err = vq_wait_for_reply(c2dev, vq_req);
if (err) {
goto bail0;
}
/*
* Process reply
*/
reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail0;
}
err = c2_errno(reply);
vq_repbuf_free(c2dev, reply);
bail0:
kfree(wr);
return err;
}
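/*
 * Worked example (hypothetical numbers): with a req_vq.msg_size of 512
 * bytes and a 40-byte c2wr_nsmr_pbl_req header, pbe_count works out to
 * (512 - 40) / 8 = 59 PBEs per message.  A 1000-entry remainder is then
 * sent as ceil(1000 / 59) = 17 messages; only the 17th carries
 * MEM_PBL_COMPLETE and a non-zero hdr.context, so only it generates a
 * reply to wait for.
 */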
#define C2_PBL_MAX_DEPTH 131072
int
c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
int page_size, int pbl_depth, u32 length,
u32 offset, u64 *va, enum c2_acf acf,
struct c2_mr *mr)
{
struct c2_vq_req *vq_req;
struct c2wr_nsmr_register_req *wr;
struct c2wr_nsmr_register_rep *reply;
u16 flags;
int i, pbe_count, count;
int err;
if (!va || !length || !addr_list || !pbl_depth)
return -EINVAL;
/*
* Verify PBL depth is within rnic max
*/
if (pbl_depth > C2_PBL_MAX_DEPTH) {
return -EINVAL;
}
/*
* allocate verbs request object
*/
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
goto bail0;
}
/*
* build the WR
*/
c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
wr->hdr.context = (unsigned long) vq_req;
wr->rnic_handle = c2dev->adapter_handle;
flags = (acf | MEM_VA_BASED | MEM_REMOTE);
/*
* compute how many pbes can fit in the message
*/
pbe_count = (c2dev->req_vq.msg_size -
sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
if (pbl_depth <= pbe_count) {
flags |= MEM_PBL_COMPLETE;
}
wr->flags = cpu_to_be16(flags);
wr->stag_key = 0;
wr->va = cpu_to_be64(*va);
wr->pd_id = mr->pd->pd_id;
wr->pbe_size = cpu_to_be32(page_size);
wr->length = cpu_to_be32(length);
wr->pbl_depth = cpu_to_be32(pbl_depth);
wr->fbo = cpu_to_be32(offset);
count = min(pbl_depth, pbe_count);
wr->addrs_length = cpu_to_be32(count);
/*
* fill out the PBL for this message
*/
for (i = 0; i < count; i++) {
wr->paddrs[i] = cpu_to_be64(addr_list[i]);
}
/*
* reference the request struct
*/
vq_req_get(c2dev, vq_req);
/*
* send the WR to the adapter
*/
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail1;
}
/*
* wait for reply from adapter
*/
err = vq_wait_for_reply(c2dev, vq_req);
if (err) {
goto bail1;
}
/*
* process reply
*/
reply =
(struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply) {
err = -ENOMEM;
goto bail1;
}
if ((err = c2_errno(reply))) {
goto bail2;
}
/* *p_pb_entries = be32_to_cpu(reply->pbl_depth); */
mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
vq_repbuf_free(c2dev, reply);
/*
* if there are still more PBEs we need to send them to
* the adapter and wait for a reply on the final one.
* reuse vq_req for this purpose.
*/
pbl_depth -= count;
if (pbl_depth) {
vq_req->reply_msg = (unsigned long) NULL;
atomic_set(&vq_req->reply_ready, 0);
err = send_pbl_messages(c2dev,
cpu_to_be32(mr->ibmr.lkey),
(unsigned long) &addr_list[i],
pbl_depth, vq_req, PBL_PHYS);
if (err) {
goto bail1;
}
}
vq_req_free(c2dev, vq_req);
kfree(wr);
return err;
bail2:
vq_repbuf_free(c2dev, reply);
bail1:
kfree(wr);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
{
struct c2_vq_req *vq_req; /* verbs request object */
struct c2wr_stag_dealloc_req wr; /* work request */
struct c2wr_stag_dealloc_rep *reply; /* WR reply */
int err;
/*
* allocate verbs request object
*/
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
return -ENOMEM;
}
/*
* Build the WR
*/
c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
wr.hdr.context = (u64) (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
wr.stag_index = cpu_to_be32(stag_index);
/*
* reference the request struct. dereferenced in the int handler.
*/
vq_req_get(c2dev, vq_req);
/*
* Send WR to adapter
*/
err = vq_send_wr(c2dev, (union c2wr *) & wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail0;
}
/*
* Wait for reply from adapter
*/
err = vq_wait_for_reply(c2dev, vq_req);
if (err) {
goto bail0;
}
/*
* Process reply
*/
reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail0;
}
err = c2_errno(reply);
vq_repbuf_free(c2dev, reply);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}


@@ -1,174 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "c2.h"
#include "c2_mq.h"
void *c2_mq_alloc(struct c2_mq *q)
{
BUG_ON(q->magic != C2_MQ_MAGIC);
BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
if (c2_mq_full(q)) {
return NULL;
} else {
#ifdef DEBUG
struct c2wr_hdr *m =
(struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
#ifdef CCMSGMAGIC
BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
m->magic = cpu_to_be32(CCWR_MAGIC);
#endif
return m;
#else
return q->msg_pool.host + q->priv * q->msg_size;
#endif
}
}
void c2_mq_produce(struct c2_mq *q)
{
BUG_ON(q->magic != C2_MQ_MAGIC);
BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
if (!c2_mq_full(q)) {
q->priv = (q->priv + 1) % q->q_size;
q->hint_count++;
/* Update peer's offset. */
__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
}
}
void *c2_mq_consume(struct c2_mq *q)
{
BUG_ON(q->magic != C2_MQ_MAGIC);
BUG_ON(q->type != C2_MQ_HOST_TARGET);
if (c2_mq_empty(q)) {
return NULL;
} else {
#ifdef DEBUG
struct c2wr_hdr *m = (struct c2wr_hdr *)
(q->msg_pool.host + q->priv * q->msg_size);
#ifdef CCMSGMAGIC
BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
#endif
return m;
#else
return q->msg_pool.host + q->priv * q->msg_size;
#endif
}
}
void c2_mq_free(struct c2_mq *q)
{
BUG_ON(q->magic != C2_MQ_MAGIC);
BUG_ON(q->type != C2_MQ_HOST_TARGET);
if (!c2_mq_empty(q)) {
#ifdef CCMSGMAGIC
{
struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
(q->msg_pool.adapter + q->priv * q->msg_size);
__raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
}
#endif
q->priv = (q->priv + 1) % q->q_size;
/* Update peer's offset. */
__raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
}
}
void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
{
BUG_ON(q->magic != C2_MQ_MAGIC);
BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
while (wqe_count--) {
BUG_ON(c2_mq_empty(q));
*q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
}
}
#if 0
u32 c2_mq_count(struct c2_mq *q)
{
s32 count;
if (q->type == C2_MQ_HOST_TARGET)
count = be16_to_cpu(*q->shared) - q->priv;
else
count = q->priv - be16_to_cpu(*q->shared);
if (count < 0)
count += q->q_size;
return (u32) count;
}
#endif /* 0 */
void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
{
BUG_ON(!q->shared);
/* This code assumes the byte swapping has already been done! */
q->index = index;
q->q_size = q_size;
q->msg_size = msg_size;
q->msg_pool.adapter = pool_start;
q->peer = (struct c2_mq_shared __iomem *) peer;
q->magic = C2_MQ_MAGIC;
q->type = type;
q->priv = 0;
q->hint_count = 0;
return;
}
void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
u8 *pool_start, u16 __iomem *peer, u32 type)
{
BUG_ON(!q->shared);
/* This code assumes the byte swapping has already been done! */
q->index = index;
q->q_size = q_size;
q->msg_size = msg_size;
q->msg_pool.host = pool_start;
q->peer = (struct c2_mq_shared __iomem *) peer;
q->magic = C2_MQ_MAGIC;
q->type = type;
q->priv = 0;
q->hint_count = 0;
return;
}
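/*
 * Illustrative usage sketch (not part of the driver): how the two queue
 * directions above are driven.  "q_req" stands for an adapter-target MQ
 * such as the verbs request queue, "q_rep" for a host-target MQ such as
 * the verbs reply queue; both names are placeholders.
 */
#if 0
static void c2_mq_usage_example(struct c2_mq *q_req, struct c2_mq *q_rep)
{
	void *slot;
	void *msg;

	/* host -> adapter: reserve the next slot, build the request there,
	 * then publish it by advancing the producer index */
	slot = c2_mq_alloc(q_req);
	if (slot) {
		/* ... build the work request in *slot ... */
		c2_mq_produce(q_req);
	}

	/* adapter -> host: peek at the next message, copy it out,
	 * then retire the slot */
	msg = c2_mq_consume(q_rep);
	if (msg) {
		/* ... copy the message out of the shared pool ... */
		c2_mq_free(q_rep);
	}
}
#endif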


@@ -1,106 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _C2_MQ_H_
#define _C2_MQ_H_
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include "c2_wr.h"
enum c2_shared_regs {
C2_SHARED_ARMED = 0x10,
C2_SHARED_NOTIFY = 0x18,
C2_SHARED_SHARED = 0x40,
};
struct c2_mq_shared {
u16 unused1;
u8 armed;
u8 notification_type;
u32 unused2;
u16 shared;
/* Pad to 64 bytes. */
u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
};
enum c2_mq_type {
C2_MQ_HOST_TARGET = 1,
C2_MQ_ADAPTER_TARGET = 2,
};
/*
* c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
* c2_user_mq_t (which is the same format) is for user-mode MQs...
*/
#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */
struct c2_mq {
u32 magic;
union {
u8 *host;
u8 __iomem *adapter;
} msg_pool;
dma_addr_t host_dma;
DEFINE_DMA_UNMAP_ADDR(mapping);
u16 hint_count;
u16 priv;
struct c2_mq_shared __iomem *peer;
__be16 *shared;
dma_addr_t shared_dma;
u32 q_size;
u32 msg_size;
u32 index;
enum c2_mq_type type;
};
static __inline__ int c2_mq_empty(struct c2_mq *q)
{
return q->priv == be16_to_cpu(*q->shared);
}
static __inline__ int c2_mq_full(struct c2_mq *q)
{
return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
}
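/*
 * Worked example: with q_size = 4 the ring is empty when priv equals the
 * (byte-swapped) *shared index, and full when priv == (*shared + 3) % 4;
 * one slot is always left unused so that full and empty stay distinguishable.
 */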
extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
extern void *c2_mq_alloc(struct c2_mq *q);
extern void c2_mq_produce(struct c2_mq *q);
extern void *c2_mq_consume(struct c2_mq *q);
extern void c2_mq_free(struct c2_mq *q);
extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
u8 *pool_start, u16 __iomem *peer, u32 type);
#endif /* _C2_MQ_H_ */


@@ -1,90 +0,0 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include "c2.h"
#include "c2_provider.h"
int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
{
u32 obj;
int ret = 0;
spin_lock(&c2dev->pd_table.lock);
obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
c2dev->pd_table.last);
if (obj >= c2dev->pd_table.max)
obj = find_first_zero_bit(c2dev->pd_table.table,
c2dev->pd_table.max);
if (obj < c2dev->pd_table.max) {
pd->pd_id = obj;
__set_bit(obj, c2dev->pd_table.table);
c2dev->pd_table.last = obj+1;
if (c2dev->pd_table.last >= c2dev->pd_table.max)
c2dev->pd_table.last = 0;
} else
ret = -ENOMEM;
spin_unlock(&c2dev->pd_table.lock);
return ret;
}
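/*
 * Worked example: with max_pd = 4, last = 2 and bits {0,1} already set,
 * find_next_zero_bit() starting at 2 returns 2, so pd_id = 2 and last
 * advances to 3.  Once the tail of the bitmap is exhausted the search
 * wraps via find_first_zero_bit(); only when that also finds no free
 * bit does the allocation fail with -ENOMEM.
 */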
void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
{
spin_lock(&c2dev->pd_table.lock);
__clear_bit(pd->pd_id, c2dev->pd_table.table);
spin_unlock(&c2dev->pd_table.lock);
}
int c2_init_pd_table(struct c2_dev *c2dev)
{
c2dev->pd_table.last = 0;
c2dev->pd_table.max = c2dev->props.max_pd;
spin_lock_init(&c2dev->pd_table.lock);
c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
sizeof(long), GFP_KERNEL);
if (!c2dev->pd_table.table)
return -ENOMEM;
bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
return 0;
}
void c2_cleanup_pd_table(struct c2_dev *c2dev)
{
kfree(c2dev->pd_table.table);
}


@@ -1,912 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
#include <linux/delay.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/if_arp.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/byteorder.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "c2.h"
#include "c2_provider.h"
#include "c2_user.h"
static int c2_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
struct c2_dev *c2dev = to_c2dev(ibdev);
pr_debug("%s:%u\n", __func__, __LINE__);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
*props = c2dev->props;
return 0;
}
static int c2_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
pr_debug("%s:%u\n", __func__, __LINE__);
props->max_mtu = IB_MTU_4096;
props->lid = 0;
props->lmc = 0;
props->sm_lid = 0;
props->sm_sl = 0;
props->state = IB_PORT_ACTIVE;
props->phys_state = 0;
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_REINIT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
props->qkey_viol_cntr = 0;
props->active_width = 1;
props->active_speed = IB_SPEED_SDR;
return 0;
}
static int c2_query_pkey(struct ib_device *ibdev,
u8 port, u16 index, u16 * pkey)
{
pr_debug("%s:%u\n", __func__, __LINE__);
*pkey = 0;
return 0;
}
static int c2_query_gid(struct ib_device *ibdev, u8 port,
int index, union ib_gid *gid)
{
struct c2_dev *c2dev = to_c2dev(ibdev);
pr_debug("%s:%u\n", __func__, __LINE__);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
return 0;
}
/* Allocate the user context data structure. This keeps track
* of all objects associated with a particular user-mode client.
*/
static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct c2_ucontext *context;
pr_debug("%s:%u\n", __func__, __LINE__);
context = kmalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
return &context->ibucontext;
}
static int c2_dealloc_ucontext(struct ib_ucontext *context)
{
pr_debug("%s:%u\n", __func__, __LINE__);
kfree(context);
return 0;
}
static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return -ENOSYS;
}
static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct c2_pd *pd;
int err;
pr_debug("%s:%u\n", __func__, __LINE__);
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (context) {
if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
c2_pd_free(to_c2dev(ibdev), pd);
kfree(pd);
return ERR_PTR(-EFAULT);
}
}
return &pd->ibpd;
}
static int c2_dealloc_pd(struct ib_pd *pd)
{
pr_debug("%s:%u\n", __func__, __LINE__);
c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
kfree(pd);
return 0;
}
static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return ERR_PTR(-ENOSYS);
}
static int c2_ah_destroy(struct ib_ah *ah)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return -ENOSYS;
}
static void c2_add_ref(struct ib_qp *ibqp)
{
struct c2_qp *qp;
BUG_ON(!ibqp);
qp = to_c2qp(ibqp);
atomic_inc(&qp->refcount);
}
static void c2_rem_ref(struct ib_qp *ibqp)
{
struct c2_qp *qp;
BUG_ON(!ibqp);
qp = to_c2qp(ibqp);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
{
struct c2_dev* c2dev = to_c2dev(device);
struct c2_qp *qp;
qp = c2_find_qpn(c2dev, qpn);
pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
__func__, qp, qpn, device,
(qp?atomic_read(&qp->refcount):0));
return (qp?&qp->ibqp:NULL);
}
static struct ib_qp *c2_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct c2_qp *qp;
int err;
pr_debug("%s:%u\n", __func__, __LINE__);
if (init_attr->create_flags)
return ERR_PTR(-EINVAL);
switch (init_attr->qp_type) {
case IB_QPT_RC:
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp) {
pr_debug("%s: Unable to allocate QP\n", __func__);
return ERR_PTR(-ENOMEM);
}
spin_lock_init(&qp->lock);
if (pd->uobject) {
/* userspace specific */
}
err = c2_alloc_qp(to_c2dev(pd->device),
to_c2pd(pd), init_attr, qp);
if (err && pd->uobject) {
/* userspace specific */
}
break;
default:
pr_debug("%s: Invalid QP type: %d\n", __func__,
init_attr->qp_type);
return ERR_PTR(-EINVAL);
}
if (err) {
kfree(qp);
return ERR_PTR(err);
}
return &qp->ibqp;
}
static int c2_destroy_qp(struct ib_qp *ib_qp)
{
struct c2_qp *qp = to_c2qp(ib_qp);
pr_debug("%s:%u qp=%p,qp->state=%d\n",
__func__, __LINE__, ib_qp, qp->state);
c2_free_qp(to_c2dev(ib_qp->device), qp);
kfree(qp);
return 0;
}
static struct ib_cq *c2_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
int entries = attr->cqe;
struct c2_cq *cq;
int err;
if (attr->flags)
return ERR_PTR(-EINVAL);
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
pr_debug("%s: Unable to allocate CQ\n", __func__);
return ERR_PTR(-ENOMEM);
}
err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
if (err) {
pr_debug("%s: error initializing CQ\n", __func__);
kfree(cq);
return ERR_PTR(err);
}
return &cq->ibcq;
}
static int c2_destroy_cq(struct ib_cq *ib_cq)
{
struct c2_cq *cq = to_c2cq(ib_cq);
pr_debug("%s:%u\n", __func__, __LINE__);
c2_free_cq(to_c2dev(ib_cq->device), cq);
kfree(cq);
return 0;
}
static inline u32 c2_convert_access(int acc)
{
return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
}
static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 * iova_start)
{
struct c2_mr *mr;
u64 *page_list;
u32 total_len;
int err, i, j, k, page_shift, pbl_depth;
pbl_depth = 0;
total_len = 0;
page_shift = PAGE_SHIFT;
/*
* If there is only 1 buffer we assume this could be a
* map of all physical memory, so use a 32KB page size
* (page_shift + 3 with 4KB pages).
*/
if (num_phys_buf == 1)
page_shift += 3;
for (i = 0; i < num_phys_buf; i++) {
if (buffer_list[i].addr & ~PAGE_MASK) {
pr_debug("Unaligned Memory Buffer: 0x%x\n",
(unsigned int) buffer_list[i].addr);
return ERR_PTR(-EINVAL);
}
if (!buffer_list[i].size) {
pr_debug("Invalid Buffer Size\n");
return ERR_PTR(-EINVAL);
}
total_len += buffer_list[i].size;
pbl_depth += ALIGN(buffer_list[i].size,
(1 << page_shift)) >> page_shift;
}
page_list = vmalloc(sizeof(u64) * pbl_depth);
if (!page_list) {
pr_debug("couldn't vmalloc page_list of size %zd\n",
(sizeof(u64) * pbl_depth));
return ERR_PTR(-ENOMEM);
}
for (i = 0, j = 0; i < num_phys_buf; i++) {
int naddrs;
naddrs = ALIGN(buffer_list[i].size,
(1 << page_shift)) >> page_shift;
for (k = 0; k < naddrs; k++)
page_list[j++] = (buffer_list[i].addr +
(k << page_shift));
}
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
vfree(page_list);
return ERR_PTR(-ENOMEM);
}
mr->pd = to_c2pd(ib_pd);
mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__func__, page_shift, pbl_depth, total_len,
(unsigned long long) *iova_start,
(unsigned long long) page_list[0],
(unsigned long long) page_list[pbl_depth-1]);
err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
(1 << page_shift), pbl_depth,
total_len, 0, iova_start,
c2_convert_access(acc), mr);
vfree(page_list);
if (err) {
kfree(mr);
return ERR_PTR(err);
}
return &mr->ibmr;
}
static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
{
struct ib_phys_buf bl;
u64 kva = 0;
pr_debug("%s:%u\n", __func__, __LINE__);
/* AMSO1100 limit */
bl.size = 0xffffffff;
bl.addr = 0;
return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
}
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
u64 *pages;
u64 kva = 0;
int shift, n, len;
int i, k, entry;
int err = 0;
struct scatterlist *sg;
struct c2_pd *c2pd = to_c2pd(pd);
struct c2_mr *c2mr;
pr_debug("%s:%u\n", __func__, __LINE__);
c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
if (!c2mr)
return ERR_PTR(-ENOMEM);
c2mr->pd = c2pd;
c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(c2mr->umem)) {
err = PTR_ERR(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
shift = ffs(c2mr->umem->page_size) - 1;
n = c2mr->umem->nmap;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
if (!pages) {
err = -ENOMEM;
goto err;
}
i = 0;
for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] =
sg_dma_address(sg) +
(c2mr->umem->page_size * k);
}
}
kva = virt;
err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
pages,
c2mr->umem->page_size,
i,
length,
ib_umem_offset(c2mr->umem),
&kva,
c2_convert_access(acc),
c2mr);
kfree(pages);
if (err)
goto err;
return &c2mr->ibmr;
err:
ib_umem_release(c2mr->umem);
kfree(c2mr);
return ERR_PTR(err);
}
static int c2_dereg_mr(struct ib_mr *ib_mr)
{
struct c2_mr *mr = to_c2mr(ib_mr);
int err;
pr_debug("%s:%u\n", __func__, __LINE__);
err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
if (err)
pr_debug("c2_stag_dealloc failed: %d\n", err);
else {
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
}
return err;
}
static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
pr_debug("%s:%u\n", __func__, __LINE__);
return sprintf(buf, "%x\n", c2dev->props.hw_ver);
}
static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
pr_debug("%s:%u\n", __func__, __LINE__);
return sprintf(buf, "%x.%x.%x\n",
(int) (c2dev->props.fw_ver >> 32),
(int) (c2dev->props.fw_ver >> 16) & 0xffff,
(int) (c2dev->props.fw_ver & 0xffff));
}
static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
char *buf)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return sprintf(buf, "AMSO1100\n");
}
static ssize_t show_board(struct device *dev, struct device_attribute *attr,
char *buf)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
}
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static struct device_attribute *c2_dev_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_fw_ver,
&dev_attr_hca_type,
&dev_attr_board_id
};
static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
int err;
err =
c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
attr_mask);
return err;
}
static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return -ENOSYS;
}
static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return -ENOSYS;
}
static int c2_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad_hdr *in_mad,
size_t in_mad_size,
struct ib_mad_hdr *out_mad,
size_t *out_mad_size,
u16 *out_mad_pkey_index)
{
pr_debug("%s:%u\n", __func__, __LINE__);
return -ENOSYS;
}
static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
pr_debug("%s:%u\n", __func__, __LINE__);
/* Request a connection */
return c2_llp_connect(cm_id, iw_param);
}
static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
pr_debug("%s:%u\n", __func__, __LINE__);
/* Accept the new connection */
return c2_llp_accept(cm_id, iw_param);
}
static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
int err;
pr_debug("%s:%u\n", __func__, __LINE__);
err = c2_llp_reject(cm_id, pdata, pdata_len);
return err;
}
static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
{
int err;
pr_debug("%s:%u\n", __func__, __LINE__);
err = c2_llp_service_create(cm_id, backlog);
pr_debug("%s:%u err=%d\n",
__func__, __LINE__,
err);
return err;
}
static int c2_service_destroy(struct iw_cm_id *cm_id)
{
int err;
pr_debug("%s:%u\n", __func__, __LINE__);
err = c2_llp_service_destroy(cm_id);
return err;
}
static int c2_pseudo_up(struct net_device *netdev)
{
struct in_device *ind;
struct c2_dev *c2dev = netdev->ml_priv;
ind = in_dev_get(netdev);
if (!ind)
return 0;
pr_debug("adding...\n");
for_ifa(ind) {
#ifdef DEBUG
u8 *ip = (u8 *) & ifa->ifa_address;
pr_debug("%s: %d.%d.%d.%d\n",
ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
#endif
c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
}
endfor_ifa(ind);
in_dev_put(ind);
return 0;
}
static int c2_pseudo_down(struct net_device *netdev)
{
struct in_device *ind;
struct c2_dev *c2dev = netdev->ml_priv;
ind = in_dev_get(netdev);
if (!ind)
return 0;
pr_debug("deleting...\n");
for_ifa(ind) {
#ifdef DEBUG
u8 *ip = (u8 *) & ifa->ifa_address;
pr_debug("%s: %d.%d.%d.%d\n",
ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
#endif
c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
}
endfor_ifa(ind);
in_dev_put(ind);
return 0;
}
static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
kfree_skb(skb);
return NETDEV_TX_OK;
}
static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
{
if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
return -EINVAL;
netdev->mtu = new_mtu;
/* TODO: Tell rnic about new rdma interface mtu */
return 0;
}
static const struct net_device_ops c2_pseudo_netdev_ops = {
.ndo_open = c2_pseudo_up,
.ndo_stop = c2_pseudo_down,
.ndo_start_xmit = c2_pseudo_xmit_frame,
.ndo_change_mtu = c2_pseudo_change_mtu,
.ndo_validate_addr = eth_validate_addr,
};
static void setup(struct net_device *netdev)
{
netdev->netdev_ops = &c2_pseudo_netdev_ops;
netdev->watchdog_timeo = 0;
netdev->type = ARPHRD_ETHER;
netdev->mtu = 1500;
netdev->hard_header_len = ETH_HLEN;
netdev->addr_len = ETH_ALEN;
netdev->tx_queue_len = 0;
netdev->flags |= IFF_NOARP;
}
static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
{
char name[IFNAMSIZ];
struct net_device *netdev;
/* change ethxxx to iwxxx */
strcpy(name, "iw");
strcat(name, &c2dev->netdev->name[3]);
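/* e.g. a netdev named "eth0" yields a pseudo interface named "iw0" */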
netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, setup);
if (!netdev) {
printk(KERN_ERR PFX "%s - etherdev alloc failed\n",
__func__);
return NULL;
}
netdev->ml_priv = c2dev;
SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
/* Print out the MAC address */
pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
#if 0
/* Disable network packets */
netif_stop_queue(netdev);
#endif
return netdev;
}
static int c2_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
int err;
err = c2_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
int c2_register_device(struct c2_dev *dev)
{
int ret = -ENOMEM;
int i;
/* Register pseudo network device */
dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
if (!dev->pseudo_netdev)
goto out;
ret = register_netdev(dev->pseudo_netdev);
if (ret)
goto out_free_netdev;
pr_debug("%s:%u\n", __func__, __LINE__);
strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
dev->ibdev.owner = THIS_MODULE;
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_POST_SEND) |
(1ull << IB_USER_VERBS_CMD_POST_RECV);
dev->ibdev.node_type = RDMA_NODE_RNIC;
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dma_device = &dev->pcidev->dev;
dev->ibdev.query_device = c2_query_device;
dev->ibdev.query_port = c2_query_port;
dev->ibdev.query_pkey = c2_query_pkey;
dev->ibdev.query_gid = c2_query_gid;
dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
dev->ibdev.mmap = c2_mmap_uar;
dev->ibdev.alloc_pd = c2_alloc_pd;
dev->ibdev.dealloc_pd = c2_dealloc_pd;
dev->ibdev.create_ah = c2_ah_create;
dev->ibdev.destroy_ah = c2_ah_destroy;
dev->ibdev.create_qp = c2_create_qp;
dev->ibdev.modify_qp = c2_modify_qp;
dev->ibdev.destroy_qp = c2_destroy_qp;
dev->ibdev.create_cq = c2_create_cq;
dev->ibdev.destroy_cq = c2_destroy_cq;
dev->ibdev.poll_cq = c2_poll_cq;
dev->ibdev.get_dma_mr = c2_get_dma_mr;
dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
dev->ibdev.reg_user_mr = c2_reg_user_mr;
dev->ibdev.dereg_mr = c2_dereg_mr;
dev->ibdev.get_port_immutable = c2_port_immutable;
dev->ibdev.alloc_fmr = NULL;
dev->ibdev.unmap_fmr = NULL;
dev->ibdev.dealloc_fmr = NULL;
dev->ibdev.map_phys_fmr = NULL;
dev->ibdev.attach_mcast = c2_multicast_attach;
dev->ibdev.detach_mcast = c2_multicast_detach;
dev->ibdev.process_mad = c2_process_mad;
dev->ibdev.req_notify_cq = c2_arm_cq;
dev->ibdev.post_send = c2_post_send;
dev->ibdev.post_recv = c2_post_receive;
dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
if (dev->ibdev.iwcm == NULL) {
ret = -ENOMEM;
goto out_unregister_netdev;
}
dev->ibdev.iwcm->add_ref = c2_add_ref;
dev->ibdev.iwcm->rem_ref = c2_rem_ref;
dev->ibdev.iwcm->get_qp = c2_get_qp;
dev->ibdev.iwcm->connect = c2_connect;
dev->ibdev.iwcm->accept = c2_accept;
dev->ibdev.iwcm->reject = c2_reject;
dev->ibdev.iwcm->create_listen = c2_service_create;
dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto out_free_iwcm;
for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
c2_dev_attributes[i]);
if (ret)
goto out_unregister_ibdev;
}
goto out;
out_unregister_ibdev:
ib_unregister_device(&dev->ibdev);
out_free_iwcm:
kfree(dev->ibdev.iwcm);
out_unregister_netdev:
unregister_netdev(dev->pseudo_netdev);
out_free_netdev:
free_netdev(dev->pseudo_netdev);
out:
pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
return ret;
}
void c2_unregister_device(struct c2_dev *dev)
{
pr_debug("%s:%u\n", __func__, __LINE__);
unregister_netdev(dev->pseudo_netdev);
free_netdev(dev->pseudo_netdev);
ib_unregister_device(&dev->ibdev);
}


@@ -1,182 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef C2_PROVIDER_H
#define C2_PROVIDER_H
#include <linux/inetdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include "c2_mq.h"
#include <rdma/iw_cm.h>
#define C2_MPT_FLAG_ATOMIC (1 << 14)
#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13)
#define C2_MPT_FLAG_REMOTE_READ (1 << 12)
#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11)
#define C2_MPT_FLAG_LOCAL_READ (1 << 10)
struct c2_buf_list {
void *buf;
DEFINE_DMA_UNMAP_ADDR(mapping);
};
/* The user context keeps track of objects allocated for a
* particular user-mode client. */
struct c2_ucontext {
struct ib_ucontext ibucontext;
};
struct c2_mtt;
/* All objects associated with a PD are kept in the
* associated user context if present.
*/
struct c2_pd {
struct ib_pd ibpd;
u32 pd_id;
};
struct c2_mr {
struct ib_mr ibmr;
struct c2_pd *pd;
struct ib_umem *umem;
};
struct c2_av;
enum c2_ah_type {
C2_AH_ON_HCA,
C2_AH_PCI_POOL,
C2_AH_KMALLOC
};
struct c2_ah {
struct ib_ah ibah;
};
struct c2_cq {
struct ib_cq ibcq;
spinlock_t lock;
atomic_t refcount;
int cqn;
int is_kernel;
wait_queue_head_t wait;
u32 adapter_handle;
struct c2_mq mq;
};
struct c2_wq {
spinlock_t lock;
};
struct iw_cm_id;
struct c2_qp {
struct ib_qp ibqp;
struct iw_cm_id *cm_id;
spinlock_t lock;
atomic_t refcount;
wait_queue_head_t wait;
int qpn;
u32 adapter_handle;
u32 send_sgl_depth;
u32 recv_sgl_depth;
u32 rdma_write_sgl_depth;
u8 state;
struct c2_mq sq_mq;
struct c2_mq rq_mq;
};
struct c2_cr_query_attrs {
u32 local_addr;
u32 remote_addr;
u16 local_port;
u16 remote_port;
};
static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct c2_pd, ibpd);
}
static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
{
return container_of(ibucontext, struct c2_ucontext, ibucontext);
}
static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct c2_mr, ibmr);
}
static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
{
return container_of(ibah, struct c2_ah, ibah);
}
static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct c2_cq, ibcq);
}
static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct c2_qp, ibqp);
}
static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
{
struct in_device *ind;
int ret = 0;
ind = in_dev_get(netdev);
if (!ind)
return 0;
for_ifa(ind) {
if (ifa->ifa_address == addr) {
ret = 1;
break;
}
}
endfor_ifa(ind);
in_dev_put(ind);
return ret;
}
#endif /* C2_PROVIDER_H */

File diff suppressed because it is too large


@@ -1,655 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/if_vlan.h>
#include <linux/crc32.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/inet.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/route.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/byteorder.h>
#include <rdma/ib_smi.h>
#include "c2.h"
#include "c2_vq.h"
/* Device capabilities */
#define C2_MIN_PAGESIZE 1024
#define C2_MAX_MRS 32768
#define C2_MAX_QPS 16000
#define C2_MAX_WQE_SZ 256
#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ)
#define C2_MAX_SGES 4
#define C2_MAX_SGE_RD 1
#define C2_MAX_CQS 32768
#define C2_MAX_CQES 4096
#define C2_MAX_PDS 16384
/*
* Send the adapter INIT message to the amso1100
*/
static int c2_adapter_init(struct c2_dev *c2dev)
{
struct c2wr_init_req wr;
int err;
memset(&wr, 0, sizeof(wr));
c2_wr_set_id(&wr, CCWR_INIT);
wr.hdr.context = 0;
wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
/* Post the init message */
err = vq_send_wr(c2dev, (union c2wr *) & wr);
return err;
}
/*
* Send the adapter TERM message to the amso1100
*/
static void c2_adapter_term(struct c2_dev *c2dev)
{
struct c2wr_init_req wr;
memset(&wr, 0, sizeof(wr));
c2_wr_set_id(&wr, CCWR_TERM);
wr.hdr.context = 0;
/* Post the init message */
vq_send_wr(c2dev, (union c2wr *) & wr);
c2dev->init = 0;
return;
}
/*
* Query the adapter
*/
static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
{
struct c2_vq_req *vq_req;
struct c2wr_rnic_query_req wr;
struct c2wr_rnic_query_rep *reply;
int err;
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
wr.hdr.context = (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, (union c2wr *) &wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail1;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail1;
reply =
(struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply)
err = -ENOMEM;
else
err = c2_errno(reply);
if (err)
goto bail2;
props->fw_ver =
((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
props->max_mr_size = 0xFFFFFFFF;
props->page_size_cap = ~(C2_MIN_PAGESIZE-1);
props->vendor_id = be32_to_cpu(reply->vendor_id);
props->vendor_part_id = be32_to_cpu(reply->part_number);
props->hw_ver = be32_to_cpu(reply->hw_version);
props->max_qp = be32_to_cpu(reply->max_qps);
props->max_qp_wr = be32_to_cpu(reply->max_qp_depth);
props->device_cap_flags = c2dev->device_cap_flags;
props->max_sge = C2_MAX_SGES;
props->max_sge_rd = C2_MAX_SGE_RD;
props->max_cq = be32_to_cpu(reply->max_cqs);
props->max_cqe = be32_to_cpu(reply->max_cq_depth);
props->max_mr = be32_to_cpu(reply->max_mrs);
props->max_pd = be32_to_cpu(reply->max_pds);
props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird);
props->max_ee_rd_atom = 0;
props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird);
props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
props->max_ee_init_rd_atom = 0;
props->atomic_cap = IB_ATOMIC_NONE;
props->max_ee = 0;
props->max_rdd = 0;
props->max_mw = be32_to_cpu(reply->max_mws);
props->max_raw_ipv6_qp = 0;
props->max_raw_ethy_qp = 0;
props->max_mcast_grp = 0;
props->max_mcast_qp_attach = 0;
props->max_total_mcast_qp_attach = 0;
props->max_ah = 0;
props->max_fmr = 0;
props->max_map_per_fmr = 0;
props->max_srq = 0;
props->max_srq_wr = 0;
props->max_srq_sge = 0;
props->max_pkeys = 0;
props->local_ca_ack_delay = 0;
bail2:
vq_repbuf_free(c2dev, reply);
bail1:
vq_req_free(c2dev, vq_req);
return err;
}
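/*
 * Worked example: fw_ver_major = 1, fw_ver_minor = 2, fw_ver_patch = 3
 * pack into props->fw_ver = 0x0000000100020003, which the show_fw_ver()
 * sysfs attribute elsewhere in this driver unpacks back into "1.2.3".
 */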
/*
* Add an IP address to the RNIC interface
*/
int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
{
struct c2_vq_req *vq_req;
struct c2wr_rnic_setconfig_req *wr;
struct c2wr_rnic_setconfig_rep *reply;
struct c2_netaddr netaddr;
int err, len;
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
len = sizeof(struct c2_netaddr);
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
goto bail0;
}
c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
wr->hdr.context = (unsigned long) vq_req;
wr->rnic_handle = c2dev->adapter_handle;
wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
netaddr.ip_addr = inaddr;
netaddr.netmask = inmask;
netaddr.mtu = 0;
memcpy(wr->data, &netaddr, len);
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail1;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail1;
reply =
(struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply) {
err = -ENOMEM;
goto bail1;
}
err = c2_errno(reply);
vq_repbuf_free(c2dev, reply);
bail1:
kfree(wr);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
/*
* Delete an IP address from the RNIC interface
*/
int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
{
struct c2_vq_req *vq_req;
struct c2wr_rnic_setconfig_req *wr;
struct c2wr_rnic_setconfig_rep *reply;
struct c2_netaddr netaddr;
int err, len;
vq_req = vq_req_alloc(c2dev);
if (!vq_req)
return -ENOMEM;
len = sizeof(struct c2_netaddr);
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
goto bail0;
}
c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
wr->hdr.context = (unsigned long) vq_req;
wr->rnic_handle = c2dev->adapter_handle;
wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
netaddr.ip_addr = inaddr;
netaddr.netmask = inmask;
netaddr.mtu = 0;
memcpy(wr->data, &netaddr, len);
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail1;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail1;
reply =
(struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply) {
err = -ENOMEM;
goto bail1;
}
err = c2_errno(reply);
vq_repbuf_free(c2dev, reply);
bail1:
kfree(wr);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
/*
* Open a single RNIC instance to use with all
* low level openib calls
*/
static int c2_rnic_open(struct c2_dev *c2dev)
{
struct c2_vq_req *vq_req;
union c2wr wr;
struct c2wr_rnic_open_rep *reply;
int err;
vq_req = vq_req_alloc(c2dev);
if (vq_req == NULL) {
return -ENOMEM;
}
memset(&wr, 0, sizeof(wr));
c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
wr.rnic_open.req.port_num = cpu_to_be16(0);
wr.rnic_open.req.user_context = (unsigned long) c2dev;
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, &wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail0;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err) {
goto bail0;
}
reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply) {
err = -ENOMEM;
goto bail0;
}
if ((err = c2_errno(reply)) != 0) {
goto bail1;
}
c2dev->adapter_handle = reply->rnic_handle;
bail1:
vq_repbuf_free(c2dev, reply);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
/*
* Close the RNIC instance
*/
static int c2_rnic_close(struct c2_dev *c2dev)
{
struct c2_vq_req *vq_req;
union c2wr wr;
struct c2wr_rnic_close_rep *reply;
int err;
vq_req = vq_req_alloc(c2dev);
if (vq_req == NULL) {
return -ENOMEM;
}
memset(&wr, 0, sizeof(wr));
c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
vq_req_get(c2dev, vq_req);
err = vq_send_wr(c2dev, &wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail0;
}
err = vq_wait_for_reply(c2dev, vq_req);
if (err) {
goto bail0;
}
reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
if (!reply) {
err = -ENOMEM;
goto bail0;
}
if ((err = c2_errno(reply)) != 0) {
goto bail1;
}
c2dev->adapter_handle = 0;
bail1:
vq_repbuf_free(c2dev, reply);
bail0:
vq_req_free(c2dev, vq_req);
return err;
}
/*
* Called by c2_probe to initialize the RNIC. This principally
* involves initializing the various limits and resource pools that
* comprise the RNIC instance.
*/
int c2_rnic_init(struct c2_dev *c2dev)
{
int err;
u32 qsize, msgsize;
void *q1_pages;
void *q2_pages;
void __iomem *mmio_regs;
/* Device capabilities */
c2dev->device_cap_flags =
(IB_DEVICE_RESIZE_MAX_WR |
IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW);
/* Allocate the qptr_array */
c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
if (!c2dev->qptr_array) {
return -ENOMEM;
}
/* Initialize the qptr_array */
c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
c2dev->qptr_array[2] = (void *) &c2dev->aeq;
/* Initialize data structures */
init_waitqueue_head(&c2dev->req_vq_wo);
spin_lock_init(&c2dev->vqlock);
spin_lock_init(&c2dev->lock);
/* Allocate MQ shared pointer pool for kernel clients. User
* mode client pools are hung off the user context
*/
err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
if (err) {
goto bail0;
}
/* Allocate shared pointers for Q0, Q1, and Q2 from
* the shared pointer pool.
*/
c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
&c2dev->hint_count_dma,
GFP_KERNEL);
c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
&c2dev->req_vq.shared_dma,
GFP_KERNEL);
c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
&c2dev->rep_vq.shared_dma,
GFP_KERNEL);
c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
&c2dev->aeq.shared_dma, GFP_KERNEL);
if (!c2dev->hint_count || !c2dev->req_vq.shared ||
!c2dev->rep_vq.shared || !c2dev->aeq.shared) {
err = -ENOMEM;
goto bail1;
}
mmio_regs = c2dev->kva;
/* Initialize the Verbs Request Queue */
c2_mq_req_init(&c2dev->req_vq, 0,
be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
mmio_regs +
be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
mmio_regs +
be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
C2_MQ_ADAPTER_TARGET);
/* Initialize the Verbs Reply Queue */
qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
&c2dev->rep_vq.host_dma, GFP_KERNEL);
if (!q1_pages) {
err = -ENOMEM;
goto bail1;
}
dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
(unsigned long long) c2dev->rep_vq.host_dma);
c2_mq_rep_init(&c2dev->rep_vq,
1,
qsize,
msgsize,
q1_pages,
mmio_regs +
be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
C2_MQ_HOST_TARGET);
/* Initialize the Asynchronous Event Queue */
qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
&c2dev->aeq.host_dma, GFP_KERNEL);
if (!q2_pages) {
err = -ENOMEM;
goto bail2;
}
dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
(unsigned long long) c2dev->aeq.host_dma);
c2_mq_rep_init(&c2dev->aeq,
2,
qsize,
msgsize,
q2_pages,
mmio_regs +
be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
C2_MQ_HOST_TARGET);
/* Initialize the verbs request allocator */
err = vq_init(c2dev);
if (err)
goto bail3;
/* Enable interrupts on the adapter */
writel(0, c2dev->regs + C2_IDIS);
/* create the WR init message */
err = c2_adapter_init(c2dev);
if (err)
goto bail4;
c2dev->init++;
/* open an adapter instance */
err = c2_rnic_open(c2dev);
if (err)
goto bail4;
/* Initialize the cached adapter limits */
err = c2_rnic_query(c2dev, &c2dev->props);
if (err)
goto bail5;
/* Initialize the PD pool */
err = c2_init_pd_table(c2dev);
if (err)
goto bail5;
/* Initialize the QP pool */
c2_init_qp_table(c2dev);
return 0;
bail5:
c2_rnic_close(c2dev);
bail4:
vq_term(c2dev);
bail3:
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->aeq.q_size * c2dev->aeq.msg_size,
q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
bail2:
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
bail1:
c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
bail0:
vfree(c2dev->qptr_array);
return err;
}
/*
* Called by c2_remove to cleanup the RNIC resources.
*/
void c2_rnic_term(struct c2_dev *c2dev)
{
/* Close the open adapter instance */
c2_rnic_close(c2dev);
/* Send the TERM message to the adapter */
c2_adapter_term(c2dev);
/* Disable interrupts on the adapter */
writel(1, c2dev->regs + C2_IDIS);
/* Free the QP pool */
c2_cleanup_qp_table(c2dev);
/* Free the PD pool */
c2_cleanup_pd_table(c2dev);
/* Free the verbs request allocator */
vq_term(c2dev);
/* Free the asynchronous event queue */
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->aeq.q_size * c2dev->aeq.msg_size,
c2dev->aeq.msg_pool.host,
dma_unmap_addr(&c2dev->aeq, mapping));
/* Free the verbs reply queue */
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
c2dev->rep_vq.msg_pool.host,
dma_unmap_addr(&c2dev->rep_vq, mapping));
/* Free the MQ shared pointer pool */
c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
/* Free the qptr_array */
vfree(c2dev->qptr_array);
return;
}


@@ -1,158 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _C2_STATUS_H_
#define _C2_STATUS_H_
/*
* Verbs Status Codes
*/
enum c2_status {
C2_OK = 0, /* This must be zero */
CCERR_INSUFFICIENT_RESOURCES = 1,
CCERR_INVALID_MODIFIER = 2,
CCERR_INVALID_MODE = 3,
CCERR_IN_USE = 4,
CCERR_INVALID_RNIC = 5,
CCERR_INTERRUPTED_OPERATION = 6,
CCERR_INVALID_EH = 7,
CCERR_INVALID_CQ = 8,
CCERR_CQ_EMPTY = 9,
CCERR_NOT_IMPLEMENTED = 10,
CCERR_CQ_DEPTH_TOO_SMALL = 11,
CCERR_PD_IN_USE = 12,
CCERR_INVALID_PD = 13,
CCERR_INVALID_SRQ = 14,
CCERR_INVALID_ADDRESS = 15,
CCERR_INVALID_NETMASK = 16,
CCERR_INVALID_QP = 17,
CCERR_INVALID_QP_STATE = 18,
CCERR_TOO_MANY_WRS_POSTED = 19,
CCERR_INVALID_WR_TYPE = 20,
CCERR_INVALID_SGL_LENGTH = 21,
CCERR_INVALID_SQ_DEPTH = 22,
CCERR_INVALID_RQ_DEPTH = 23,
CCERR_INVALID_ORD = 24,
CCERR_INVALID_IRD = 25,
CCERR_QP_ATTR_CANNOT_CHANGE = 26,
CCERR_INVALID_STAG = 27,
CCERR_QP_IN_USE = 28,
CCERR_OUTSTANDING_WRS = 29,
CCERR_STAG_IN_USE = 30,
CCERR_INVALID_STAG_INDEX = 31,
CCERR_INVALID_SGL_FORMAT = 32,
CCERR_ADAPTER_TIMEOUT = 33,
CCERR_INVALID_CQ_DEPTH = 34,
CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
CCERR_INVALID_EP = 36,
CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
CCERR_FLUSHED = 38,
CCERR_INVALID_WQE = 39,
CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
CCERR_REMOTE_TERMINATION_ERROR = 41,
CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
CCERR_ACCESS_VIOLATION = 43,
CCERR_INVALID_PD_ID = 44,
CCERR_WRAP_ERROR = 45,
CCERR_INV_STAG_ACCESS_ERROR = 46,
CCERR_ZERO_RDMA_READ_RESOURCES = 47,
CCERR_QP_NOT_PRIVILEGED = 48,
CCERR_STAG_STATE_NOT_INVALID = 49,
CCERR_INVALID_PAGE_SIZE = 50,
CCERR_INVALID_BUFFER_SIZE = 51,
CCERR_INVALID_PBE = 52,
CCERR_INVALID_FBO = 53,
CCERR_INVALID_LENGTH = 54,
CCERR_INVALID_ACCESS_RIGHTS = 55,
CCERR_PBL_TOO_BIG = 56,
CCERR_INVALID_VA = 57,
CCERR_INVALID_REGION = 58,
CCERR_INVALID_WINDOW = 59,
CCERR_TOTAL_LENGTH_TOO_BIG = 60,
CCERR_INVALID_QP_ID = 61,
CCERR_ADDR_IN_USE = 62,
CCERR_ADDR_NOT_AVAIL = 63,
CCERR_NET_DOWN = 64,
CCERR_NET_UNREACHABLE = 65,
CCERR_CONN_ABORTED = 66,
CCERR_CONN_RESET = 67,
CCERR_NO_BUFS = 68,
CCERR_CONN_TIMEDOUT = 69,
CCERR_CONN_REFUSED = 70,
CCERR_HOST_UNREACHABLE = 71,
CCERR_INVALID_SEND_SGL_DEPTH = 72,
CCERR_INVALID_RECV_SGL_DEPTH = 73,
CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
CCERR_INSUFFICIENT_PRIVILEGES = 75,
CCERR_STACK_ERROR = 76,
CCERR_INVALID_VERSION = 77,
CCERR_INVALID_MTU = 78,
CCERR_INVALID_IMAGE = 79,
CCERR_PENDING = 98, /* not an error; used internally by adapter */
CCERR_DEFER = 99, /* not an error; used internally by adapter */
CCERR_FAILED_WRITE = 100,
CCERR_FAILED_ERASE = 101,
CCERR_FAILED_VERIFICATION = 102,
CCERR_NOT_FOUND = 103,
};
/*
* CCAE_ACTIVE_CONNECT_RESULTS status result codes.
*/
enum c2_connect_status {
C2_CONN_STATUS_SUCCESS = C2_OK,
C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
};
/*
* Flash programming status codes.
*/
enum c2_flash_status {
C2_FLASH_STATUS_SUCCESS = 0x0000,
C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
C2_FLASH_STATUS_ECLBS = 0x0400,
C2_FLASH_STATUS_PSLBS = 0x0800,
C2_FLASH_STATUS_VPENS = 0x1000,
};
#endif /* _C2_STATUS_H_ */


@@ -1,82 +0,0 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef C2_USER_H
#define C2_USER_H
#include <linux/types.h>
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
struct c2_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 uarc_size;
};
struct c2_alloc_pd_resp {
__u32 pdn;
__u32 reserved;
};
struct c2_create_cq {
__u32 lkey;
__u32 pdn;
__u64 arm_db_page;
__u64 set_db_page;
__u32 arm_db_index;
__u32 set_db_index;
};
struct c2_create_cq_resp {
__u32 cqn;
__u32 reserved;
};
struct c2_create_qp {
__u32 lkey;
__u32 reserved;
__u64 sq_db_page;
__u64 rq_db_page;
__u32 sq_db_index;
__u32 rq_db_index;
};
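/*
 * Editor's illustrative sketch, not part of the original header: how a
 * hypothetical user-space caller would obey the "pass pointers in __u64"
 * rule from the comment at the top of this file. The helper and the
 * function below are examples only, not part of any shipped ABI.
 */
static inline __u64 c2_ptr_to_u64(void *p)
{
        /* Go through unsigned long so 32-bit builds do not sign-extend. */
        return (__u64) (unsigned long) p;
}

static inline void c2_example_fill_create_cq(struct c2_create_cq *cmd,
                                             __u32 lkey, __u32 pdn,
                                             void *arm_db_page,
                                             void *set_db_page)
{
        cmd->lkey = lkey;
        cmd->pdn = pdn;
        cmd->arm_db_page = c2_ptr_to_u64(arm_db_page);  /* pointer carried as __u64 */
        cmd->set_db_page = c2_ptr_to_u64(set_db_page);
}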
#endif /* C2_USER_H */


@@ -1,260 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "c2_vq.h"
#include "c2_provider.h"
/*
* Verbs Request Objects:
*
* VQ Request Objects are allocated by the kernel verbs handlers.
* They contain a wait object, a refcnt, an atomic bool indicating that the
* adapter has replied, and a copy of the verb reply work request.
* A pointer to the VQ Request Object is passed down in the context
* field of the work request message, and reflected back by the adapter
* in the verbs reply message. The function handle_vq() in the interrupt
* path will use this pointer to:
* 1) append a copy of the verbs reply message
* 2) mark that the reply is ready
* 3) wake up the kernel verbs handler blocked awaiting the reply.
*
*
* The kernel verbs handlers do a "get" to put a 2nd reference on the
* VQ Request object. If the kernel verbs handler exits before the adapter
* can respond, this extra reference will keep the VQ Request object around
* until the adapter's reply can be processed. The reason we need this is
* because a pointer to this object is stuffed into the context field of
* the verbs work request message, and reflected back in the reply message.
* It is used in the interrupt handler (handle_vq()) to wake up the appropriate
* kernel verb handler that is blocked awaiting the verb reply.
* So handle_vq() will do a "put" on the object when it's done accessing it.
* NOTE: If we guarantee that the kernel verb handler will never bail before
* getting the reply, then we don't need these refcnts.
*
*
* VQ Request objects are freed by the kernel verbs handlers only
* after the verb has been processed, or when the adapter fails and
* does not reply.
*
*
* Verbs Reply Buffers:
*
* VQ Reply bufs are local host memory copies of an
* outstanding Verb Request reply
* message. They are always allocated by the kernel verbs handlers, and _may_ be
* freed by either the kernel verbs handler -or- the interrupt handler. The
* kernel verbs handler _must_ free the repbuf, then free the vq request object
* in that order.
*/
int vq_init(struct c2_dev *c2dev)
{
sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
(char) ('0' + c2dev->devnum));
c2dev->host_msg_cache =
kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
SLAB_HWCACHE_ALIGN, NULL);
if (c2dev->host_msg_cache == NULL) {
return -ENOMEM;
}
return 0;
}
void vq_term(struct c2_dev *c2dev)
{
kmem_cache_destroy(c2dev->host_msg_cache);
}
/* vq_req_alloc - allocate a VQ Request Object and initialize it.
* The refcnt is set to 1.
*/
struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
{
struct c2_vq_req *r;
r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
if (r) {
init_waitqueue_head(&r->wait_object);
r->reply_msg = 0;
r->event = 0;
r->cm_id = NULL;
r->qp = NULL;
atomic_set(&r->refcnt, 1);
atomic_set(&r->reply_ready, 0);
}
return r;
}
/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler
* has already freed the VQ Reply Buffer if it existed.
*/
void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
{
r->reply_msg = 0;
if (atomic_dec_and_test(&r->refcnt)) {
kfree(r);
}
}
/* vq_req_get - reference a VQ Request Object. Done
* only in the kernel verbs handlers.
*/
void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
{
atomic_inc(&r->refcnt);
}
/* vq_req_put - dereference and potentially free a VQ Request Object.
*
* This is only called by handle_vq() on the
* interrupt when it is done processing
* a verb reply message. If the associated
* kernel verbs handler has already bailed,
* then this put will actually free the VQ
* Request object _and_ the VQ Reply Buffer
* if it exists.
*/
void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
{
if (atomic_dec_and_test(&r->refcnt)) {
if (r->reply_msg != 0)
vq_repbuf_free(c2dev,
(void *) (unsigned long) r->reply_msg);
kfree(r);
}
}
/*
* vq_repbuf_alloc - allocate a VQ Reply Buffer.
*/
void *vq_repbuf_alloc(struct c2_dev *c2dev)
{
return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
}
/*
* vq_send_wr - post a verbs request message to the Verbs Request Queue.
* If a message is not available in the MQ, then block until one is available.
* NOTE: handle_mq() on the interrupt context will wake up threads blocked here.
* When the adapter drains the Verbs Request Queue,
* it inserts MQ index 0 in to the
* adapter->host activity fifo and interrupts the host.
*/
int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
{
void *msg;
wait_queue_t __wait;
/*
* grab adapter vq lock
*/
spin_lock(&c2dev->vqlock);
/*
* allocate msg
*/
msg = c2_mq_alloc(&c2dev->req_vq);
/*
* If we cannot get a msg, then we'll wait.
* When messages are available, the int handler will wake_up()
* any waiters.
*/
while (msg == NULL) {
pr_debug("%s:%d no available msg in VQ, waiting...\n",
__func__, __LINE__);
init_waitqueue_entry(&__wait, current);
add_wait_queue(&c2dev->req_vq_wo, &__wait);
spin_unlock(&c2dev->vqlock);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (!c2_mq_full(&c2dev->req_vq)) {
break;
}
if (!signal_pending(current)) {
schedule_timeout(1 * HZ); /* 1 second... */
continue;
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&c2dev->req_vq_wo, &__wait);
return -EINTR;
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&c2dev->req_vq_wo, &__wait);
spin_lock(&c2dev->vqlock);
msg = c2_mq_alloc(&c2dev->req_vq);
}
/*
* copy wr into adapter msg
*/
memcpy(msg, wr, c2dev->req_vq.msg_size);
/*
* post msg
*/
c2_mq_produce(&c2dev->req_vq);
/*
* release adapter vq lock
*/
spin_unlock(&c2dev->vqlock);
return 0;
}
/*
* vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
*/
int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
{
if (!wait_event_timeout(req->wait_object,
atomic_read(&req->reply_ready),
60*HZ))
return -ETIMEDOUT;
return 0;
}
/*
* vq_repbuf_free - Free a Verbs Reply Buffer.
*/
void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
{
kmem_cache_free(c2dev->host_msg_cache, reply);
}
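A hedged sketch of the request/reply lifecycle that the header comment of c2_vq.c describes, mirroring what c2_rnic_close does above. The wr->hdr.context field and c2_errno() are assumed from their use elsewhere in this driver; treat it as an illustration of the vq_* API declared in c2_vq.h, not code from the original file.

/* Illustrative only: typical kernel verbs-handler transaction. */
static int example_vq_transaction(struct c2_dev *c2dev, union c2wr *wr)
{
        struct c2_vq_req *vq_req;
        void *reply;
        int err;

        vq_req = vq_req_alloc(c2dev);           /* refcnt == 1 */
        if (!vq_req)
                return -ENOMEM;

        /* Reflected back by the adapter so handle_vq() can find us. */
        wr->hdr.context = (unsigned long) vq_req;

        /* 2nd reference: keeps vq_req alive if we bail before the reply. */
        vq_req_get(c2dev, vq_req);

        err = vq_send_wr(c2dev, wr);
        if (err) {
                vq_req_put(c2dev, vq_req);      /* drop the extra reference */
                goto bail;
        }

        err = vq_wait_for_reply(c2dev, vq_req); /* woken by handle_vq() */
        if (err)
                goto bail;

        reply = (void *) (unsigned long) vq_req->reply_msg;
        if (!reply) {
                err = -ENOMEM;
                goto bail;
        }
        err = c2_errno(reply);                  /* adapter status -> errno */

        vq_repbuf_free(c2dev, reply);           /* free the reply buffer first... */
bail:
        vq_req_free(c2dev, vq_req);             /* ...then the request object */
        return err;
}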


@@ -1,63 +0,0 @@
/*
* Copyright (c) 2005 Ammasso, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _C2_VQ_H_
#define _C2_VQ_H_
#include <linux/sched.h>
#include "c2.h"
#include "c2_wr.h"
#include "c2_provider.h"
struct c2_vq_req {
u64 reply_msg; /* ptr to reply msg */
wait_queue_head_t wait_object; /* wait object for vq reqs */
atomic_t reply_ready; /* set when reply is ready */
atomic_t refcnt; /* used to cancel WRs... */
int event;
struct iw_cm_id *cm_id;
struct c2_qp *qp;
};
extern int vq_init(struct c2_dev *c2dev);
extern void vq_term(struct c2_dev *c2dev);
extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
#endif /* _C2_VQ_H_ */

File diff suppressed because it is too large.


@@ -800,7 +800,9 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
return 0;
}
static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct iwch_dev *rhp;
struct iwch_pd *php;
@@ -809,6 +811,10 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
u32 stag = 0;
int ret = 0;
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > T3_MAX_FASTREG_DEPTH)
return ERR_PTR(-EINVAL);
php = to_iwch_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
@@ -816,10 +822,10 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
goto err;
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, pbl_depth);
ret = iwch_alloc_pbl(mhp, max_num_sg);
if (ret)
goto err1;
mhp->attr.pbl_size = pbl_depth;
mhp->attr.pbl_size = max_num_sg;
ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
if (ret)
@@ -1443,7 +1449,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.alloc_mw = iwch_alloc_mw;
dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
dev->ibdev.alloc_mr = iwch_alloc_mr;
dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
dev->ibdev.attach_mcast = iwch_multicast_attach;


@@ -50,6 +50,7 @@
#include <rdma/ib_addr.h>
#include "iw_cxgb4.h"
#include "clip_tbl.h"
static char *states[] = {
"idle",
@@ -115,11 +116,11 @@ module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
"in seconds (default=60)");
static int mpa_rev = 1;
static int mpa_rev = 2;
module_param(mpa_rev, int, 0644);
MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
"1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
" compliant (default=1)");
" compliant (default=2)");
static int markers_enabled;
module_param(markers_enabled, int, 0644);
@@ -298,6 +299,16 @@ void _c4iw_free_ep(struct kref *kref)
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
cxgb4_clip_release(
ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
dst_release(ep->dst);
@@ -442,6 +453,12 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
kfree_skb(skb);
connect_reply_upcall(ep, -EHOSTUNREACH);
state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)&ep->com.mapped_local_addr;
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
dst_release(ep->dst);
@@ -640,6 +657,7 @@ static int send_connect(struct c4iw_ep *ep)
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
&ep->com.mapped_remote_addr;
int win;
int ret;
wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
roundup(sizev4, 16) :
@@ -693,6 +711,11 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
opt2 |= T5_ISS_F;
}
if (ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
@@ -790,7 +813,11 @@ static int send_connect(struct c4iw_ep *ep)
}
set_bit(ACT_OPEN_REQ, &ep->com.history);
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
if (ret && ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
return ret;
}
static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
@@ -2091,6 +2118,15 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
case CPL_ERR_CONN_EXIST:
if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
set_bit(ACT_RETRY_INUSE, &ep->com.history);
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
cxgb4_clip_release(
ep->com.dev->rdev.lldi.ports[0],
(const u32 *)
&sin6->sin6_addr.s6_addr, 1);
}
remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
atid);
cxgb4_free_atid(t, atid);
@@ -2118,6 +2154,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
connect_reply_upcall(ep, status2errno(status));
state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)&ep->com.mapped_local_addr;
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
if (status && act_open_has_tid(status))
cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
@@ -2302,6 +2344,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
struct dst_entry *dst;
__u8 local_ip[16], peer_ip[16];
__be16 local_port, peer_port;
struct sockaddr_in6 *sin6;
int err;
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
@@ -2400,9 +2443,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
sin->sin_port = peer_port;
sin->sin_addr.s_addr = *(__be32 *)peer_ip;
} else {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
&child_ep->com.mapped_local_addr;
sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr;
sin6->sin6_family = PF_INET6;
sin6->sin6_port = local_port;
memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
@@ -2436,6 +2477,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
accept_cr(child_ep, skb, req);
set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
if (iptype == 6) {
sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr;
cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
goto out;
reject:
reject_cr(dev, hwtid, skb);
@@ -2672,6 +2718,15 @@ out:
if (release)
release_ep_resources(ep);
else if (ep->retry_with_mpa_v1) {
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
cxgb4_clip_release(
ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
dst_release(ep->dst);
@@ -2976,7 +3031,7 @@ static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr;
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
memcpy(la6->sin6_addr.s6_addr, &addr, 16);
memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
return 0;
@@ -3186,6 +3241,9 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
else
cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
return err;
}
@@ -3334,6 +3392,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
ep->com.dev->rdev.lldi.ports[0], ep->stid,
ep->com.dev->rdev.lldi.rxq_ids[0], 0);
} else {
struct sockaddr_in6 *sin6;
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_remove_server(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
@@ -3342,6 +3401,9 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
goto done;
err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
0, 0, __func__);
sin6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
@@ -3461,6 +3523,12 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
mutex_unlock(&dev->rdev.stats.lock);
connect_reply_upcall(ep, status2errno(req->retval));
state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)&ep->com.mapped_local_addr;
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
remove_handle(dev, &dev->atid_idr, atid);
cxgb4_free_atid(dev->rdev.lldi.tids, atid);
dst_release(ep->dst);


@@ -970,7 +970,9 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
struct ib_device *device,
int page_list_len);
struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,


@@ -853,7 +853,9 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
return 0;
}
struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -862,6 +864,10 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
u32 stag = 0;
int ret = 0;
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > t4_max_fr_depth(use_dsgl))
return ERR_PTR(-EINVAL);
php = to_c4iw_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
@@ -871,10 +877,10 @@ struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
}
mhp->rhp = rhp;
ret = alloc_pbl(mhp, pbl_depth);
ret = alloc_pbl(mhp, max_num_sg);
if (ret)
goto err1;
mhp->attr.pbl_size = pbl_depth;
mhp->attr.pbl_size = max_num_sg;
ret = allocate_stag(&rhp->rdev, &stag, php->pdid,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
if (ret)


@@ -556,7 +556,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.alloc_mw = c4iw_alloc_mw;
dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
dev->ibdev.attach_mcast = c4iw_multicast_attach;


@@ -1,14 +0,0 @@
config INFINIBAND_IPATH
tristate "QLogic HTX HCA support"
depends on 64BIT && NET && HT_IRQ
---help---
This is a driver for the obsolete QLogic Hyper-Transport
IB host channel adapter (model QHT7140),
including InfiniBand verbs support. This driver allows these
devices to be used with both kernel upper level protocols such
as IP-over-InfiniBand as well as with userspace applications
(in conjunction with InfiniBand userspace access).
For QLogic PCIe QLE based cards, use the QIB driver instead.
If you have this hardware, you will need to boot with PAT disabled
on your x86-64 systems; use the nopat kernel parameter.


@@ -1,37 +0,0 @@
ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-DIPATH_KERN_TYPE=0
obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
ib_ipath-y := \
ipath_cq.o \
ipath_diag.o \
ipath_dma.o \
ipath_driver.o \
ipath_eeprom.o \
ipath_file_ops.o \
ipath_fs.o \
ipath_init_chip.o \
ipath_intr.o \
ipath_keys.o \
ipath_mad.o \
ipath_mmap.o \
ipath_mr.o \
ipath_qp.o \
ipath_rc.o \
ipath_ruc.o \
ipath_sdma.o \
ipath_srq.o \
ipath_stats.o \
ipath_sysfs.o \
ipath_uc.o \
ipath_ud.o \
ipath_user_pages.o \
ipath_user_sdma.o \
ipath_verbs_mcast.o \
ipath_verbs.o
ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o


@@ -1,851 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_COMMON_H
#define _IPATH_COMMON_H
/*
* This file contains defines, structures, etc. that are used
* to communicate between kernel and user code.
*/
/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
#define IPATH_SRC_OUI_1 0x00
#define IPATH_SRC_OUI_2 0x11
#define IPATH_SRC_OUI_3 0x75
/* version of protocol header (known to chip also). In the long run,
* we should be able to generate and accept a range of version numbers;
* for now we only accept one, and it's compiled in.
*/
#define IPS_PROTO_VERSION 2
/*
* These are compile time constants that you may want to enable or disable
* if you are trying to debug problems with code or performance.
* IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
* fastpath code
* IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
* traced in fastpath code
* _IPATH_TRACING define as 0 if you want to remove all tracing in a
* compilation unit
* _IPATH_DEBUGGING define as 0 if you want to remove debug prints
*/
/*
* The value in the BTH QP field that InfiniPath uses to differentiate
* an infinipath protocol IB packet vs standard IB transport
*/
#define IPATH_KD_QP 0x656b79
/*
* valid states passed to ipath_set_linkstate() user call
*/
#define IPATH_IB_LINKDOWN 0
#define IPATH_IB_LINKARM 1
#define IPATH_IB_LINKACTIVE 2
#define IPATH_IB_LINKDOWN_ONLY 3
#define IPATH_IB_LINKDOWN_SLEEP 4
#define IPATH_IB_LINKDOWN_DISABLE 5
#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */
/*
* These 3 values (SDR and DDR may be ORed for auto-speed
* negotiation) are used for the 3rd argument to path_f_set_ib_cfg
* with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
* are also the possible values for ipath_link_speed_enabled and active
* The values were chosen to match values used within the IB spec.
*/
#define IPATH_IB_SDR 1
#define IPATH_IB_DDR 2
/*
* stats maintained by the driver. For now, at least, this is global
* to all minor devices.
*/
struct infinipath_stats {
/* number of interrupts taken */
__u64 sps_ints;
/* number of interrupts for errors */
__u64 sps_errints;
/* number of errors from chip (not incl. packet errors or CRC) */
__u64 sps_errs;
/* number of packet errors from chip other than CRC */
__u64 sps_pkterrs;
/* number of packets with CRC errors (ICRC and VCRC) */
__u64 sps_crcerrs;
/* number of hardware errors reported (parity, etc.) */
__u64 sps_hwerrs;
/* number of times IB link changed state unexpectedly */
__u64 sps_iblink;
__u64 sps_unused; /* was fastrcvint, no longer implemented */
/* number of kernel (port0) packets received */
__u64 sps_port0pkts;
/* number of "ethernet" packets sent by driver */
__u64 sps_ether_spkts;
/* number of "ethernet" packets received by driver */
__u64 sps_ether_rpkts;
/* number of SMA packets sent by driver. Obsolete. */
__u64 sps_sma_spkts;
/* number of SMA packets received by driver. Obsolete. */
__u64 sps_sma_rpkts;
/* number of times all ports rcvhdrq was full and packet dropped */
__u64 sps_hdrqfull;
/* number of times all ports egrtid was full and packet dropped */
__u64 sps_etidfull;
/*
* number of times we tried to send from driver, but no pio buffers
* avail
*/
__u64 sps_nopiobufs;
/* number of ports currently open */
__u64 sps_ports;
/* list of pkeys (other than default) accepted (0 means not set) */
__u16 sps_pkeys[4];
__u16 sps_unused16[4]; /* available; maintaining compatible layout */
/* number of user ports per chip (not IB ports) */
__u32 sps_nports;
/* not our interrupt, or already handled */
__u32 sps_nullintr;
/* max number of packets handled per receive call */
__u32 sps_maxpkts_call;
/* avg number of packets handled per receive call */
__u32 sps_avgpkts_call;
/* total number of pages locked */
__u64 sps_pagelocks;
/* total number of pages unlocked */
__u64 sps_pageunlocks;
/*
* Number of packets dropped in kernel other than errors (ether
* packets if ipath not configured, etc.)
*/
__u64 sps_krdrops;
__u64 sps_txeparity; /* PIO buffer parity error, recovered */
/* pad for future growth */
__u64 __sps_pad[45];
};
/*
* These are the status bits readable (in ascii form, 64bit value)
* from the "status" sysfs file.
*/
#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */
#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
/* Device has been disabled via admin request */
#define IPATH_STATUS_ADMIN_DISABLED 0x4
/* Chip has been found and initted */
#define IPATH_STATUS_CHIP_PRESENT 0x20
/* IB link is at ACTIVE, usable for data traffic */
#define IPATH_STATUS_IB_READY 0x40
/* link is configured, LID, MTU, etc. have been set */
#define IPATH_STATUS_IB_CONF 0x80
/* no link established, probably no cable */
#define IPATH_STATUS_IB_NOCABLE 0x100
/* A Fatal hardware error has occurred. */
#define IPATH_STATUS_HWERROR 0x200
/*
* The list of usermode accessible registers. Also see Reg_* later in file.
*/
typedef enum _ipath_ureg {
/* (RO) DMA RcvHdr to be used next. */
ur_rcvhdrtail = 0,
/* (RW) RcvHdr entry to be processed next by host. */
ur_rcvhdrhead = 1,
/* (RO) Index of next Eager index to use. */
ur_rcvegrindextail = 2,
/* (RW) Eager TID to be processed next */
ur_rcvegrindexhead = 3,
/* For internal use only; max register number. */
_IPATH_UregMax
} ipath_ureg;
/* bit values for spi_runtime_flags */
#define IPATH_RUNTIME_HT 0x1
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
#define IPATH_RUNTIME_MASTER 0x10
#define IPATH_RUNTIME_NODMA_RTAIL 0x80
#define IPATH_RUNTIME_SDMA 0x200
#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
/*
* This structure is returned by ipath_userinit() immediately after
* open to get implementation-specific info, and info specific to this
* instance.
*
* This struct must have explicit pad fields where type sizes
* may result in different alignments between 32 and 64 bit
* programs, since the 64 bit kernel requires the user code
* to have matching offsets
*/
struct ipath_base_info {
/* version of hardware, for feature checking. */
__u32 spi_hw_version;
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
__u16 spi_port;
__u16 spi_subport;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
*/
__u32 spi_mtu;
/*
* Size of a PIO buffer. Any given packet's total size must be less
* than this (in words). Included is the starting control word, so
* if 513 is returned, then total pkt size is 512 words or less.
*/
__u32 spi_piosize;
/* size of the TID cache in infinipath, in entries */
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
/* size of a single receive header queue entry in words. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
* This may be less than the spu_rcvhdrcnt passed in!
*/
__u32 spi_rcvhdr_cnt;
/* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
__u32 spi_runtime_flags;
/* address where receive buffer queue is mapped into */
__u64 spi_rcvhdr_base;
/* user program. */
/* base address of eager TID receive buffers. */
__u64 spi_rcv_egrbufs;
/* Allocated by initialization code, not by protocol. */
/*
* Size of each TID buffer in host memory, starting at
* spi_rcv_egrbufs. The buffers are virtually contiguous.
*/
__u32 spi_rcv_egrbufsize;
/*
* The special QP (queue pair) value that identifies an infinipath
* protocol packet from standard IB packets. More, probably much
* more, to be added.
*/
__u32 spi_qpair;
/*
* User register base for init code, not to be used directly by
* protocol or applications.
*/
__u64 __spi_uregbase;
/*
* Maximum buffer size in bytes that can be used in a single TID
* entry (assuming the buffer is aligned to this boundary). This is
* the minimum of what the hardware and software support. Guaranteed
* to be a power of 2.
*/
__u32 spi_tid_maxsize;
/*
* alignment of each pio send buffer (byte count
* to add to spi_piobufbase to get to second buffer)
*/
__u32 spi_pioalign;
/*
* The index of the first pio buffer available to this process;
* needed to do lookup in spi_pioavailaddr; not added to
* spi_piobufbase.
*/
__u32 spi_pioindex;
/* number of buffers mapped for this process */
__u32 spi_piocnt;
/*
* Base address of writeonly pio buffers for this process.
* Each buffer has spi_piosize words, and is aligned on spi_pioalign
* boundaries. spi_piocnt buffers are mapped from this address
*/
__u64 spi_piobufbase;
/*
* Base address of readonly memory copy of the pioavail registers.
* There are 2 bits for each buffer.
*/
__u64 spi_pioavailaddr;
/*
* Address where driver updates a copy of the interface and driver
* status (IPATH_STATUS_*) as a 64 bit value. It's followed by a
* string indicating hardware error, if there was one.
*/
__u64 spi_status;
/* number of chip ports available to user processes */
__u32 spi_nports;
/* unit number of chip we are using */
__u32 spi_unit;
/* num bufs in each contiguous set */
__u32 spi_rcv_egrperchunk;
/* size in bytes of each contiguous set */
__u32 spi_rcv_egrchunksize;
/* total size of mmap to cover full rcvegrbuffers */
__u32 spi_rcv_egrbuftotlen;
__u32 spi_filler_for_align;
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
/* shared memory pages for subports if port is shared */
__u64 spi_subport_uregbase;
__u64 spi_subport_rcvegrbuf;
__u64 spi_subport_rcvhdr_base;
/* shared memory page for hardware port if it is shared */
__u64 spi_port_uregbase;
__u64 spi_port_rcvegrbuf;
__u64 spi_port_rcvhdr_base;
__u64 spi_port_rcvhdr_tailaddr;
} __attribute__ ((aligned(8)));
/*
* This version number is given to the driver by the user code during
* initialization in the spu_userversion field of ipath_user_info, so
* the driver can check for compatibility with user code.
*
* The major version changes when data structures
* change in an incompatible way. The driver must be the same or higher
* for initialization to succeed. In some cases, a higher version
* driver will not interoperate with older software, and initialization
* will return an error.
*/
#define IPATH_USER_SWMAJOR 1
/*
* Minor version differences are always compatible
* within a major version; however, if the user software is newer
* than driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
#define IPATH_USER_SWMINOR 6
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
#define IPATH_KERN_TYPE 0
/*
* Similarly, this is the kernel version going back to the user. It's
* slightly different, in that we want to tell if the driver was built as
* part of a QLogic release, or from the driver from openfabrics.org,
* kernel.org, or a standard distribution, for support reasons.
* The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
*
* It's returned by the driver to the user code during initialization in the
* spi_sw_version field of ipath_base_info, so the user code can in turn
* check for compatibility with the kernel.
*/
#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
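/*
 * Editor's illustrative sketch, not part of the original header: one way
 * user code might unpack a packed software version using the layout
 * defined above (bit 31 = QLogic-built flag, bits 30:16 = major,
 * bits 15:0 = minor) and apply the compatibility rule from the comments:
 * the major must match, minor differences only gate optional features.
 */
static inline __u32 ipath_sw_major(__u32 swversion)
{
        return (swversion >> 16) & 0x7fff;      /* mask off the type flag */
}

static inline __u32 ipath_sw_minor(__u32 swversion)
{
        return swversion & 0xffff;
}

static inline int ipath_sw_compatible(__u32 kern_swversion)
{
        return ipath_sw_major(kern_swversion) == IPATH_USER_SWMAJOR;
}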
/*
* This structure is passed to ipath_userinit() to tell the driver where
* user code buffers are, sizes, etc. The offsets and sizes of the
* fields must remain unchanged, for binary compatibility. It can
* be extended, if userversion is changed so user code can tell, if needed
*/
struct ipath_user_info {
/*
* version of user software, to detect compatibility issues.
* Should be set to IPATH_USER_SWVERSION.
*/
__u32 spu_userversion;
/* desired number of receive header queue entries */
__u32 spu_rcvhdrcnt;
/* size of struct base_info to write to */
__u32 spu_base_info_size;
/*
* number of words in KD protocol header
* This tells InfiniPath how many words to copy to rcvhdrq. If 0,
* kernel uses a default. Once set, attempts to set any other value
* are an error (EAGAIN) until driver is reloaded.
*/
__u32 spu_rcvhdrsize;
/*
* If two or more processes wish to share a port, each process
* must set the spu_subport_cnt and spu_subport_id to the same
* values. The only restriction on the spu_subport_id is that
* it be unique for a given node.
*/
__u16 spu_subport_cnt;
__u16 spu_subport_id;
__u32 spu_unused; /* kept for compatible layout */
/*
* address of struct base_info to write to
*/
__u64 spu_base_info;
} __attribute__ ((aligned(8)));
/* User commands. */
#define IPATH_CMD_MIN 16
#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
#define IPATH_CMD_USER_INIT 24 /* set up userspace */
#define IPATH_CMD_UNUSED_1 25
#define IPATH_CMD_UNUSED_2 26
#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */
/* 30 is unused */
#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */
#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */
/*
* Poll types
*/
#define IPATH_POLL_TYPE_URGENT 0x01
#define IPATH_POLL_TYPE_OVERFLOW 0x02
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
__u16 port; /* port on unit assigned to caller */
__u16 subport; /* subport on unit assigned to caller */
__u16 num_ports; /* number of ports available on unit */
__u16 num_subports; /* number of subports opened on port */
};
struct ipath_tid_info {
__u32 tidcnt;
/* make structure same size in 32 and 64 bit */
__u32 tid__unused;
/* virtual address of first page in transfer */
__u64 tidvaddr;
/* pointer (same size 32/64 bit) to __u16 tid array */
__u64 tidlist;
/*
* pointer (same size 32/64 bit) to bitmap of TIDs used
* for this call; checked for being large enough at open
*/
__u64 tidmap;
};
struct ipath_cmd {
__u32 type; /* command type */
union {
struct ipath_tid_info tid_info;
struct ipath_user_info user_info;
/*
* address in userspace where we should put the sdma
* inflight counter
*/
__u64 sdma_inflight;
/*
* address in userspace where we should put the sdma
* completion counter
*/
__u64 sdma_complete;
/* address in userspace of struct ipath_port_info to
write result to */
__u64 port_info;
/* enable/disable receipt of packets */
__u32 recv_ctrl;
/* enable/disable armlaunch errors (non-zero to enable) */
__u32 armlaunch_ctrl;
/* partition key to set */
__u16 part_key;
/* user address of __u32 bitmask of active slaves */
__u64 slave_mask_addr;
/* type of polling we want */
__u16 poll_type;
} cmd;
};
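/*
 * Editor's illustrative sketch, not part of the original header: the
 * type field selects which union member the driver reads. Delivery of
 * the filled struct (the user library writes it to the device file
 * descriptor) is outside the scope of this header, so only the struct
 * usage is shown; the helper name is hypothetical.
 */
static inline void ipath_example_set_pkey_cmd(struct ipath_cmd *cmd, __u16 pkey)
{
        cmd->type = IPATH_CMD_SET_PART_KEY;
        cmd->cmd.part_key = pkey;
}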
struct ipath_iovec {
/* Pointer to data, but same size 32 and 64 bit */
__u64 iov_base;
/*
* Length of data; don't need 64 bits, but want
* ipath_sendpkt to remain same size as before 32 bit changes, so...
*/
__u64 iov_len;
};
/*
* Describes a single packet for send. Each packet can have one or more
* buffers, but the total length (exclusive of IB headers) must be less
* than the MTU, and if using the PIO method, entire packet length,
* including IB headers, must be less than the ipath_piosize value (words).
* Use of this necessitates including sys/uio.h
*/
struct __ipath_sendpkt {
__u32 sps_flags; /* flags for packet (TBD) */
__u32 sps_cnt; /* number of entries to use in sps_iov */
/* array of iov's describing packet. TEMPORARY */
struct ipath_iovec sps_iov[4];
};
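/*
 * Editor's illustrative sketch, not part of the original header:
 * describing a two-buffer packet. Pointers and lengths are widened to
 * __u64 as struct ipath_iovec requires, and the summed lengths must
 * respect the MTU/piosize limits noted in the comment above. The helper
 * name is hypothetical.
 */
static inline void ipath_example_fill_sendpkt(struct __ipath_sendpkt *pkt,
                                              void *hdr, __u32 hdrlen,
                                              void *payload, __u32 paylen)
{
        pkt->sps_flags = 0;
        pkt->sps_cnt = 2;                       /* entries used in sps_iov */
        pkt->sps_iov[0].iov_base = (__u64) (unsigned long) hdr;
        pkt->sps_iov[0].iov_len = hdrlen;
        pkt->sps_iov[1].iov_base = (__u64) (unsigned long) payload;
        pkt->sps_iov[1].iov_len = paylen;
}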
/*
* diagnostics can send a packet by "writing" one of the following
* two structs to diag data special file
* The first is the legacy version for backward compatibility
*/
struct ipath_diag_pkt {
__u32 unit;
__u64 data;
__u32 len;
};
/* The second diag_pkt struct is the expanded version that allows
* more control over the packet, specifically, by allowing a custom
* pbc (+ static rate) qword, so that special modes and deliberate
* changes to CRCs can be used. The elements were also re-ordered
* for better alignment and to avoid padding issues.
*/
struct ipath_diag_xpkt {
__u64 data;
__u64 pbc_wd;
__u32 unit;
__u32 len;
};
/*
* Data layout in I2C flash (for GUID, etc.)
* All fields are little-endian binary unless otherwise stated
*/
#define IPATH_FLASH_VERSION 2
struct ipath_flash {
/* flash layout version (IPATH_FLASH_VERSION) */
__u8 if_fversion;
/* checksum protecting if_length bytes */
__u8 if_csum;
/*
* valid length (in use, protected by if_csum), including
* if_fversion and if_csum themselves
*/
__u8 if_length;
/* the GUID, in network order */
__u8 if_guid[8];
/* number of GUIDs to use, starting from if_guid */
__u8 if_numguid;
/* the (last 10 characters of) board serial number, in ASCII */
char if_serial[12];
/* board mfg date (YYYYMMDD ASCII) */
char if_mfgdate[8];
/* last board rework/test date (YYYYMMDD ASCII) */
char if_testdate[8];
/* logging of error counts, TBD */
__u8 if_errcntp[4];
/* powered on hours, updated at driver unload */
__u8 if_powerhour[2];
/* ASCII free-form comment field */
char if_comment[32];
/* Backwards compatible prefix for longer QLogic Serial Numbers */
char if_sprefix[4];
/* 82 bytes used, min flash size is 128 bytes */
__u8 if_future[46];
};
/*
* These are the counters implemented in the chip, and are listed in order.
* The InterCaps naming is taken straight from the chip spec.
*/
struct infinipath_counters {
__u64 LBIntCnt;
__u64 LBFlowStallCnt;
__u64 TxSDmaDescCnt; /* was Reserved1 */
__u64 TxUnsupVLErrCnt;
__u64 TxDataPktCnt;
__u64 TxFlowPktCnt;
__u64 TxDwordCnt;
__u64 TxLenErrCnt;
__u64 TxMaxMinLenErrCnt;
__u64 TxUnderrunCnt;
__u64 TxFlowStallCnt;
__u64 TxDroppedPktCnt;
__u64 RxDroppedPktCnt;
__u64 RxDataPktCnt;
__u64 RxFlowPktCnt;
__u64 RxDwordCnt;
__u64 RxLenErrCnt;
__u64 RxMaxMinLenErrCnt;
__u64 RxICRCErrCnt;
__u64 RxVCRCErrCnt;
__u64 RxFlowCtrlErrCnt;
__u64 RxBadFormatCnt;
__u64 RxLinkProblemCnt;
__u64 RxEBPCnt;
__u64 RxLPCRCErrCnt;
__u64 RxBufOvflCnt;
__u64 RxTIDFullErrCnt;
__u64 RxTIDValidErrCnt;
__u64 RxPKeyMismatchCnt;
__u64 RxP0HdrEgrOvflCnt;
__u64 RxP1HdrEgrOvflCnt;
__u64 RxP2HdrEgrOvflCnt;
__u64 RxP3HdrEgrOvflCnt;
__u64 RxP4HdrEgrOvflCnt;
__u64 RxP5HdrEgrOvflCnt;
__u64 RxP6HdrEgrOvflCnt;
__u64 RxP7HdrEgrOvflCnt;
__u64 RxP8HdrEgrOvflCnt;
__u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
__u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
__u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
__u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
__u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
__u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
__u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
__u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
__u64 IBStatusChangeCnt;
__u64 IBLinkErrRecoveryCnt;
__u64 IBLinkDownedCnt;
__u64 IBSymbolErrCnt;
/* The following are new for IBA7220 */
__u64 RxVL15DroppedPktCnt;
__u64 RxOtherLocalPhyErrCnt;
__u64 PcieRetryBufDiagQwordCnt;
__u64 ExcessBufferOvflCnt;
__u64 LocalLinkIntegrityErrCnt;
__u64 RxVlErrCnt;
__u64 RxDlidFltrCnt;
};
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software
* The other bits that are used only by the driver or diags are in
* ipath_registers.h
*/
/* RcvHdrFlags bits */
#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
#define INFINIPATH_RHF_LENGTH_SHIFT 0
#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
#define INFINIPATH_RHF_SEQ_MASK 0xF
#define INFINIPATH_RHF_SEQ_SHIFT 0
#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
#define INFINIPATH_RHF_H_ICRCERR 0x80000000
#define INFINIPATH_RHF_H_VCRCERR 0x40000000
#define INFINIPATH_RHF_H_PARITYERR 0x20000000
#define INFINIPATH_RHF_H_LENERR 0x10000000
#define INFINIPATH_RHF_H_MTUERR 0x08000000
#define INFINIPATH_RHF_H_IHDRERR 0x04000000
#define INFINIPATH_RHF_H_TIDERR 0x02000000
#define INFINIPATH_RHF_H_MKERR 0x01000000
#define INFINIPATH_RHF_H_IBERR 0x00800000
#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000
#define INFINIPATH_RHF_L_USE_EGR 0x80000000
#define INFINIPATH_RHF_L_SWA 0x00008000
#define INFINIPATH_RHF_L_SWB 0x00004000
/* infinipath header fields */
#define INFINIPATH_I_VERS_MASK 0xF
#define INFINIPATH_I_VERS_SHIFT 28
#define INFINIPATH_I_PORT_MASK 0xF
#define INFINIPATH_I_PORT_SHIFT 24
#define INFINIPATH_I_TID_MASK 0x7FF
#define INFINIPATH_I_TID_SHIFT 13
#define INFINIPATH_I_OFFSET_MASK 0x1FFF
#define INFINIPATH_I_OFFSET_SHIFT 0
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
#define INFINIPATH_KPF_SUBPORT_MASK 0x3
#define INFINIPATH_KPF_SUBPORT_SHIFT 1
#define INFINIPATH_MAX_SUBPORT 4
/* SendPIO per-buffer control */
#define INFINIPATH_SP_TEST 0x40
#define INFINIPATH_SP_TESTEBP 0x20
#define INFINIPATH_SP_TRIGGER_SHIFT 15
/* SendPIOAvail bits */
#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
/* infinipath header format */
struct ipath_header {
/*
* Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
* 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
* Port 4, TID 11, offset 13.
*/
__le32 ver_port_tid_offset;
__le16 chksum;
__le16 pkt_flags;
};
/* infinipath user message header format.
* This structure contains the first 4 fields common to all protocols
* that employ infinipath.
*/
struct ipath_message_header {
__be16 lrh[4];
__be32 bth[3];
/* fields below this point are in host byte order */
struct ipath_header iph;
__u8 sub_opcode;
};
/* infinipath ethernet header format */
struct ether_header {
__be16 lrh[4];
__be32 bth[3];
struct ipath_header iph;
__u8 sub_opcode;
__u8 cmd;
__be16 lid;
__u16 mac[3];
__u8 frag_num;
__u8 seq_num;
__le32 len;
/* MUST be of word size due to PIO write requirements */
__le32 csum;
__le16 csum_offset;
__le16 flags;
__u16 first_2_bytes;
__u8 unused[2]; /* currently unused */
};
/* IB - LRH header consts */
#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
/* misc. */
#define SIZE_OF_CRC 1
#define IPATH_DEFAULT_P_KEY 0xFFFF
#define IPATH_PERMISSIVE_LID 0xFFFF
#define IPATH_AETH_CREDIT_SHIFT 24
#define IPATH_AETH_CREDIT_MASK 0x1F
#define IPATH_AETH_CREDIT_INVAL 0x1F
#define IPATH_PSN_MASK 0xFFFFFF
#define IPATH_MSN_MASK 0xFFFFFF
#define IPATH_QPN_MASK 0xFFFFFF
#define IPATH_MULTICAST_LID_BASE 0xC000
#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
#define IPATH_MULTICAST_QPN 0xFFFFFF
/* Receive Header Queue: receive type (from infinipath) */
#define RCVHQ_RCV_TYPE_EXPECTED 0
#define RCVHQ_RCV_TYPE_EAGER 1
#define RCVHQ_RCV_TYPE_NON_KD 2
#define RCVHQ_RCV_TYPE_ERROR 3
/* sub OpCodes - ith4x */
#define IPATH_ITH4X_OPCODE_ENCAP 0x81
#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
#define IPATH_HEADER_QUEUE_WORDS 9
/* functions for extracting fields from rcvhdrq entries for the driver.
*/
static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
{
return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
}
static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
{
return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
& INFINIPATH_RHF_RCVTYPE_MASK;
}
static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
{
return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
& INFINIPATH_RHF_LENGTH_MASK) << 2;
}
static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
{
return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
& INFINIPATH_RHF_EGRINDEX_MASK;
}
static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
{
return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
& INFINIPATH_RHF_SEQ_MASK;
}
static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
{
return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
& INFINIPATH_RHF_HDRQ_OFFSET_MASK;
}
static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
{
return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
}
static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
{
return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
& INFINIPATH_I_VERS_MASK;
}
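/*
 * Editor's note: illustrative sketch only; this helper is hypothetical
 * and not part of the original header. It shows the intended use of the
 * accessors above on the receive-header-flag words of an rcvhdrq entry:
 * reject entries that carry any INFINIPATH_RHF_H_* error bit or that
 * were flagged with the error receive type.
 */
static inline int ipath_example_rhf_ok(const __le32 *rbuf)
{
	if (ipath_hdrget_err_flags(rbuf))
		return 0;
	return ipath_hdrget_rcv_type(rbuf) != RCVHQ_RCV_TYPE_ERROR;
}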
#endif /* _IPATH_COMMON_H */


@@ -1,483 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
/**
* ipath_cq_enter - add a new entry to the completion queue
* @cq: completion queue
* @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
*
* This may be called with qp->s_lock held.
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
struct ipath_cq_wc *wc;
unsigned long flags;
u32 head;
u32 next;
spin_lock_irqsave(&cq->lock, flags);
/*
* Note that the head pointer might be writable by user processes.
* Take care to verify it is a sane value.
*/
wc = cq->queue;
head = wc->head;
if (head >= (unsigned) cq->ibcq.cqe) {
head = cq->ibcq.cqe;
next = 0;
} else
next = head + 1;
if (unlikely(next == wc->tail)) {
spin_unlock_irqrestore(&cq->lock, flags);
if (cq->ibcq.event_handler) {
struct ib_event ev;
ev.device = cq->ibcq.device;
ev.element.cq = &cq->ibcq;
ev.event = IB_EVENT_CQ_ERR;
cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
}
return;
}
if (cq->ip) {
wc->uqueue[head].wr_id = entry->wr_id;
wc->uqueue[head].status = entry->status;
wc->uqueue[head].opcode = entry->opcode;
wc->uqueue[head].vendor_err = entry->vendor_err;
wc->uqueue[head].byte_len = entry->byte_len;
wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
wc->uqueue[head].qp_num = entry->qp->qp_num;
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
wc->uqueue[head].pkey_index = entry->pkey_index;
wc->uqueue[head].slid = entry->slid;
wc->uqueue[head].sl = entry->sl;
wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
wc->uqueue[head].port_num = entry->port_num;
/* Make sure entry is written before the head index. */
smp_wmb();
} else
wc->kqueue[head] = *entry;
wc->head = next;
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
cq->notify = IB_CQ_NONE;
cq->triggered++;
/*
* This will cause send_complete() to be called in
* another thread.
*/
tasklet_hi_schedule(&cq->comptask);
}
spin_unlock_irqrestore(&cq->lock, flags);
if (entry->status != IB_WC_SUCCESS)
to_idev(cq->ibcq.device)->n_wqe_errs++;
}
/**
* ipath_poll_cq - poll for work completion entries
* @ibcq: the completion queue to poll
* @num_entries: the maximum number of entries to return
* @entry: pointer to array where work completions are placed
*
* Returns the number of completion entries polled.
*
* This may be called from interrupt context. Also called by ib_poll_cq()
* in the generic verbs code.
*/
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ipath_cq_wc *wc;
unsigned long flags;
int npolled;
u32 tail;
/* The kernel can only poll a kernel completion queue */
if (cq->ip) {
npolled = -EINVAL;
goto bail;
}
spin_lock_irqsave(&cq->lock, flags);
wc = cq->queue;
tail = wc->tail;
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
if (tail == wc->head)
break;
/* The kernel doesn't need a RMB since it has the lock. */
*entry = wc->kqueue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
else
tail++;
}
wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
bail:
return npolled;
}
static void send_complete(unsigned long data)
{
struct ipath_cq *cq = (struct ipath_cq *)data;
/*
* The completion handler will most likely rearm the notification
* and poll for all pending entries. If a new completion entry
* is added while we are in this routine, tasklet_hi_schedule()
* won't call us again until we return so we check triggered to
* see if we need to call the handler again.
*/
for (;;) {
u8 triggered = cq->triggered;
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
if (cq->triggered == triggered)
return;
}
}
/**
* ipath_create_cq - create a completion queue
* @ibdev: the device this completion queue is attached to
* @attr: creation attributes
 * @context: the user context; used only when creating a user-mappable CQ
 * @udata: user data; when supplied, the queue is allocated so it can be
 *         mapped into user space
*
* Returns a pointer to the completion queue or negative errno values
* for failure.
*
* Called by ib_create_cq() in the generic verbs code.
*/
struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
int entries = attr->cqe;
struct ipath_ibdev *dev = to_idev(ibdev);
struct ipath_cq *cq;
struct ipath_cq_wc *wc;
struct ib_cq *ret;
u32 sz;
if (attr->flags)
return ERR_PTR(-EINVAL);
if (entries < 1 || entries > ib_ipath_max_cqes) {
ret = ERR_PTR(-EINVAL);
goto done;
}
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
ret = ERR_PTR(-ENOMEM);
goto done;
}
/*
* Allocate the completion queue entries and head/tail pointers.
* This is allocated separately so that it can be resized and
* also mapped into user space.
* We need to use vmalloc() in order to support mmap and large
* numbers of entries.
*/
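	/*
	 * Editor's note (descriptive comment, not in the original source):
	 * a user-mappable CQ stores fixed-layout struct ib_uverbs_wc
	 * entries (wc->uqueue) while a kernel CQ stores struct ib_wc
	 * entries (wc->kqueue); the "+ 1" below leaves one slot unused so
	 * that head == tail means empty and head + 1 == tail means full.
	 */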
sz = sizeof(*wc);
if (udata && udata->outlen >= sizeof(__u64))
sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
else
sz += sizeof(struct ib_wc) * (entries + 1);
wc = vmalloc_user(sz);
if (!wc) {
ret = ERR_PTR(-ENOMEM);
goto bail_cq;
}
/*
* Return the address of the WC as the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
if (!cq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wc;
}
err = ib_copy_to_udata(udata, &cq->ip->offset,
sizeof(cq->ip->offset));
if (err) {
ret = ERR_PTR(err);
goto bail_ip;
}
} else
cq->ip = NULL;
spin_lock(&dev->n_cqs_lock);
if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
spin_unlock(&dev->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
dev->n_cqs_allocated++;
spin_unlock(&dev->n_cqs_lock);
if (cq->ip) {
spin_lock_irq(&dev->pending_lock);
list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 * The reported number of entries must be >= the number requested, or
 * an error is returned.
*/
cq->ibcq.cqe = entries;
cq->notify = IB_CQ_NONE;
cq->triggered = 0;
spin_lock_init(&cq->lock);
tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
wc->head = 0;
wc->tail = 0;
cq->queue = wc;
ret = &cq->ibcq;
goto done;
bail_ip:
kfree(cq->ip);
bail_wc:
vfree(wc);
bail_cq:
kfree(cq);
done:
return ret;
}
/**
* ipath_destroy_cq - destroy a completion queue
* @ibcq: the completion queue to destroy.
*
* Returns 0 for success.
*
* Called by ib_destroy_cq() in the generic verbs code.
*/
int ipath_destroy_cq(struct ib_cq *ibcq)
{
struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
spin_unlock(&dev->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, ipath_release_mmap_info);
else
vfree(cq->queue);
kfree(cq);
return 0;
}
/**
* ipath_req_notify_cq - change the notification type for a completion queue
* @ibcq: the completion queue
* @notify_flags: the type of notification to request
*
* Returns 0 for success.
*
* This may be called from interrupt context. Also called by
* ib_req_notify_cq() in the generic verbs code.
*/
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
struct ipath_cq *cq = to_icq(ibcq);
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&cq->lock, flags);
/*
* Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
* any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
*/
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
cq->queue->head != cq->queue->tail)
ret = 1;
spin_unlock_irqrestore(&cq->lock, flags);
return ret;
}
/**
* ipath_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of completion queue entries requested
 * @udata: user data; used to return the new mmap offset for a
 *         user-mappable CQ
 *
* Returns 0 for success.
*/
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ipath_cq_wc *old_wc;
struct ipath_cq_wc *wc;
u32 head, tail, n;
int ret;
u32 sz;
if (cqe < 1 || cqe > ib_ipath_max_cqes) {
ret = -EINVAL;
goto bail;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
sz = sizeof(*wc);
if (udata && udata->outlen >= sizeof(__u64))
sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
else
sz += sizeof(struct ib_wc) * (cqe + 1);
wc = vmalloc_user(sz);
if (!wc) {
ret = -ENOMEM;
goto bail;
}
/* Check that we can write the offset to mmap. */
if (udata && udata->outlen >= sizeof(__u64)) {
__u64 offset = 0;
ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
if (ret)
goto bail_free;
}
spin_lock_irq(&cq->lock);
/*
* Make sure head and tail are sane since they
* might be user writable.
*/
old_wc = cq->queue;
head = old_wc->head;
if (head > (u32) cq->ibcq.cqe)
head = (u32) cq->ibcq.cqe;
tail = old_wc->tail;
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
if (head < tail)
n = cq->ibcq.cqe + 1 + head - tail;
else
n = head - tail;
if (unlikely((u32)cqe < n)) {
ret = -EINVAL;
goto bail_unlock;
}
for (n = 0; tail != head; n++) {
if (cq->ip)
wc->uqueue[n] = old_wc->uqueue[tail];
else
wc->kqueue[n] = old_wc->kqueue[tail];
if (tail == (u32) cq->ibcq.cqe)
tail = 0;
else
tail++;
}
cq->ibcq.cqe = cqe;
wc->head = n;
wc->tail = 0;
cq->queue = wc;
spin_unlock_irq(&cq->lock);
vfree(old_wc);
if (cq->ip) {
struct ipath_ibdev *dev = to_idev(ibcq->device);
struct ipath_mmap_info *ip = cq->ip;
ipath_update_mmap_info(dev, ip, sz, wc);
/*
* Return the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
ret = ib_copy_to_udata(udata, &ip->offset,
sizeof(ip->offset));
if (ret)
goto bail;
}
spin_lock_irq(&dev->pending_lock);
if (list_empty(&ip->pending_mmaps))
list_add(&ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
ret = 0;
goto bail;
bail_unlock:
spin_unlock_irq(&cq->lock);
bail_free:
vfree(wc);
bail:
return ret;
}


@@ -1,99 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_DEBUG_H
#define _IPATH_DEBUG_H
#ifndef _IPATH_DEBUGGING /* debugging enabled or not */
#define _IPATH_DEBUGGING 1
#endif
#if _IPATH_DEBUGGING
/*
* Mask values for debugging. The scheme allows us to compile out any
* of the debug tracing stuff, and if compiled in, to enable or disable
* dynamically. This can be set at modprobe time also:
* modprobe infinipath.ko infinipath_debug=7
*/
#define __IPATH_INFO 0x1 /* generic low verbosity stuff */
#define __IPATH_DBG 0x2 /* generic debug */
#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */
/* leave some low verbosity spots open */
#define __IPATH_VERBDBG 0x40 /* very verbose debug */
#define __IPATH_PKTDBG 0x80 /* print packet data */
/* print process startup (init)/exit messages */
#define __IPATH_PROCDBG 0x100
/* print mmap/fault stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x200
#define __IPATH_ERRPKTDBG 0x400
#define __IPATH_USER_SEND 0x1000 /* use user mode send */
#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */
#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */
#else /* _IPATH_DEBUGGING */
/*
* define all of these even with debugging off, for the few places that do
* if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
* compiler eliminate the code
*/
#define __IPATH_INFO 0x0 /* generic low verbosity stuff */
#define __IPATH_DBG 0x0 /* generic debug */
#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */
#define __IPATH_VERBDBG 0x0 /* very verbose debug */
#define __IPATH_PKTDBG 0x0 /* print packet data */
#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */
/* print mmap/fault stuff, not using VDBG any more */
#define __IPATH_MMDBG 0x0
#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) gen debug */
#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings */
#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors */
#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump */
#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) table dump */
#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */
#endif /* _IPATH_DEBUGGING */
#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
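/*
 * Editor's note: illustrative sketch only, not part of the original
 * header. Driver code tests these bits against its ipath_debug mask,
 * which the comment above notes can be set at modprobe time, e.g.:
 *
 *	if (ipath_debug & __IPATH_PKTDBG)
 *		printk(KERN_DEBUG "dumping packet data\n");
 *
 * In practice the driver's ipath_dbg()/ipath_cdbg() macros (see their
 * use in ipath_diag.c later in this commit) wrap this kind of test
 * rather than open-coding it.
 */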
#endif /* _IPATH_DEBUG_H */


@@ -1,551 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* This file contains support for diagnostic functions. It is accessed by
* opening the ipath_diag device, normally minor number 129. Diagnostic use
* of the InfiniPath chip may render the chip or board unusable until the
* driver is unloaded, or in some cases, until the system is rebooted.
*
* Accesses to the chip through this interface are not similar to going
* through the /sys/bus/pci resource mmap interface.
*/
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/export.h>
#include <asm/uaccess.h>
#include "ipath_kernel.h"
#include "ipath_common.h"
int ipath_diag_inuse;
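/*
 * Editor's note (descriptive comment, not in the original source):
 * ipath_diag_inuse doubles as a small state machine enforced by the
 * open/read/write handlers below: -2 right after open (only a single
 * quadword read at offset 0 is accepted and writes are rejected), -1
 * after that first read (the next write must likewise be a quadword at
 * offset 0), 1 once that write has completed (arbitrary reads and
 * writes are allowed), and 0 while the device is not open.
 */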
static int diag_set_link;
static int ipath_diag_open(struct inode *in, struct file *fp);
static int ipath_diag_release(struct inode *in, struct file *fp);
static ssize_t ipath_diag_read(struct file *fp, char __user *data,
size_t count, loff_t *off);
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off);
static const struct file_operations diag_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diag_write,
.read = ipath_diag_read,
.open = ipath_diag_open,
.release = ipath_diag_release,
.llseek = default_llseek,
};
static ssize_t ipath_diagpkt_write(struct file *fp,
const char __user *data,
size_t count, loff_t *off);
static const struct file_operations diagpkt_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diagpkt_write,
.llseek = noop_llseek,
};
static atomic_t diagpkt_count = ATOMIC_INIT(0);
static struct cdev *diagpkt_cdev;
static struct device *diagpkt_dev;
int ipath_diag_add(struct ipath_devdata *dd)
{
char name[16];
int ret = 0;
if (atomic_inc_return(&diagpkt_count) == 1) {
ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
"ipath_diagpkt", &diagpkt_file_ops,
&diagpkt_cdev, &diagpkt_dev);
if (ret) {
ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
"device: %d", ret);
goto done;
}
}
snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
&diag_file_ops, &dd->diag_cdev,
&dd->diag_dev);
if (ret)
ipath_dev_err(dd, "Couldn't create %s device: %d",
name, ret);
done:
return ret;
}
void ipath_diag_remove(struct ipath_devdata *dd)
{
if (atomic_dec_and_test(&diagpkt_count))
ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
}
/**
* ipath_read_umem64 - read a 64-bit quantity from the chip into user space
* @dd: the infinipath device
* @uaddr: the location to store the data in user memory
* @caddr: the source chip address (full pointer, not offset)
* @count: number of bytes to copy (multiple of 32 bits)
*
* This function also localizes all chip memory accesses.
* The copy should be written such that we read full cacheline packets
* from the chip. This is usually used for a single qword
*
* NOTE: This assumes the chip address is 64-bit aligned.
*/
static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
const void __iomem *caddr, size_t count)
{
const u64 __iomem *reg_addr = caddr;
const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
int ret;
/* not very efficient, but it works for now */
if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
while (reg_addr < reg_end) {
u64 data = readq(reg_addr);
if (copy_to_user(uaddr, &data, sizeof(u64))) {
ret = -EFAULT;
goto bail;
}
reg_addr++;
uaddr += sizeof(u64);
}
ret = 0;
bail:
return ret;
}
/**
* ipath_write_umem64 - write a 64-bit quantity to the chip from user space
* @dd: the infinipath device
* @caddr: the destination chip address (full pointer, not offset)
* @uaddr: the source of the data in user memory
* @count: the number of bytes to copy (multiple of 32 bits)
*
* This is usually used for a single qword
* NOTE: This assumes the chip address is 64-bit aligned.
*/
static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
const void __user *uaddr, size_t count)
{
u64 __iomem *reg_addr = caddr;
const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
int ret;
/* not very efficient, but it works for now */
if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
while (reg_addr < reg_end) {
u64 data;
if (copy_from_user(&data, uaddr, sizeof(data))) {
ret = -EFAULT;
goto bail;
}
writeq(data, reg_addr);
reg_addr++;
uaddr += sizeof(u64);
}
ret = 0;
bail:
return ret;
}
/**
* ipath_read_umem32 - read a 32-bit quantity from the chip into user space
* @dd: the infinipath device
* @uaddr: the location to store the data in user memory
* @caddr: the source chip address (full pointer, not offset)
* @count: number of bytes to copy
*
* read 32 bit values, not 64 bit; for memories that only
* support 32 bit reads; usually a single dword.
*/
static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
const void __iomem *caddr, size_t count)
{
const u32 __iomem *reg_addr = caddr;
const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
int ret;
if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
reg_end > (u32 __iomem *) dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
/* not very efficient, but it works for now */
while (reg_addr < reg_end) {
u32 data = readl(reg_addr);
if (copy_to_user(uaddr, &data, sizeof(data))) {
ret = -EFAULT;
goto bail;
}
reg_addr++;
uaddr += sizeof(u32);
}
ret = 0;
bail:
return ret;
}
/**
* ipath_write_umem32 - write a 32-bit quantity to the chip from user space
* @dd: the infinipath device
* @caddr: the destination chip address (full pointer, not offset)
* @uaddr: the source of the data in user memory
* @count: number of bytes to copy
*
* write 32 bit values, not 64 bit; for memories that only
 * support 32 bit writes; usually a single dword.
*/
static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
const void __user *uaddr, size_t count)
{
u32 __iomem *reg_addr = caddr;
const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
int ret;
if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
reg_end > (u32 __iomem *) dd->ipath_kregend) {
ret = -EINVAL;
goto bail;
}
while (reg_addr < reg_end) {
u32 data;
if (copy_from_user(&data, uaddr, sizeof(data))) {
ret = -EFAULT;
goto bail;
}
writel(data, reg_addr);
reg_addr++;
uaddr += sizeof(u32);
}
ret = 0;
bail:
return ret;
}
static int ipath_diag_open(struct inode *in, struct file *fp)
{
int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
struct ipath_devdata *dd;
int ret;
mutex_lock(&ipath_mutex);
if (ipath_diag_inuse) {
ret = -EBUSY;
goto bail;
}
dd = ipath_lookup(unit);
if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
!dd->ipath_kregbase) {
ret = -ENODEV;
goto bail;
}
fp->private_data = dd;
ipath_diag_inuse = -2;
diag_set_link = 0;
ret = 0;
/* Only expose a way to reset the device if we
make it into diag mode. */
ipath_expose_reset(&dd->pcidev->dev);
bail:
mutex_unlock(&ipath_mutex);
return ret;
}
/**
* ipath_diagpkt_write - write an IB packet
* @fp: the diag data device file pointer
* @data: ipath_diag_pkt structure saying where to get the packet
* @count: size of data to write
* @off: unused by this code
*/
static ssize_t ipath_diagpkt_write(struct file *fp,
const char __user *data,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
u32 plen, pbufn, maxlen_reserve;
struct ipath_diag_pkt odp;
struct ipath_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct ipath_devdata *dd;
ssize_t ret = 0;
u64 val;
u32 l_state, lt_state; /* LinkState, LinkTrainingState */
if (count == sizeof(dp)) {
if (copy_from_user(&dp, data, sizeof(dp))) {
ret = -EFAULT;
goto bail;
}
} else if (count == sizeof(odp)) {
if (copy_from_user(&odp, data, sizeof(odp))) {
ret = -EFAULT;
goto bail;
}
dp.len = odp.len;
dp.unit = odp.unit;
dp.data = odp.data;
dp.pbc_wd = 0;
} else {
ret = -EINVAL;
goto bail;
}
/* send count must be an exact number of dwords */
if (dp.len & 3) {
ret = -EINVAL;
goto bail;
}
plen = dp.len >> 2;
dd = ipath_lookup(dp.unit);
if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
!dd->ipath_kregbase) {
ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
dp.unit);
ret = -ENODEV;
goto bail;
}
if (ipath_diag_inuse && !diag_set_link &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
diag_set_link = 1;
ipath_cdbg(VERBOSE, "Trying to set to set link active for "
"diag pkt\n");
ipath_set_linkstate(dd, IPATH_IB_LINKARM);
ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
}
if (!(dd->ipath_flags & IPATH_INITTED)) {
/* no hardware, freeze, etc. */
ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
ret = -ENODEV;
goto bail;
}
/*
* Want to skip check for l_state if using custom PBC,
* because we might be trying to force an SM packet out.
* first-cut, skip _all_ state checking in that case.
*/
val = ipath_ib_state(dd, dd->ipath_lastibcstat);
lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
(val != dd->ib_init && val != dd->ib_arm &&
val != dd->ib_active))) {
ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
dd->ipath_unit, (unsigned long long) val);
ret = -EINVAL;
goto bail;
}
/*
* need total length before first word written, plus 2 Dwords. One Dword
* is for padding so we get the full user data when not aligned on
* a word boundary. The other Dword is to make sure we have room for the
* ICRC which gets tacked on later.
*/
maxlen_reserve = 2 * sizeof(u32);
if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
dp.len, dd->ipath_ibmaxlen);
ret = -EINVAL;
goto bail;
}
plen = sizeof(u32) + dp.len;
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
"failing\n");
ret = -ENOMEM;
goto bail;
}
if (copy_from_user(tmpbuf,
(const void __user *) (unsigned long) dp.data,
dp.len)) {
ret = -EFAULT;
goto bail;
}
plen >>= 2; /* in dwords */
piobuf = ipath_getpiobuf(dd, plen, &pbufn);
if (!piobuf) {
ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
dd->ipath_unit);
ret = -EBUSY;
goto bail;
}
/* disarm it just to be extra sure */
ipath_disarm_piobufs(dd, pbufn, 1);
if (ipath_debug & __IPATH_PKTDBG)
ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
dd->ipath_unit, plen - 1, pbufn);
if (dp.pbc_wd == 0)
dp.pbc_wd = plen;
writeq(dp.pbc_wd, piobuf);
/*
 * Copy all but the trigger word, then flush, so it's written
* to chip before trigger word, then write trigger word, then
* flush again, so packet is sent.
*/
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
ipath_flush_wc();
__iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
ipath_flush_wc();
__raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
__iowrite32_copy(piobuf + 2, tmpbuf, plen);
ipath_flush_wc();
ret = sizeof(dp);
bail:
vfree(tmpbuf);
return ret;
}
static int ipath_diag_release(struct inode *in, struct file *fp)
{
mutex_lock(&ipath_mutex);
ipath_diag_inuse = 0;
fp->private_data = NULL;
mutex_unlock(&ipath_mutex);
return 0;
}
static ssize_t ipath_diag_read(struct file *fp, char __user *data,
size_t count, loff_t *off)
{
struct ipath_devdata *dd = fp->private_data;
void __iomem *kreg_base;
ssize_t ret;
kreg_base = dd->ipath_kregbase;
if (count == 0)
ret = 0;
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
else if (ipath_diag_inuse < 1 && (*off || count != 8))
ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit reads */
ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
else
ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
if (ret >= 0) {
*off += count;
ret = count;
if (ipath_diag_inuse == -2)
ipath_diag_inuse++;
}
return ret;
}
static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
struct ipath_devdata *dd = fp->private_data;
void __iomem *kreg_base;
ssize_t ret;
kreg_base = dd->ipath_kregbase;
if (count == 0)
ret = 0;
else if ((count % 4) || (*off % 4))
/* address or length is not 32-bit aligned, hence invalid */
ret = -EINVAL;
else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
ret = -EINVAL; /* before any other write allowed */
else if ((count % 8) || (*off % 8))
/* address or length not 64-bit aligned; do 32-bit writes */
ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
else
ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
if (ret >= 0) {
*off += count;
ret = count;
if (ipath_diag_inuse == -1)
ipath_diag_inuse = 1; /* all read/write OK now */
}
return ret;
}


@@ -1,179 +0,0 @@
/*
* Copyright (c) 2006 QLogic, Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/scatterlist.h>
#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include "ipath_verbs.h"
#define BAD_DMA_ADDRESS ((u64) 0)
/*
* The following functions implement driver specific replacements
* for the ib_dma_*() functions.
*
* These functions return kernel virtual addresses instead of
* device bus addresses since the driver uses the CPU to copy
* data instead of using hardware DMA.
*/
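/*
 * Editor's note: illustrative sketch only, not part of the original
 * source. Once this table is installed on the driver's ib_device
 * (registration code not shown here), a consumer call such as
 *
 *	u64 dma = ib_dma_map_single(ibdev, buf, len, DMA_TO_DEVICE);
 *	if (ib_dma_mapping_error(ibdev, dma))
 *		return -ENOMEM;
 *
 * simply hands back the kernel virtual address of "buf";
 * BAD_DMA_ADDRESS (0) is the failure value that ipath_mapping_error()
 * below checks for.
 */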
static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
{
return dma_addr == BAD_DMA_ADDRESS;
}
static u64 ipath_dma_map_single(struct ib_device *dev,
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
return (u64) cpu_addr;
}
static void ipath_dma_unmap_single(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
}
static u64 ipath_dma_map_page(struct ib_device *dev,
struct page *page,
unsigned long offset,
size_t size,
enum dma_data_direction direction)
{
u64 addr;
BUG_ON(!valid_dma_direction(direction));
if (offset + size > PAGE_SIZE) {
addr = BAD_DMA_ADDRESS;
goto done;
}
addr = (u64) page_address(page);
if (addr)
addr += offset;
/* TODO: handle highmem pages */
done:
return addr;
}
static void ipath_dma_unmap_page(struct ib_device *dev,
u64 addr, size_t size,
enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
}
static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction)
{
struct scatterlist *sg;
u64 addr;
int i;
int ret = nents;
BUG_ON(!valid_dma_direction(direction));
for_each_sg(sgl, sg, nents, i) {
addr = (u64) page_address(sg_page(sg));
/* TODO: handle highmem pages */
if (!addr) {
ret = 0;
break;
}
sg->dma_address = addr + sg->offset;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->dma_length = sg->length;
#endif
}
return ret;
}
static void ipath_unmap_sg(struct ib_device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
BUG_ON(!valid_dma_direction(direction));
}
static void ipath_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size,
enum dma_data_direction dir)
{
}
static void ipath_sync_single_for_device(struct ib_device *dev,
u64 addr,
size_t size,
enum dma_data_direction dir)
{
}
static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
u64 *dma_handle, gfp_t flag)
{
struct page *p;
void *addr = NULL;
p = alloc_pages(flag, get_order(size));
if (p)
addr = page_address(p);
if (dma_handle)
*dma_handle = (u64) addr;
return addr;
}
static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
void *cpu_addr, u64 dma_handle)
{
free_pages((unsigned long) cpu_addr, get_order(size));
}
struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
.mapping_error = ipath_mapping_error,
.map_single = ipath_dma_map_single,
.unmap_single = ipath_dma_unmap_single,
.map_page = ipath_dma_map_page,
.unmap_page = ipath_dma_unmap_page,
.map_sg = ipath_map_sg,
.unmap_sg = ipath_unmap_sg,
.sync_single_for_cpu = ipath_sync_single_for_cpu,
.sync_single_for_device = ipath_sync_single_for_device,
.alloc_coherent = ipath_dma_alloc_coherent,
.free_coherent = ipath_dma_free_coherent
};

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,422 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include "ipath_kernel.h"
#define IPATHFS_MAGIC 0x726a77
static struct super_block *ipath_super;
static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, const struct file_operations *fops,
void *data)
{
int error;
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
error = -EPERM;
goto bail;
}
inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_private = data;
if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
inc_nlink(dir);
}
inode->i_fop = fops;
d_instantiate(dentry, inode);
error = 0;
bail:
return error;
}
static int create_file(const char *name, umode_t mode,
struct dentry *parent, struct dentry **dentry,
const struct file_operations *fops, void *data)
{
int error;
mutex_lock(&d_inode(parent)->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = ipathfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
mutex_unlock(&d_inode(parent)->i_mutex);
return error;
}
static ssize_t atomic_stats_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
sizeof ipath_stats);
}
static const struct file_operations atomic_stats_ops = {
.read = atomic_stats_read,
.llseek = default_llseek,
};
static ssize_t atomic_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct infinipath_counters counters;
struct ipath_devdata *dd;
dd = file_inode(file)->i_private;
dd->ipath_f_read_counters(dd, &counters);
return simple_read_from_buffer(buf, count, ppos, &counters,
sizeof counters);
}
static const struct file_operations atomic_counters_ops = {
.read = atomic_counters_read,
.llseek = default_llseek,
};
static ssize_t flash_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct ipath_devdata *dd;
ssize_t ret;
loff_t pos;
char *tmp;
pos = *ppos;
	if (pos < 0) {
ret = -EINVAL;
goto bail;
}
if (pos >= sizeof(struct ipath_flash)) {
ret = 0;
goto bail;
}
if (count > sizeof(struct ipath_flash) - pos)
count = sizeof(struct ipath_flash) - pos;
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
goto bail;
}
dd = file_inode(file)->i_private;
if (ipath_eeprom_read(dd, pos, tmp, count)) {
ipath_dev_err(dd, "failed to read from flash\n");
ret = -ENXIO;
goto bail_tmp;
}
if (copy_to_user(buf, tmp, count)) {
ret = -EFAULT;
goto bail_tmp;
}
*ppos = pos + count;
ret = count;
bail_tmp:
kfree(tmp);
bail:
return ret;
}
static ssize_t flash_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct ipath_devdata *dd;
ssize_t ret;
loff_t pos;
char *tmp;
pos = *ppos;
if (pos != 0) {
ret = -EINVAL;
goto bail;
}
if (count != sizeof(struct ipath_flash)) {
ret = -EINVAL;
goto bail;
}
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
goto bail;
}
if (copy_from_user(tmp, buf, count)) {
ret = -EFAULT;
goto bail_tmp;
}
dd = file_inode(file)->i_private;
if (ipath_eeprom_write(dd, pos, tmp, count)) {
ret = -ENXIO;
ipath_dev_err(dd, "failed to write to flash\n");
goto bail_tmp;
}
*ppos = pos + count;
ret = count;
bail_tmp:
kfree(tmp);
bail:
return ret;
}
static const struct file_operations flash_ops = {
.read = flash_read,
.write = flash_write,
.llseek = default_llseek,
};
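/*
 * Editor's note (descriptive comment, not in the original source): with
 * the helpers below, a mounted ipathfs exposes "atomic_stats" at the
 * root of the filesystem plus one directory per device, named with the
 * two-digit unit number, containing "atomic_counters" and "flash".
 */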
static int create_device_files(struct super_block *sb,
struct ipath_devdata *dd)
{
struct dentry *dir, *tmp;
char unit[10];
int ret;
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
&simple_dir_operations, dd);
if (ret) {
printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
&atomic_counters_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/atomic_counters) "
"failed: %d\n", unit, ret);
goto bail;
}
ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
&flash_ops, dd);
if (ret) {
printk(KERN_ERR "create_file(%s/flash) "
"failed: %d\n", unit, ret);
goto bail;
}
bail:
return ret;
}
static int remove_file(struct dentry *parent, char *name)
{
struct dentry *tmp;
int ret;
tmp = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(tmp)) {
ret = PTR_ERR(tmp);
goto bail;
}
spin_lock(&tmp->d_lock);
if (simple_positive(tmp)) {
dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
simple_unlink(d_inode(parent), tmp);
} else
spin_unlock(&tmp->d_lock);
ret = 0;
bail:
/*
* We don't expect clients to care about the return value, but
* it's there if they need it.
*/
return ret;
}
static int remove_device_files(struct super_block *sb,
struct ipath_devdata *dd)
{
struct dentry *dir, *root;
char unit[10];
int ret;
root = dget(sb->s_root);
mutex_lock(&d_inode(root)->i_mutex);
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
dir = lookup_one_len(unit, root, strlen(unit));
if (IS_ERR(dir)) {
ret = PTR_ERR(dir);
printk(KERN_ERR "Lookup of %s failed\n", unit);
goto bail;
}
remove_file(dir, "flash");
remove_file(dir, "atomic_counters");
d_delete(dir);
ret = simple_rmdir(d_inode(root), dir);
bail:
mutex_unlock(&d_inode(root)->i_mutex);
dput(root);
return ret;
}
static int ipathfs_fill_super(struct super_block *sb, void *data,
int silent)
{
struct ipath_devdata *dd, *tmp;
unsigned long flags;
int ret;
static struct tree_descr files[] = {
[2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
{""},
};
ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
if (ret) {
printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
goto bail;
}
spin_lock_irqsave(&ipath_devs_lock, flags);
list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
spin_unlock_irqrestore(&ipath_devs_lock, flags);
ret = create_device_files(sb, dd);
if (ret)
goto bail;
spin_lock_irqsave(&ipath_devs_lock, flags);
}
spin_unlock_irqrestore(&ipath_devs_lock, flags);
bail:
return ret;
}
static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
struct dentry *ret;
ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
if (!IS_ERR(ret))
ipath_super = ret->d_sb;
return ret;
}
static void ipathfs_kill_super(struct super_block *s)
{
kill_litter_super(s);
ipath_super = NULL;
}
int ipathfs_add_device(struct ipath_devdata *dd)
{
int ret;
if (ipath_super == NULL) {
ret = 0;
goto bail;
}
ret = create_device_files(ipath_super, dd);
bail:
return ret;
}
int ipathfs_remove_device(struct ipath_devdata *dd)
{
int ret;
if (ipath_super == NULL) {
ret = 0;
goto bail;
}
ret = remove_device_files(ipath_super, dd);
bail:
return ret;
}
static struct file_system_type ipathfs_fs_type = {
.owner = THIS_MODULE,
.name = "ipathfs",
.mount = ipathfs_mount,
.kill_sb = ipathfs_kill_super,
};
MODULE_ALIAS_FS("ipathfs");
int __init ipath_init_ipathfs(void)
{
return register_filesystem(&ipathfs_fs_type);
}
void __exit ipath_exit_ipathfs(void)
{
unregister_filesystem(&ipathfs_fs_type);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,270 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <asm/io.h>
#include "ipath_verbs.h"
#include "ipath_kernel.h"
/**
* ipath_alloc_lkey - allocate an lkey
* @rkt: lkey table in which to allocate the lkey
* @mr: memory region that this lkey protects
*
* Returns 1 if successful, otherwise returns 0.
*/
int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
{
unsigned long flags;
u32 r;
u32 n;
int ret;
spin_lock_irqsave(&rkt->lock, flags);
/* Find the next available LKEY */
r = n = rkt->next;
for (;;) {
if (rkt->table[r] == NULL)
break;
r = (r + 1) & (rkt->max - 1);
if (r == n) {
spin_unlock_irqrestore(&rkt->lock, flags);
ipath_dbg("LKEY table full\n");
ret = 0;
goto bail;
}
}
rkt->next = (r + 1) & (rkt->max - 1);
/*
* Make sure lkey is never zero which is reserved to indicate an
* unrestricted LKEY.
*/
rkt->gen++;
mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
<< 8);
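	/*
	 * Editor's note (descriptive comment, not in the original source):
	 * the resulting lkey carries the table index in its upper
	 * ib_ipath_lkey_table_size bits and a truncated generation count
	 * in the bits just above the low byte; the low 8 bits are left
	 * clear.
	 */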
if (mr->lkey == 0) {
mr->lkey |= 1 << 8;
rkt->gen++;
}
rkt->table[r] = mr;
spin_unlock_irqrestore(&rkt->lock, flags);
ret = 1;
bail:
return ret;
}
/**
* ipath_free_lkey - free an lkey
* @rkt: table from which to free the lkey
* @lkey: lkey id to free
*/
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
{
unsigned long flags;
u32 r;
if (lkey == 0)
return;
r = lkey >> (32 - ib_ipath_lkey_table_size);
spin_lock_irqsave(&rkt->lock, flags);
rkt->table[r] = NULL;
spin_unlock_irqrestore(&rkt->lock, flags);
}
/**
* ipath_lkey_ok - check IB SGE for validity and initialize
 * @qp: queue pair whose LKEY table the SGE is checked against
* @isge: outgoing internal SGE
* @sge: SGE to check
* @acc: access flags
*
* Return 1 if valid and successful, otherwise returns 0.
*
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
int ret;
/*
* We use LKEY == zero for kernel virtual addresses
* (see ipath_get_dma_mr and ipath_dma.c).
*/
if (sge->lkey == 0) {
/* always a kernel port, no locking needed */
struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
if (pd->user) {
ret = 0;
goto bail;
}
isge->mr = NULL;
isge->vaddr = (void *) sge->addr;
isge->length = sge->length;
isge->sge_length = sge->length;
ret = 1;
goto bail;
}
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc)) {
ret = 0;
goto bail;
}
off += mr->offset;
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= IPATH_SEGSZ) {
m++;
n = 0;
}
}
isge->mr = mr;
isge->vaddr = mr->map[m]->segs[n].vaddr + off;
isge->length = mr->map[m]->segs[n].length - off;
isge->sge_length = sge->length;
isge->m = m;
isge->n = n;
ret = 1;
bail:
return ret;
}
/**
* ipath_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: queue pair against which to check the RKEY
* @ss: SGE state
* @len: length of data
* @vaddr: virtual address to place data
* @rkey: rkey to check
* @acc: access flags
*
* Return 1 if successful, otherwise 0.
*/
int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
int ret;
/*
* We use RKEY == zero for kernel virtual addresses
* (see ipath_get_dma_mr and ipath_dma.c).
*/
if (rkey == 0) {
/* always a kernel port, no locking needed */
struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
if (pd->user) {
ret = 0;
goto bail;
}
sge->mr = NULL;
sge->vaddr = (void *) vaddr;
sge->length = len;
sge->sge_length = len;
ss->sg_list = NULL;
ss->num_sge = 1;
ret = 1;
goto bail;
}
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
if (unlikely(mr == NULL || mr->lkey != rkey ||
qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
off = vaddr - mr->iova;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0)) {
ret = 0;
goto bail;
}
off += mr->offset;
m = 0;
n = 0;
while (off >= mr->map[m]->segs[n].length) {
off -= mr->map[m]->segs[n].length;
n++;
if (n >= IPATH_SEGSZ) {
m++;
n = 0;
}
}
sge->mr = mr;
sge->vaddr = mr->map[m]->segs[n].vaddr + off;
sge->length = mr->map[m]->segs[n].length - off;
sge->sge_length = len;
sge->m = m;
sge->n = n;
ss->sg_list = NULL;
ss->num_sge = 1;
ret = 1;
bail:
return ret;
}

File diff suppressed because it is too large


@@ -1,174 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>
#include "ipath_verbs.h"
/**
* ipath_release_mmap_info - free mmap info structure
* @ref: a pointer to the kref within struct ipath_mmap_info
*/
void ipath_release_mmap_info(struct kref *ref)
{
struct ipath_mmap_info *ip =
container_of(ref, struct ipath_mmap_info, ref);
struct ipath_ibdev *dev = to_idev(ip->context->device);
spin_lock_irq(&dev->pending_lock);
list_del(&ip->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
vfree(ip->obj);
kfree(ip);
}
/*
* open and close keep track of how many times the CQ is mapped,
* to avoid releasing it.
*/
static void ipath_vma_open(struct vm_area_struct *vma)
{
struct ipath_mmap_info *ip = vma->vm_private_data;
kref_get(&ip->ref);
}
static void ipath_vma_close(struct vm_area_struct *vma)
{
struct ipath_mmap_info *ip = vma->vm_private_data;
kref_put(&ip->ref, ipath_release_mmap_info);
}
static const struct vm_operations_struct ipath_vm_ops = {
.open = ipath_vma_open,
.close = ipath_vma_close,
};
/**
* ipath_mmap - create a new mmap region
* @context: the IB user context of the process making the mmap() call
* @vma: the VMA to be initialized
* Return zero if the mmap is OK. Otherwise, return an errno.
*/
int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct ipath_ibdev *dev = to_idev(context->device);
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start;
struct ipath_mmap_info *ip, *pp;
int ret = -EINVAL;
/*
* Search the device's list of objects waiting for a mmap call.
* Normally, this list is very short since a call to create a
* CQ, QP, or SRQ is soon followed by a call to mmap().
*/
spin_lock_irq(&dev->pending_lock);
list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
pending_mmaps) {
/* Only the creator is allowed to mmap the object */
if (context != ip->context || (__u64) offset != ip->offset)
continue;
/* Don't allow a mmap larger than the object. */
if (size > ip->size)
break;
list_del_init(&ip->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret)
goto done;
vma->vm_ops = &ipath_vm_ops;
vma->vm_private_data = ip;
ipath_vma_open(vma);
goto done;
}
spin_unlock_irq(&dev->pending_lock);
done:
return ret;
}
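/*
 * Editor's note: illustrative sketch only (hypothetical user-space
 * snippet, not part of the driver). The offset that ipath_create_cq()
 * and friends copy back through udata is what user space then passes to
 * mmap() on the uverbs file descriptor; ipath_mmap() above matches
 * vm_pgoff << PAGE_SHIFT against ip->offset:
 *
 *	void *q = mmap(NULL, returned_size, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, cmd_fd, returned_offset);
 *
 * where "returned_size", "returned_offset" and "cmd_fd" stand in for
 * values the verbs library obtains when creating the CQ/QP/SRQ.
 */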
/*
* Allocate information for ipath_mmap
*/
struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
u32 size,
struct ib_ucontext *context,
void *obj) {
struct ipath_mmap_info *ip;
ip = kmalloc(sizeof *ip, GFP_KERNEL);
if (!ip)
goto bail;
size = PAGE_ALIGN(size);
spin_lock_irq(&dev->mmap_offset_lock);
if (dev->mmap_offset == 0)
dev->mmap_offset = PAGE_SIZE;
ip->offset = dev->mmap_offset;
dev->mmap_offset += size;
spin_unlock_irq(&dev->mmap_offset_lock);
INIT_LIST_HEAD(&ip->pending_mmaps);
ip->size = size;
ip->context = context;
ip->obj = obj;
kref_init(&ip->ref);
bail:
return ip;
}
void ipath_update_mmap_info(struct ipath_ibdev *dev,
struct ipath_mmap_info *ip,
u32 size, void *obj) {
size = PAGE_ALIGN(size);
spin_lock_irq(&dev->mmap_offset_lock);
if (dev->mmap_offset == 0)
dev->mmap_offset = PAGE_SIZE;
ip->offset = dev->mmap_offset;
dev->mmap_offset += size;
spin_unlock_irq(&dev->mmap_offset_lock);
ip->size = size;
ip->obj = obj;
}


@@ -1,425 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
/* Fast memory region */
struct ipath_fmr {
struct ib_fmr ibfmr;
u8 page_shift;
struct ipath_mregion mr; /* must be last */
};
static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct ipath_fmr, ibfmr);
}
/**
* ipath_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
* @acc: access flags
*
* Returns the memory region on success, otherwise returns an errno.
* Note that all DMA addresses should be created via the
* struct ib_dma_mapping_ops functions (see ipath_dma.c).
*/
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
{
struct ipath_mr *mr;
struct ib_mr *ret;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
mr->mr.access_flags = acc;
ret = &mr->ibmr;
bail:
return ret;
}
static struct ipath_mr *alloc_mr(int count,
struct ipath_lkey_table *lk_table)
{
struct ipath_mr *mr;
int m, i = 0;
/* Allocate struct plus pointers to first level page tables. */
m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
if (!mr)
goto done;
/* Allocate first level page tables. */
for (; i < m; i++) {
mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
if (!mr->mr.map[i])
goto bail;
}
mr->mr.mapsz = m;
/*
* ib_reg_phys_mr() will initialize mr->ibmr except for
* lkey and rkey.
*/
if (!ipath_alloc_lkey(lk_table, &mr->mr))
goto bail;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
goto done;
bail:
while (i) {
i--;
kfree(mr->mr.map[i]);
}
kfree(mr);
mr = NULL;
done:
return mr;
}
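alloc_mr() sizes the two-level map with a standard ceiling division: m first-level chunks of IPATH_SEGSZ segments each are needed to describe count buffers. A hedged illustration of that arithmetic; the inputs and the chunk size of 32 are made-up values, not driver constants.

#include <stdio.h>

/* Ceiling division: how many chunks of 'segsz' cover 'count' entries. */
static int chunks_needed(int count, int segsz)
{
	return (count + segsz - 1) / segsz;
}

int main(void)
{
	printf("%d\n", chunks_needed(247, 32));   /* 247 entries -> 8 chunks */
	printf("%d\n", chunks_needed(64, 32));    /* exact fit   -> 2 chunks */
	printf("%d\n", chunks_needed(1, 32));     /*             -> 1 chunk  */
	return 0;
}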
/**
* ipath_reg_phys_mr - register a physical memory region
* @pd: protection domain for this memory region
* @buffer_list: pointer to the list of physical buffers to register
* @num_phys_buf: the number of physical buffers to register
* @iova_start: the starting address passed over IB which maps to this MR
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start)
{
struct ipath_mr *mr;
int n, m, i;
struct ib_mr *ret;
mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
if (mr == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
}
mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
mr->mr.offset = 0;
mr->mr.access_flags = acc;
mr->mr.max_segs = num_phys_buf;
mr->umem = NULL;
m = 0;
n = 0;
for (i = 0; i < num_phys_buf; i++) {
mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
mr->mr.map[m]->segs[n].length = buffer_list[i].size;
mr->mr.length += buffer_list[i].size;
n++;
if (n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
ret = &mr->ibmr;
bail:
return ret;
}
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
* @length: length of region to register
* @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the InfiniPath driver
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata)
{
struct ipath_mr *mr;
struct ib_umem *umem;
int n, m, entry;
struct scatterlist *sg;
struct ib_mr *ret;
if (length == 0) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
umem = ib_umem_get(pd->uobject->context, start, length,
mr_access_flags, 0);
if (IS_ERR(umem))
return (void *) umem;
n = umem->nmap;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
ib_umem_release(umem);
goto bail;
}
mr->mr.pd = pd;
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
mr->mr.offset = ib_umem_offset(umem);
mr->mr.access_flags = mr_access_flags;
mr->mr.max_segs = n;
mr->umem = umem;
m = 0;
n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
n++;
if (n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
ret = &mr->ibmr;
bail:
return ret;
}
/**
* ipath_dereg_mr - unregister and free a memory region
* @ibmr: the memory region to free
*
* Returns 0 on success.
*
* Note that this is called to free MRs created by ipath_get_dma_mr()
* or ipath_reg_user_mr().
*/
int ipath_dereg_mr(struct ib_mr *ibmr)
{
struct ipath_mr *mr = to_imr(ibmr);
int i;
ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
i = mr->mr.mapsz;
while (i) {
i--;
kfree(mr->mr.map[i]);
}
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
}
/**
* ipath_alloc_fmr - allocate a fast memory region
* @pd: the protection domain for this memory region
* @mr_access_flags: access flags for this memory region
* @fmr_attr: fast memory region attributes
*
* Returns the memory region on success, otherwise returns an errno.
*/
struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct ipath_fmr *fmr;
int m, i = 0;
struct ib_fmr *ret;
/* Allocate struct plus pointers to first level page tables. */
m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
if (!fmr)
goto bail;
/* Allocate first level page tables. */
for (; i < m; i++) {
fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
GFP_KERNEL);
if (!fmr->mr.map[i])
goto bail;
}
fmr->mr.mapsz = m;
/*
* ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
* rkey.
*/
if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
goto bail;
fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
/*
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
fmr->mr.pd = pd;
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
fmr->page_shift = fmr_attr->page_shift;
ret = &fmr->ibfmr;
goto done;
bail:
while (i)
kfree(fmr->mr.map[--i]);
kfree(fmr);
ret = ERR_PTR(-ENOMEM);
done:
return ret;
}
/**
* ipath_map_phys_fmr - set up a fast memory region
* @ibmfr: the fast memory region to set up
* @page_list: the list of pages to associate with the fast memory region
* @list_len: the number of pages to associate with the fast memory region
* @iova: the virtual address of the start of the fast memory region
*
* This may be called from interrupt context.
*/
int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
int list_len, u64 iova)
{
struct ipath_fmr *fmr = to_ifmr(ibfmr);
struct ipath_lkey_table *rkt;
unsigned long flags;
int m, n, i;
u32 ps;
int ret;
if (list_len > fmr->mr.max_segs) {
ret = -EINVAL;
goto bail;
}
rkt = &to_idev(ibfmr->device)->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova;
fmr->mr.iova = iova;
ps = 1 << fmr->page_shift;
fmr->mr.length = list_len * ps;
m = 0;
n = 0;
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
if (++n == IPATH_SEGSZ) {
m++;
n = 0;
}
}
spin_unlock_irqrestore(&rkt->lock, flags);
ret = 0;
bail:
return ret;
}
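ipath_map_phys_fmr() derives the page size from the shift and spreads the page list across the two-level map with a simple (m, n) counter that wraps at IPATH_SEGSZ. A small sketch of that indexing; the segment-array size of 4 is assumed only to keep the output short.

#include <stdio.h>
#include <stdint.h>

#define SEGSZ 4   /* assumed stand-in for IPATH_SEGSZ */

int main(void)
{
	unsigned page_shift = 12;               /* 4 KiB pages */
	uint64_t ps = 1ull << page_shift;
	int list_len = 10;
	int m = 0, n = 0, i;

	printf("page size %llu, region length %llu\n",
	       (unsigned long long)ps, (unsigned long long)(list_len * ps));
	for (i = 0; i < list_len; i++) {
		printf("page %2d -> map[%d]->segs[%d]\n", i, m, n);
		if (++n == SEGSZ) {
			m++;
			n = 0;
		}
	}
	return 0;
}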
/**
* ipath_unmap_fmr - unmap fast memory regions
* @fmr_list: the list of fast memory regions to unmap
*
* Returns 0 on success.
*/
int ipath_unmap_fmr(struct list_head *fmr_list)
{
struct ipath_fmr *fmr;
struct ipath_lkey_table *rkt;
unsigned long flags;
list_for_each_entry(fmr, fmr_list, ibfmr.list) {
rkt = &to_idev(fmr->ibfmr.device)->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
spin_unlock_irqrestore(&rkt->lock, flags);
}
return 0;
}
/**
* ipath_dealloc_fmr - deallocate a fast memory region
* @ibfmr: the fast memory region to deallocate
*
* Returns 0 on success.
*/
int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct ipath_fmr *fmr = to_ifmr(ibfmr);
int i;
ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
i = fmr->mr.mapsz;
while (i)
kfree(fmr->mr.map[--i]);
kfree(fmr);
return 0;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -1,512 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _IPATH_REGISTERS_H
#define _IPATH_REGISTERS_H
/*
* This file should only be included by kernel source, and by the diags. It
* defines the registers, and their contents, for InfiniPath chips.
*/
/*
* These are the InfiniPath register and buffer bit definitions,
* that are visible to software, and needed only by the kernel
* and diag code. A few, that are visible to protocol and user
* code are in ipath_common.h. Some bits are specific
* to a given chip implementation, and have been moved to the
* chip-specific source file
*/
/* kr_revision bits */
#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
#define INFINIPATH_R_ARCH_MASK 0xFF
#define INFINIPATH_R_ARCH_SHIFT 16
#define INFINIPATH_R_SOFTWARE_MASK 0xFF
#define INFINIPATH_R_SOFTWARE_SHIFT 24
#define INFINIPATH_R_BOARDID_MASK 0xFF
#define INFINIPATH_R_BOARDID_SHIFT 32
/* kr_control bits */
#define INFINIPATH_C_FREEZEMODE 0x00000002
#define INFINIPATH_C_LINKENABLE 0x00000004
/* kr_sendctrl bits */
#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
#define INFINIPATH_S_UPDTHRESH_SHIFT 24
#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
#define IPATH_S_ABORT 0
#define IPATH_S_PIOINTBUFAVAIL 1
#define IPATH_S_PIOBUFAVAILUPD 2
#define IPATH_S_PIOENABLE 3
#define IPATH_S_SDMAINTENABLE 9
#define IPATH_S_SDMASINGLEDESCRIPTOR 10
#define IPATH_S_SDMAENABLE 11
#define IPATH_S_SDMAHALT 12
#define IPATH_S_DISARM 31
#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT)
#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL)
#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD)
#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE)
#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE)
#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
(1U << IPATH_S_SDMASINGLEDESCRIPTOR)
#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE)
#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT)
#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM)
/* kr_rcvctrl bits that are the same on multiple chips */
#define INFINIPATH_R_PORTENABLE_SHIFT 0
#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
/* kr_intstatus, kr_intclear, kr_intmask bits */
#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL
#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL
#define INFINIPATH_I_ERROR 0x0000000080000000ULL
#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL
#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL
#define INFINIPATH_I_GPIO 0x0000000010000000ULL
#define INFINIPATH_I_JINT 0x0000000004000000ULL
/* kr_errorstatus, kr_errorclear, kr_errormask bits */
#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
#define INFINIPATH_E_RICRC 0x0000000000000004ULL
#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
#define INFINIPATH_E_REBP 0x0000000000000200ULL
#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
#define INFINIPATH_E_RHDR 0x0000000000010000ULL
#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL
#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL
#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL
#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL
#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL
#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL
#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL
#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL
#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL
#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL
#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL
#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL
#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
#define INFINIPATH_E_RESET 0x0004000000000000ULL
#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL
#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL
/*
* this is used to print "common" packet errors only when the
* __IPATH_ERRPKTDBG bit is set in ipath_debug.
*/
#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
| INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
| INFINIPATH_E_REBP )
/* Convenience for decoding Send DMA errors */
#define INFINIPATH_E_SDMAERRS ( \
INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
INFINIPATH_E_SDMAUNEXPDATA | \
INFINIPATH_E_SDMADESCADDRMISALIGN | \
INFINIPATH_E_SDMADISABLED | \
INFINIPATH_E_SENDBUFMISUSE)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
* RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
* bit 4: flag buffer, 5: datainfo, 6: header info */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
/* waldo specific -- find the rest in ipath_6110.c */
#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL
#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL
#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL
#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
/* kr_ibcctrl bits */
#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
/* cycle through TS1/TS2 till OK */
#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
/* wait for TS1, then go on */
#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL
#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
/* kr_ibcstatus bits */
#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
#define INFINIPATH_IBCS_TXREADY 0x40000000
#define INFINIPATH_IBCS_TXCREDITOK 0x80000000
/* link training states (shift by
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00
#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01
#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02
#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03
#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04
#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05
#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08
#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09
#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a
#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b
#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
/* link state machine states (shift by ibcs_ls_shift) */
#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
#define INFINIPATH_IBCS_L_STATE_INIT 0x1
#define INFINIPATH_IBCS_L_STATE_ARM 0x2
#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
/* kr_extstatus bits */
#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
/* kr_extctrl bits */
#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL
#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL
#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL
#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL
#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL
#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL
#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL
#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL
#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL
#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL
#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL
#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL
/* kr_partitionkey bits */
#define INFINIPATH_PKEY_SIZE 16
#define INFINIPATH_PKEY_MASK 0xFFFF
#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
/* kr_serdesconfig0 bits */
#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overall reset bits */
#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */
/* tx idle enables (per lane) */
#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL
/* rx detect enables (per lane) */
#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
/* L1 power down; use with RXDETECT, otherwise not used on IB side */
#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL
/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
#define INFINIPATH_XGXS_RX_POL_SHIFT 19
#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
/*
* IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
* PIO send buffers. This is well beyond anything currently
* defined in the InfiniBand spec.
*/
#define IPATH_PIO_MAXIBHDR 128
typedef u64 ipath_err_t;
/* The following change with the type of device, so
* need to be part of the ipath_devdata struct, or
* we could have problems plugging in devices of
* different types (e.g. one HT, one PCIE)
* in one system, to be managed by one driver.
* On the other hand, this file may also be included
* by other code, so leave the declarations here
* temporarily. Minor footprint issue if common-model
* linker used, none if C89+ linker used.
*/
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
/* masks that are different in various chips, or only exist in some chips */
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
* registers are calculated from one of the base registers. The use of
* an integer type doesn't allow type-checking as thorough as, say,
* an enum but allows for better hiding of chip differences.
*/
typedef const u16 ipath_kreg, /* infinipath general registers */
ipath_creg, /* infinipath counter registers */
ipath_sreg; /* kernel-only, infinipath send registers */
/*
* These are the chip registers common to all infinipath chips, and
* used both by the kernel and the diagnostics or other user code.
* They are all implemented such that 64 bit accesses work.
* Some implement no more than 32 bits. Because 64 bit reads
* require 2 HT cmds on opteron, we access those with 32 bit
* reads for efficiency (they are written as 64 bits, since
* the extra 32 bits are nearly free on writes, and it slightly reduces
* complexity). The rest are all accessed as 64 bits.
*/
struct ipath_kregs {
/* These are the 32 bit group */
ipath_kreg kr_control;
ipath_kreg kr_counterregbase;
ipath_kreg kr_intmask;
ipath_kreg kr_intstatus;
ipath_kreg kr_pagealign;
ipath_kreg kr_portcnt;
ipath_kreg kr_rcvtidbase;
ipath_kreg kr_rcvtidcnt;
ipath_kreg kr_rcvegrbase;
ipath_kreg kr_rcvegrcnt;
ipath_kreg kr_scratch;
ipath_kreg kr_sendctrl;
ipath_kreg kr_sendpiobufbase;
ipath_kreg kr_sendpiobufcnt;
ipath_kreg kr_sendpiosize;
ipath_kreg kr_sendregbase;
ipath_kreg kr_userregbase;
/* These are the 64 bit group */
ipath_kreg kr_debugport;
ipath_kreg kr_debugportselect;
ipath_kreg kr_errorclear;
ipath_kreg kr_errormask;
ipath_kreg kr_errorstatus;
ipath_kreg kr_extctrl;
ipath_kreg kr_extstatus;
ipath_kreg kr_gpio_clear;
ipath_kreg kr_gpio_mask;
ipath_kreg kr_gpio_out;
ipath_kreg kr_gpio_status;
ipath_kreg kr_hwdiagctrl;
ipath_kreg kr_hwerrclear;
ipath_kreg kr_hwerrmask;
ipath_kreg kr_hwerrstatus;
ipath_kreg kr_ibcctrl;
ipath_kreg kr_ibcstatus;
ipath_kreg kr_intblocked;
ipath_kreg kr_intclear;
ipath_kreg kr_interruptconfig;
ipath_kreg kr_mdio;
ipath_kreg kr_partitionkey;
ipath_kreg kr_rcvbthqp;
ipath_kreg kr_rcvbufbase;
ipath_kreg kr_rcvbufsize;
ipath_kreg kr_rcvctrl;
ipath_kreg kr_rcvhdrcnt;
ipath_kreg kr_rcvhdrentsize;
ipath_kreg kr_rcvhdrsize;
ipath_kreg kr_rcvintmembase;
ipath_kreg kr_rcvintmemsize;
ipath_kreg kr_revision;
ipath_kreg kr_sendbuffererror;
ipath_kreg kr_sendpioavailaddr;
ipath_kreg kr_serdesconfig0;
ipath_kreg kr_serdesconfig1;
ipath_kreg kr_serdesstatus;
ipath_kreg kr_txintmembase;
ipath_kreg kr_txintmemsize;
ipath_kreg kr_xgxsconfig;
ipath_kreg kr_ibpllcfg;
/* use these two (and the following N ports) only with
* ipath_k*_kreg64_port(); not *kreg64() */
ipath_kreg kr_rcvhdraddr;
ipath_kreg kr_rcvhdrtailaddr;
/* remaining registers are not present on all types of infinipath
chips */
ipath_kreg kr_rcvpktledcnt;
ipath_kreg kr_pcierbuftestreg0;
ipath_kreg kr_pcierbuftestreg1;
ipath_kreg kr_pcieq0serdesconfig0;
ipath_kreg kr_pcieq0serdesconfig1;
ipath_kreg kr_pcieq0serdesstatus;
ipath_kreg kr_pcieq1serdesconfig0;
ipath_kreg kr_pcieq1serdesconfig1;
ipath_kreg kr_pcieq1serdesstatus;
ipath_kreg kr_hrtbt_guid;
ipath_kreg kr_ibcddrctrl;
ipath_kreg kr_ibcddrstatus;
ipath_kreg kr_jintreload;
/* send dma related regs */
ipath_kreg kr_senddmabase;
ipath_kreg kr_senddmalengen;
ipath_kreg kr_senddmatail;
ipath_kreg kr_senddmahead;
ipath_kreg kr_senddmaheadaddr;
ipath_kreg kr_senddmabufmask0;
ipath_kreg kr_senddmabufmask1;
ipath_kreg kr_senddmabufmask2;
ipath_kreg kr_senddmastatus;
/* SerDes related regs (IBA7220-only) */
ipath_kreg kr_ibserdesctrl;
ipath_kreg kr_ib_epbacc;
ipath_kreg kr_ib_epbtrans;
ipath_kreg kr_pcie_epbacc;
ipath_kreg kr_pcie_epbtrans;
ipath_kreg kr_ib_ddsrxeq;
};
struct ipath_cregs {
ipath_creg cr_badformatcnt;
ipath_creg cr_erricrccnt;
ipath_creg cr_errlinkcnt;
ipath_creg cr_errlpcrccnt;
ipath_creg cr_errpkey;
ipath_creg cr_errrcvflowctrlcnt;
ipath_creg cr_err_rlencnt;
ipath_creg cr_errslencnt;
ipath_creg cr_errtidfull;
ipath_creg cr_errtidvalid;
ipath_creg cr_errvcrccnt;
ipath_creg cr_ibstatuschange;
ipath_creg cr_intcnt;
ipath_creg cr_invalidrlencnt;
ipath_creg cr_invalidslencnt;
ipath_creg cr_lbflowstallcnt;
ipath_creg cr_iblinkdowncnt;
ipath_creg cr_iblinkerrrecovcnt;
ipath_creg cr_ibsymbolerrcnt;
ipath_creg cr_pktrcvcnt;
ipath_creg cr_pktrcvflowctrlcnt;
ipath_creg cr_pktsendcnt;
ipath_creg cr_pktsendflowcnt;
ipath_creg cr_portovflcnt;
ipath_creg cr_rcvebpcnt;
ipath_creg cr_rcvovflcnt;
ipath_creg cr_rxdroppktcnt;
ipath_creg cr_senddropped;
ipath_creg cr_sendstallcnt;
ipath_creg cr_sendunderruncnt;
ipath_creg cr_unsupvlcnt;
ipath_creg cr_wordrcvcnt;
ipath_creg cr_wordsendcnt;
ipath_creg cr_vl15droppedpktcnt;
ipath_creg cr_rxotherlocalphyerrcnt;
ipath_creg cr_excessbufferovflcnt;
ipath_creg cr_locallinkintegrityerrcnt;
ipath_creg cr_rxvlerrcnt;
ipath_creg cr_rxdlidfltrcnt;
ipath_creg cr_psstat;
ipath_creg cr_psstart;
ipath_creg cr_psinterval;
ipath_creg cr_psrcvdatacount;
ipath_creg cr_psrcvpktscount;
ipath_creg cr_psxmitdatacount;
ipath_creg cr_psxmitpktscount;
ipath_creg cr_psxmitwaitcount;
};
#endif /* _IPATH_REGISTERS_H */
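The mask/shift pairs defined in this header are all used the same way: shift the 64-bit register value right, then AND with the mask. A hedged sketch of decoding a kr_revision value with those definitions; the sample register value is invented and the macro names are shortened stand-ins for the INFINIPATH_R_* definitions above.

#include <stdio.h>
#include <stdint.h>

#define R_CHIPREVMINOR_MASK  0xFFull
#define R_CHIPREVMINOR_SHIFT 0
#define R_CHIPREVMAJOR_MASK  0xFFull
#define R_CHIPREVMAJOR_SHIFT 8
#define R_BOARDID_MASK       0xFFull
#define R_BOARDID_SHIFT      32

/* Generic field extraction: shift down, then mask. */
static uint64_t field(uint64_t reg, uint64_t mask, unsigned shift)
{
	return (reg >> shift) & mask;
}

int main(void)
{
	uint64_t kr_revision = 0x0000000b00000402ull;  /* invented sample */

	printf("chip rev %llu.%llu, board id %llu\n",
	       (unsigned long long)field(kr_revision, R_CHIPREVMAJOR_MASK,
					 R_CHIPREVMAJOR_SHIFT),
	       (unsigned long long)field(kr_revision, R_CHIPREVMINOR_MASK,
					 R_CHIPREVMINOR_SHIFT),
	       (unsigned long long)field(kr_revision, R_BOARDID_MASK,
					 R_BOARDID_SHIFT));
	return 0;
}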


@@ -1,734 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/sched.h>
#include <linux/spinlock.h>
#include "ipath_verbs.h"
#include "ipath_kernel.h"
/*
* Convert the AETH RNR timeout code into the number of milliseconds.
*/
const u32 ib_ipath_rnr_table[32] = {
656, /* 0 */
1, /* 1 */
1, /* 2 */
1, /* 3 */
1, /* 4 */
1, /* 5 */
1, /* 6 */
1, /* 7 */
1, /* 8 */
1, /* 9 */
1, /* A */
1, /* B */
1, /* C */
1, /* D */
2, /* E */
2, /* F */
3, /* 10 */
4, /* 11 */
6, /* 12 */
8, /* 13 */
11, /* 14 */
16, /* 15 */
21, /* 16 */
31, /* 17 */
41, /* 18 */
62, /* 19 */
82, /* 1A */
123, /* 1B */
164, /* 1C */
246, /* 1D */
328, /* 1E */
492 /* 1F */
};
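The table maps the 5-bit RNR NAK timer code carried in the AETH to a rounded millisecond count; the sender then waits that long before retrying. A minimal lookup sketch using the same values (the example AETH code is invented):

#include <stdio.h>

/* Rounded ms per 5-bit RNR timeout code (values from the table above). */
static const unsigned rnr_table_ms[32] = {
	656, 1, 1, 1, 1, 1, 1, 1,             /* 0x00 - 0x07 */
	1, 1, 1, 1, 1, 1, 2, 2,               /* 0x08 - 0x0f */
	3, 4, 6, 8, 11, 16, 21, 31,           /* 0x10 - 0x17 */
	41, 62, 82, 123, 164, 246, 328, 492   /* 0x18 - 0x1f */
};

int main(void)
{
	unsigned aeth_code = 0x14;     /* example code from a received RNR NAK */

	printf("RNR code 0x%02x -> wait ~%u ms before retrying\n",
	       aeth_code, rnr_table_ms[aeth_code & 0x1f]);
	return 0;
}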
/**
* ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
* @qp: the QP
*
* Called with the QP s_lock held and interrupts disabled.
* XXX Use a simple list for now. We might need a priority
* queue if we have lots of QPs waiting for RNR timeouts
* but that should be rare.
*/
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
/* We already did a spin_lock_irqsave(), so just use spin_lock */
spin_lock(&dev->pending_lock);
if (list_empty(&dev->rnrwait))
list_add(&qp->timerwait, &dev->rnrwait);
else {
struct list_head *l = &dev->rnrwait;
struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
timerwait);
while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
qp->s_rnr_timeout -= nqp->s_rnr_timeout;
l = l->next;
if (l->next == &dev->rnrwait) {
nqp = NULL;
break;
}
nqp = list_entry(l->next, struct ipath_qp,
timerwait);
}
if (nqp)
nqp->s_rnr_timeout -= qp->s_rnr_timeout;
list_add(&qp->timerwait, l);
}
spin_unlock(&dev->pending_lock);
}
/**
* ipath_init_sge - Validate a RWQE and fill in the SGE state
* @qp: the QP
*
* Return 1 if OK.
*/
int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
u32 *lengthp, struct ipath_sge_state *ss)
{
int i, j, ret;
struct ib_wc wc;
*lengthp = 0;
for (i = j = 0; i < wqe->num_sge; i++) {
if (wqe->sg_list[i].length == 0)
continue;
/* Check LKEY */
if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
*lengthp += wqe->sg_list[i].length;
j++;
}
ss->num_sge = j;
ret = 1;
goto bail;
bad_lkey:
memset(&wc, 0, sizeof(wc));
wc.wr_id = wqe->wr_id;
wc.status = IB_WC_LOC_PROT_ERR;
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
/* Signal solicited completion event. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
ret = 0;
bail:
return ret;
}
/**
* ipath_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
* @wr_id_only: update qp->r_wr_id only, not qp->r_sge
*
* Return 0 if no RWQE is available, otherwise return 1.
*
* Can be called from interrupt level.
*/
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
unsigned long flags;
struct ipath_rq *rq;
struct ipath_rwq *wq;
struct ipath_srq *srq;
struct ipath_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
u32 tail;
int ret;
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
srq = NULL;
handler = NULL;
rq = &qp->r_rq;
}
spin_lock_irqsave(&rq->lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
ret = 0;
goto unlock;
}
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
if (tail >= rq->size)
tail = 0;
do {
if (unlikely(tail == wq->head)) {
ret = 0;
goto unlock;
}
/* Make sure entry is read after head index is read. */
smp_rmb();
wqe = get_rwqe_ptr(rq, tail);
if (++tail >= rq->size)
tail = 0;
if (wr_id_only)
break;
qp->r_sge.sg_list = qp->r_sg_list;
} while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
qp->r_wr_id = wqe->wr_id;
wq->tail = tail;
ret = 1;
set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
if (handler) {
u32 n;
/*
* validate head pointer value and compute
* the number of remaining WQEs.
*/
n = wq->head;
if (n >= rq->size)
n = 0;
if (n < tail)
n += rq->size - tail;
else
n -= tail;
if (n < srq->limit) {
struct ib_event ev;
srq->limit = 0;
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
handler(&ev, srq->ibsrq.srq_context);
goto bail;
}
}
unlock:
spin_unlock_irqrestore(&rq->lock, flags);
bail:
return ret;
}
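The SRQ-limit check above computes how many receive WQEs remain in the circular ring: the distance from tail to head, wrapping at rq->size. A standalone sketch of that occupancy computation with made-up head/tail values:

#include <stdio.h>

/* Entries still posted in a circular ring of 'size' slots. */
static unsigned ring_occupancy(unsigned head, unsigned tail, unsigned size)
{
	unsigned n = head;

	if (n >= size)
		n = 0;          /* head is user-writable; clamp bogus values */
	if (n < tail)
		n += size - tail;
	else
		n -= tail;
	return n;
}

int main(void)
{
	/* no wrap: head ahead of tail */
	printf("%u\n", ring_occupancy(7, 3, 16));   /* 4 */
	/* wrapped: head behind tail */
	printf("%u\n", ring_occupancy(2, 13, 16));  /* 5 */
	return 0;
}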
/**
* ipath_ruc_loopback - handle UC and RC loopback requests
* @sqp: the sending QP
*
* This is called from ipath_do_send() to
* forward a WQE addressed to the same HCA.
* Note that although we are single threaded due to the tasklet, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
static void ipath_ruc_loopback(struct ipath_qp *sqp)
{
struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
struct ipath_qp *qp;
struct ipath_swqe *wqe;
struct ipath_sge *sge;
unsigned long flags;
struct ib_wc wc;
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
goto unlock;
sqp->s_flags |= IPATH_S_BUSY;
again:
if (sqp->s_last == sqp->s_head)
goto clr_busy;
wqe = get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work request. */
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
* We increment s_cur to indicate s_last is in progress.
*/
if (sqp->s_last == sqp->s_cur) {
if (++sqp->s_cur >= sqp->s_size)
sqp->s_cur = 0;
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
*/
if (sqp->ibqp.qp_type == IB_QPT_RC)
send_status = IB_WC_RETRY_EXC_ERR;
else
send_status = IB_WC_SUCCESS;
goto serr;
}
memset(&wc, 0, sizeof wc);
send_status = IB_WC_SUCCESS;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
if (!ipath_get_rwqe(qp, 0))
goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
if (!ipath_get_rwqe(qp, 1))
goto rnr_nak;
/* FALLTHROUGH */
case IB_WR_RDMA_WRITE:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
break;
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.atomic.remote_addr,
wqe->wr.wr.atomic.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
sdata = wqe->wr.wr.atomic.compare_add;
*(u64 *) sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
sdata, wqe->wr.wr.atomic.swap);
goto send_comp;
default:
send_status = IB_WC_LOC_QP_OP_ERR;
goto serr;
}
sge = &sqp->s_sge.sge;
while (sqp->s_len) {
u32 len = sqp->s_len;
if (len > sge->length)
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
BUG_ON(len == 0);
ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr != NULL) {
if (++sge->n >= IPATH_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
sqp->s_len -= len;
}
if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
ipath_send_complete(sqp, wqe, send_status);
goto again;
rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
*/
if (sqp->s_rnr_retry == 0) {
send_status = IB_WC_RNR_RETRY_EXC_ERR;
goto serr;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
goto clr_busy;
sqp->s_flags |= IPATH_S_WAITING;
dev->n_rnr_naks++;
sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
ipath_insert_rnr_queue(sqp);
goto clr_busy;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
acc_err:
send_status = IB_WC_REM_ACCESS_ERR;
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
ipath_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
ipath_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
sqp->s_flags &= ~IPATH_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = sqp->ibqp.device;
ev.element.qp = &sqp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
}
goto done;
}
clr_busy:
sqp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
if (qp && atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
}
static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
{
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
qp->ibqp.qp_type == IB_QPT_SMI) {
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
}
/**
* ipath_no_bufs_available - tell the layer driver we need buffers
* @qp: the QP that caused the problem
* @dev: the device we ran out of buffers on
*
* Called when we run out of PIO buffers.
* If we are now in the error state, return zero to flush the
* send work request.
*/
static int ipath_no_bufs_available(struct ipath_qp *qp,
struct ipath_ibdev *dev)
{
unsigned long flags;
int ret = 1;
/*
* Note that as soon as want_buffer() is called and
* possibly before it returns, ipath_ib_piobufavail()
* could be called. Therefore, put QP on the piowait list before
* enabling the PIO avail interrupt.
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
dev->n_piowait++;
qp->s_flags |= IPATH_S_WAITING;
qp->s_flags &= ~IPATH_S_BUSY;
spin_lock(&dev->pending_lock);
if (list_empty(&qp->piowait))
list_add_tail(&qp->piowait, &dev->piowait);
spin_unlock(&dev->pending_lock);
} else
ret = 0;
spin_unlock_irqrestore(&qp->s_lock, flags);
if (ret)
want_buffer(dev->dd, qp);
return ret;
}
/**
* ipath_make_grh - construct a GRH header
* @dev: a pointer to the ipath device
* @hdr: a pointer to the GRH header being constructed
* @grh: the global route address to send to
* @hwords: the number of 32 bit words of header being sent
* @nwords: the number of 32 bit words of data being sent
*
* Return the size of the header in 32 bit words.
*/
u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((6 << 28) |
(grh->traffic_class << 20) |
grh->flow_label);
hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
/* next_hdr is defined by C8-7 in ch. 8.4.1 */
hdr->next_hdr = 0x1B;
hdr->hop_limit = grh->hop_limit;
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = dev->gid_prefix;
hdr->sgid.global.interface_id = dev->dd->ipath_guid;
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
return sizeof(struct ib_grh) / sizeof(u32);
}
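ipath_make_grh() packs the IPv6-style first word as version (6), traffic class and flow label, and computes the GRH PayLen as the byte count of everything after the GRH: the -2 drops the two LRH words, while the payload words and the CRC word are added back, all scaled to bytes by << 2. A sketch of that packing in host byte order; the one-word CRC and the example word counts are assumptions.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned traffic_class = 0, flow_label = 0x12345;
	unsigned hwords = 5;        /* header words counted by the caller,
				     * excluding the GRH itself (example) */
	unsigned nwords = 64;       /* payload words (example) */
	unsigned crc_words = 1;     /* assumed: one 32-bit CRC word */

	uint32_t version_tclass_flow =
		(6u << 28) | (traffic_class << 20) | (flow_label & 0xfffff);
	uint16_t paylen = (uint16_t)((hwords - 2 + nwords + crc_words) << 2);

	printf("version_tclass_flow = 0x%08x\n", version_tclass_flow);
	printf("paylen              = %u bytes\n", paylen);
	return 0;
}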
void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
struct ipath_other_headers *ohdr,
u32 bth0, u32 bth2)
{
u16 lrh0;
u32 nwords;
u32 extra_bytes;
/* Construct the header. */
extra_bytes = -qp->s_cur_size & 3;
nwords = (qp->s_cur_size + extra_bytes) >> 2;
lrh0 = IPATH_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
&qp->remote_ah_attr.grh,
qp->s_hdrwords, nwords);
lrh0 = IPATH_LRH_GRH;
}
lrh0 |= qp->remote_ah_attr.sl << 4;
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
qp->remote_ah_attr.src_path_bits);
bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
}
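The header builder pads the payload up to a 32-bit boundary: extra_bytes = -size & 3 yields the 0-3 pad bytes, which land in the BTH pad-count bits (shifted left by 20), and nwords is the padded payload in 32-bit words. A quick standalone check of that padding arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned sizes[] = { 0, 1, 2, 3, 4, 13 };
	unsigned i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned size = sizes[i];
		unsigned extra_bytes = -size & 3;          /* pad to 4 bytes */
		unsigned nwords = (size + extra_bytes) >> 2;

		printf("payload %2u -> pad %u, %u word(s), pad bits 0x%x\n",
		       size, extra_bytes, nwords, extra_bytes << 20);
	}
	return 0;
}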
/**
* ipath_do_send - perform a send on a QP
* @data: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP (tasklet).
* Otherwise, two threads could send packets out of order.
*/
void ipath_do_send(unsigned long data)
{
struct ipath_qp *qp = (struct ipath_qp *)data;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int (*make_req)(struct ipath_qp *qp);
unsigned long flags;
if ((qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC) &&
qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
ipath_ruc_loopback(qp);
goto bail;
}
if (qp->ibqp.qp_type == IB_QPT_RC)
make_req = ipath_make_rc_req;
else if (qp->ibqp.qp_type == IB_QPT_UC)
make_req = ipath_make_uc_req;
else
make_req = ipath_make_ud_req;
spin_lock_irqsave(&qp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
goto bail;
}
qp->s_flags |= IPATH_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, flags);
again:
/* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) {
/*
* If no PIO bufs are available, return. An interrupt will
* call ipath_ib_piobufavail() when one is available.
*/
if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
qp->s_cur_sge, qp->s_cur_size)) {
if (ipath_no_bufs_available(qp, dev))
goto bail;
}
dev->n_unicast_xmit++;
/* Record that we sent the packet and s_hdr is empty. */
qp->s_hdrwords = 0;
}
if (make_req(qp))
goto again;
bail:;
}
/*
* This should be called with s_lock held.
*/
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
return;
/* See ch. 11.2.4.1 and 10.7.3.1 */
if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
status != IB_WC_SUCCESS) {
struct ib_wc wc;
memset(&wc, 0, sizeof wc);
wc.wr_id = wqe->wr.wr_id;
wc.status = status;
wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
wc.qp = &qp->ibqp;
if (status == IB_WC_SUCCESS)
wc.byte_len = wqe->length;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
status != IB_WC_SUCCESS);
}
old_last = last = qp->s_last;
if (++last >= qp->s_size)
last = 0;
qp->s_last = last;
if (qp->s_cur == old_last)
qp->s_cur = last;
if (qp->s_tail == old_last)
qp->s_tail = last;
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
qp->s_draining = 0;
}


@@ -1,818 +0,0 @@
/*
* Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
static void vl15_watchdog_enq(struct ipath_devdata *dd)
{
/* ipath_sdma_lock must already be held */
if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
unsigned long interval = (HZ + 19) / 20;
dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
add_timer(&dd->ipath_sdma_vl15_timer);
}
}
static void vl15_watchdog_deq(struct ipath_devdata *dd)
{
/* ipath_sdma_lock must already be held */
if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
unsigned long interval = (HZ + 19) / 20;
mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
} else {
del_timer(&dd->ipath_sdma_vl15_timer);
}
}
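Both watchdog helpers arm the timer (HZ + 19) / 20 jiffies out, i.e. roughly a 50 ms tick rounded up to whole jiffies regardless of the kernel's HZ setting. A tiny check of that rounding for a few common HZ values (the HZ list is illustrative):

#include <stdio.h>

int main(void)
{
	unsigned hz_values[] = { 100, 250, 300, 1000 };
	unsigned i;

	for (i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++) {
		unsigned hz = hz_values[i];
		unsigned interval = (hz + 19) / 20;   /* jiffies, ~50 ms */

		printf("HZ=%4u -> %3u jiffies (~%u ms)\n",
		       hz, interval, interval * 1000 / hz);
	}
	return 0;
}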
static void vl15_watchdog_timeout(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
ipath_dbg("vl15 watchdog timeout - clearing\n");
ipath_cancel_sends(dd, 1);
ipath_hol_down(dd);
} else {
ipath_dbg("vl15 watchdog timeout - "
"condition already cleared\n");
}
}
static void unmap_desc(struct ipath_devdata *dd, unsigned head)
{
__le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
u64 desc[2];
dma_addr_t addr;
size_t len;
desc[0] = le64_to_cpu(descqp[0]);
desc[1] = le64_to_cpu(descqp[1]);
addr = (desc[1] << 32) | (desc[0] >> 32);
len = (desc[0] >> 14) & (0x7ffULL << 2);
dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
}
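unmap_desc() reassembles the DMA address from the two descriptor qwords (the low 32 address bits sit in the top half of qword 0, the high bits in qword 1) and pulls the byte length out of qword 0 with a shift and mask. A hedged round-trip sketch of that decode; the encode side is inferred from the decode and is an assumption, not the documented descriptor layout.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t addr = 0x000000012345f000ull;   /* example DMA address */
	uint32_t len  = 2048;                    /* bytes, multiple of 4 */

	/* Assumed encode: low address bits in the top half of qword 0,
	 * dword count at bits 16..26, high address bits in qword 1. */
	uint64_t qw0 = ((addr & 0xffffffffull) << 32) |
		       ((uint64_t)(len >> 2) << 16);
	uint64_t qw1 = addr >> 32;

	/* Decode exactly as unmap_desc() does. */
	uint64_t daddr = (qw1 << 32) | (qw0 >> 32);
	uint64_t dlen  = (qw0 >> 14) & (0x7ffull << 2);

	printf("addr 0x%llx -> 0x%llx, len %u -> %llu\n",
	       (unsigned long long)addr, (unsigned long long)daddr,
	       len, (unsigned long long)dlen);
	return 0;
}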
/*
* ipath_sdma_lock should be locked before calling this.
*/
int ipath_sdma_make_progress(struct ipath_devdata *dd)
{
struct list_head *lp = NULL;
struct ipath_sdma_txreq *txp = NULL;
u16 dmahead;
u16 start_idx = 0;
int progress = 0;
if (!list_empty(&dd->ipath_sdma_activelist)) {
lp = dd->ipath_sdma_activelist.next;
txp = list_entry(lp, struct ipath_sdma_txreq, list);
start_idx = txp->start_idx;
}
/*
* Read the SDMA head register in order to know that the
* interrupt clear has been written to the chip.
* Otherwise, we may not get an interrupt for the last
* descriptor in the queue.
*/
dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
/* sanity check return value for error handling (chip reset, etc.) */
if (dmahead >= dd->ipath_sdma_descq_cnt)
goto done;
while (dd->ipath_sdma_descq_head != dmahead) {
if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
dd->ipath_sdma_descq_head == start_idx) {
unmap_desc(dd, dd->ipath_sdma_descq_head);
start_idx++;
if (start_idx == dd->ipath_sdma_descq_cnt)
start_idx = 0;
}
/* increment free count and head */
dd->ipath_sdma_descq_removed++;
if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
dd->ipath_sdma_descq_head = 0;
if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
/* move to notify list */
if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
vl15_watchdog_deq(dd);
list_move_tail(lp, &dd->ipath_sdma_notifylist);
if (!list_empty(&dd->ipath_sdma_activelist)) {
lp = dd->ipath_sdma_activelist.next;
txp = list_entry(lp, struct ipath_sdma_txreq,
list);
start_idx = txp->start_idx;
} else {
lp = NULL;
txp = NULL;
}
}
progress = 1;
}
if (progress)
tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
done:
return progress;
}
static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
{
struct ipath_sdma_txreq *txp, *txp_next;
list_for_each_entry_safe(txp, txp_next, list, list) {
list_del_init(&txp->list);
if (txp->callback)
(*txp->callback)(txp->callback_cookie,
txp->callback_status);
}
}
static void sdma_notify_taskbody(struct ipath_devdata *dd)
{
unsigned long flags;
struct list_head list;
INIT_LIST_HEAD(&list);
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
list_splice_init(&dd->ipath_sdma_notifylist, &list);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
ipath_sdma_notify(dd, &list);
/*
* The IB verbs layer needs to see the callback before getting
* the call to ipath_ib_piobufavail() because the callback
* handles releasing resources the next send will need.
* Otherwise, we could do these calls in
* ipath_sdma_make_progress().
*/
ipath_ib_piobufavail(dd->verbs_dev);
}
static void sdma_notify_task(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
sdma_notify_taskbody(dd);
}
static void dump_sdma_state(struct ipath_devdata *dd)
{
unsigned long reg;
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
}
static void sdma_abort_task(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
u64 status;
unsigned long flags;
if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
return;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
/* nothing to do */
if (status == IPATH_SDMA_ABORT_NONE)
goto unlock;
/* ipath_sdma_abort() is done, waiting for interrupt */
if (status == IPATH_SDMA_ABORT_DISARMED) {
if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
goto resched_noprint;
/* give up, intr got lost somewhere */
ipath_dbg("give up waiting for SDMADISABLED intr\n");
__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
status = IPATH_SDMA_ABORT_ABORTED;
}
/* everything is stopped, time to clean up and restart */
if (status == IPATH_SDMA_ABORT_ABORTED) {
struct ipath_sdma_txreq *txp, *txpnext;
u64 hwstatus;
int notify = 0;
hwstatus = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_senddmastatus);
if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
IPATH_SDMA_STATUS_ABORT_IN_PROG |
IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
!(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
if (dd->ipath_sdma_reset_wait > 0) {
/* not done shutting down sdma */
--dd->ipath_sdma_reset_wait;
goto resched;
}
ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
"status after SDMA reset, continuing\n");
dump_sdma_state(dd);
}
/* dequeue all "sent" requests */
list_for_each_entry_safe(txp, txpnext,
&dd->ipath_sdma_activelist, list) {
txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
vl15_watchdog_deq(dd);
list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
notify = 1;
}
if (notify)
tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
/* reset our notion of head and tail */
dd->ipath_sdma_descq_tail = 0;
dd->ipath_sdma_descq_head = 0;
dd->ipath_sdma_head_dma[0] = 0;
dd->ipath_sdma_generation = 0;
dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
/* Reset SendDmaLenGen */
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
(u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
/* done with sdma state for a bit */
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
/*
* Don't restart sdma here (with the exception
* below). Wait until link is up to ACTIVE. VL15 MADs
* used to bring the link up use PIO, and multiple link
* transitions otherwise cause the sdma engine to be
* stopped and started multiple times.
* The disable is done here, including the shadow,
* so the state is kept consistent.
* See ipath_restart_sdma() for the actual starting
* of sdma.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
/* make sure I see next message */
dd->ipath_sdma_abort_jiffies = 0;
/*
* Not everything that takes SDMA offline is a link
* status change. If the link was up, restart SDMA.
*/
if (dd->ipath_flags & IPATH_LINKACTIVE)
ipath_restart_sdma(dd);
goto done;
}
resched:
/*
* for now, keep spinning
* JAG - this is bad to just have default be a loop without
* state change
*/
if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
ipath_dbg("looping with status 0x%08lx\n",
dd->ipath_sdma_status);
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
}
resched_noprint:
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
return;
unlock:
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
done:
return;
}
/*
* This is called from interrupt context.
*/
void ipath_sdma_intr(struct ipath_devdata *dd)
{
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
(void) ipath_sdma_make_progress(dd);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
}
static int alloc_sdma(struct ipath_devdata *dd)
{
int ret = 0;
/* Allocate memory for SendDMA descriptor FIFO */
dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
if (!dd->ipath_sdma_descq) {
ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
"FIFO memory\n");
ret = -ENOMEM;
goto done;
}
dd->ipath_sdma_descq_cnt =
SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
/* Allocate memory for DMA of head register to memory */
dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
if (!dd->ipath_sdma_head_dma) {
ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
ret = -ENOMEM;
goto cleanup_descq;
}
dd->ipath_sdma_head_dma[0] = 0;
init_timer(&dd->ipath_sdma_vl15_timer);
dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout;
dd->ipath_sdma_vl15_timer.data = (unsigned long)dd;
atomic_set(&dd->ipath_sdma_vl15_count, 0);
goto done;
cleanup_descq:
dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
(void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
dd->ipath_sdma_descq = NULL;
dd->ipath_sdma_descq_phys = 0;
done:
return ret;
}
int setup_sdma(struct ipath_devdata *dd)
{
int ret = 0;
unsigned i, n;
u64 tmp64;
u64 senddmabufmask[3] = { 0 };
unsigned long flags;
ret = alloc_sdma(dd);
if (ret)
goto done;
if (!dd->ipath_sdma_descq) {
ipath_dev_err(dd, "SendDMA memory not allocated\n");
goto done;
}
/*
* Set initial status as if we had been up, then gone down.
* This lets initial start on transition to ACTIVE be the
* same as restart after link flap.
*/
dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
dd->ipath_sdma_abort_jiffies = 0;
dd->ipath_sdma_generation = 0;
dd->ipath_sdma_descq_tail = 0;
dd->ipath_sdma_descq_head = 0;
dd->ipath_sdma_descq_removed = 0;
dd->ipath_sdma_descq_added = 0;
/* Set SendDmaBase */
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
dd->ipath_sdma_descq_phys);
/* Set SendDmaLenGen */
tmp64 = dd->ipath_sdma_descq_cnt;
tmp64 |= 1<<18; /* enable generation checking */
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
/* Set SendDmaTail */
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
dd->ipath_sdma_descq_tail);
/* Set SendDmaHeadAddr */
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
dd->ipath_sdma_head_phys);
/*
* Reserve all the former "kernel" piobufs, using high number range
* so we get as many 4K buffers as possible
*/
n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
ipath_chg_pioavailkernel(dd, i, n - i , 0);
for (; i < n; ++i) {
unsigned word = i / 64;
unsigned bit = i & 63;
BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
senddmabufmask[0]);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
senddmabufmask[1]);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
senddmabufmask[2]);
INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
(unsigned long) dd);
tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
(unsigned long) dd);
/*
 * No use turning on SDMA here, as the link is probably not ACTIVE.
* Just mark it RUNNING and enable the interrupt, and let the
* ipath_restart_sdma() on link transition to ACTIVE actually
* enable it.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
__set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
done:
return ret;
}
void teardown_sdma(struct ipath_devdata *dd)
{
struct ipath_sdma_txreq *txp, *txpnext;
unsigned long flags;
dma_addr_t sdma_head_phys = 0;
dma_addr_t sdma_descq_phys = 0;
void *sdma_descq = NULL;
void *sdma_head_dma = NULL;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
__clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
__set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
__set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
tasklet_kill(&dd->ipath_sdma_abort_task);
tasklet_kill(&dd->ipath_sdma_notify_task);
/* turn off sdma */
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
/* dequeue all "sent" requests */
list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
list) {
txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
vl15_watchdog_deq(dd);
list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
}
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
sdma_notify_taskbody(dd);
del_timer_sync(&dd->ipath_sdma_vl15_timer);
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
dd->ipath_sdma_abort_jiffies = 0;
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
if (dd->ipath_sdma_head_dma) {
sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
sdma_head_phys = dd->ipath_sdma_head_phys;
dd->ipath_sdma_head_dma = NULL;
dd->ipath_sdma_head_phys = 0;
}
if (dd->ipath_sdma_descq) {
sdma_descq = dd->ipath_sdma_descq;
sdma_descq_phys = dd->ipath_sdma_descq_phys;
dd->ipath_sdma_descq = NULL;
dd->ipath_sdma_descq_phys = 0;
}
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (sdma_head_dma)
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
sdma_head_dma, sdma_head_phys);
if (sdma_descq)
dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
sdma_descq, sdma_descq_phys);
}
/*
* [Re]start SDMA, if we use it, and it's not already OK.
* This is called on transition to link ACTIVE, either the first or
* subsequent times.
*/
void ipath_restart_sdma(struct ipath_devdata *dd)
{
unsigned long flags;
int needed = 1;
if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
goto bail;
/*
* First, make sure we should, which is to say,
* check that we are "RUNNING" (not in teardown)
* and not "SHUTDOWN"
*/
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
|| test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
needed = 0;
else {
__clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
__clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
__clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
}
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (!needed) {
ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
dd->ipath_sdma_status);
goto bail;
}
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
/*
* First clear, just to be safe. Enable is only done
* in chip on 0->1 transition
*/
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
/* notify upper layers */
ipath_ib_piobufavail(dd->verbs_dev);
bail:
return;
}
static inline void make_sdma_desc(struct ipath_devdata *dd,
u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
{
WARN_ON(addr & 3);
/* SDmaPhyAddr[47:32] */
sdmadesc[1] = addr >> 32;
/* SDmaPhyAddr[31:0] */
sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
/* SDmaGeneration[1:0] */
sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
/* SDmaDwordCount[10:0] */
sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
/* SDmaBufOffset[12:2] */
sdmadesc[0] |= dwoffset & 0x7ffULL;
}
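/*
 * Illustrative sketch, not part of the original driver: unpack the
 * fields that make_sdma_desc() above packs, purely to document the
 * qword layout.  Field names follow the comments in that function;
 * this helper itself is hypothetical.
 */
static void sdma_desc_unpack_example(const u64 sdmadesc[2])
{
        u64 addr_hi  = sdmadesc[1];                         /* SDmaPhyAddr[47:32] */
        u64 addr_lo  = (sdmadesc[0] >> 32) & 0xfffffffcULL; /* SDmaPhyAddr[31:2] */
        u32 gen      = (sdmadesc[0] >> 30) & 3;             /* SDmaGeneration[1:0] */
        u32 dwlen    = (sdmadesc[0] >> 16) & 0x7ff;         /* SDmaDwordCount[10:0] */
        u32 dwoffset = sdmadesc[0] & 0x7ff;                 /* SDmaBufOffset[12:2] */

        pr_debug("sdma desc: addr %#llx gen %u dwlen %u dwoff %u\n",
                 (unsigned long long)((addr_hi << 32) | addr_lo),
                 gen, dwlen, dwoffset);
}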
/*
* This function queues one IB packet onto the send DMA queue per call.
* The caller is responsible for checking:
* 1) The number of send DMA descriptor entries is less than the size of
* the descriptor queue.
* 2) The IB SGE addresses and lengths are 32-bit aligned
* (except possibly the last SGE's length)
* 3) The SGE addresses are suitable for passing to dma_map_single().
*/
int ipath_sdma_verbs_send(struct ipath_devdata *dd,
struct ipath_sge_state *ss, u32 dwords,
struct ipath_verbs_txreq *tx)
{
unsigned long flags;
struct ipath_sge *sge;
int ret = 0;
u16 tail;
__le64 *descqp;
u64 sdmadesc[2];
u32 dwoffset;
dma_addr_t addr;
if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
ipath_dbg("packet size %X > ibmax %X, fail\n",
tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
ret = -EMSGSIZE;
goto fail;
}
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
retry:
if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
ret = -EBUSY;
goto unlock;
}
if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
if (ipath_sdma_make_progress(dd))
goto retry;
ret = -ENOBUFS;
goto unlock;
}
addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
tx->map_len, DMA_TO_DEVICE);
if (dma_mapping_error(&dd->pcidev->dev, addr))
goto ioerr;
dwoffset = tx->map_len >> 2;
make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
/* SDmaFirstDesc */
sdmadesc[0] |= 1ULL << 12;
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */
/* write to the descq */
tail = dd->ipath_sdma_descq_tail;
descqp = &dd->ipath_sdma_descq[tail].qw[0];
*descqp++ = cpu_to_le64(sdmadesc[0]);
*descqp++ = cpu_to_le64(sdmadesc[1]);
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
tx->txreq.start_idx = tail;
/* increment the tail */
if (++tail == dd->ipath_sdma_descq_cnt) {
tail = 0;
descqp = &dd->ipath_sdma_descq[0].qw[0];
++dd->ipath_sdma_generation;
}
sge = &ss->sge;
while (dwords) {
u32 dw;
u32 len;
len = dwords << 2;
if (len > sge->length)
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
BUG_ON(len == 0);
dw = (len + 3) >> 2;
addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
DMA_TO_DEVICE);
if (dma_mapping_error(&dd->pcidev->dev, addr))
goto unmap;
make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
/* SDmaUseLargeBuf has to be set in every descriptor */
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
sdmadesc[0] |= 1ULL << 14;
/* write to the descq */
*descqp++ = cpu_to_le64(sdmadesc[0]);
*descqp++ = cpu_to_le64(sdmadesc[1]);
/* increment the tail */
if (++tail == dd->ipath_sdma_descq_cnt) {
tail = 0;
descqp = &dd->ipath_sdma_descq[0].qw[0];
++dd->ipath_sdma_generation;
}
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr != NULL) {
if (++sge->n >= IPATH_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
dwoffset += dw;
dwords -= dw;
}
if (!tail)
descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
descqp -= 2;
/* SDmaLastDesc */
descqp[0] |= cpu_to_le64(1ULL << 11);
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
/* SDmaIntReq */
descqp[0] |= cpu_to_le64(1ULL << 15);
}
/* Commit writes to memory and advance the tail on the chip */
wmb();
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
tx->txreq.next_descq_idx = tail;
tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
dd->ipath_sdma_descq_tail = tail;
dd->ipath_sdma_descq_added += tx->txreq.sg_count;
list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
vl15_watchdog_enq(dd);
goto unlock;
unmap:
while (tail != dd->ipath_sdma_descq_tail) {
if (!tail)
tail = dd->ipath_sdma_descq_cnt - 1;
else
tail--;
unmap_desc(dd, tail);
}
ioerr:
ret = -EIO;
unlock:
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
fail:
return ret;
}
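/*
 * Illustrative caller sketch, not part of the original driver.  It
 * assumes a prepared ipath_verbs_txreq "tx" and SGE state "ss" that
 * already satisfy the alignment rules listed in the comment above
 * ipath_sdma_verbs_send(), and only shows how its return codes are
 * meant to be handled.
 */
static int example_sdma_send(struct ipath_devdata *dd,
                             struct ipath_sge_state *ss, u32 dwords,
                             struct ipath_verbs_txreq *tx)
{
        int ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);

        if (ret == -EBUSY || ret == -ENOBUFS)
                return ret;     /* engine aborting or ring full: retry later */
        if (ret == -EMSGSIZE || ret == -EIO)
                return ret;     /* too large or DMA mapping failed: fail the WQE */
        /* 0: descriptors queued; completion arrives via the notify tasklet */
        return 0;
}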


@@ -1,380 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
/**
* ipath_post_srq_receive - post a receive on a shared receive queue
* @ibsrq: the SRQ to post the receive on
* @wr: the list of work requests to post
* @bad_wr: the first WR to cause a problem is put here
*
* This may be called from interrupt context.
*/
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_rwq *wq;
unsigned long flags;
int ret;
for (; wr; wr = wr->next) {
struct ipath_rwqe *wqe;
u32 next;
int i;
if ((unsigned) wr->num_sge > srq->rq.max_sge) {
*bad_wr = wr;
ret = -EINVAL;
goto bail;
}
spin_lock_irqsave(&srq->rq.lock, flags);
wq = srq->rq.wq;
next = wq->head + 1;
if (next >= srq->rq.size)
next = 0;
if (next == wq->tail) {
spin_unlock_irqrestore(&srq->rq.lock, flags);
*bad_wr = wr;
ret = -ENOMEM;
goto bail;
}
wqe = get_rwqe_ptr(&srq->rq, wq->head);
wqe->wr_id = wr->wr_id;
wqe->num_sge = wr->num_sge;
for (i = 0; i < wr->num_sge; i++)
wqe->sg_list[i] = wr->sg_list[i];
/* Make sure queue entry is written before the head index. */
smp_wmb();
wq->head = next;
spin_unlock_irqrestore(&srq->rq.lock, flags);
}
ret = 0;
bail:
return ret;
}
/**
* ipath_create_srq - create a shared receive queue
* @ibpd: the protection domain of the SRQ to create
* @srq_init_attr: the attributes of the SRQ
* @udata: data from libipathverbs when creating a user SRQ
*/
struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
{
struct ipath_ibdev *dev = to_idev(ibpd->device);
struct ipath_srq *srq;
u32 sz;
struct ib_srq *ret;
if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
ret = ERR_PTR(-ENOSYS);
goto done;
}
if (srq_init_attr->attr.max_wr == 0) {
ret = ERR_PTR(-EINVAL);
goto done;
}
if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
(srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
ret = ERR_PTR(-EINVAL);
goto done;
}
srq = kmalloc(sizeof(*srq), GFP_KERNEL);
if (!srq) {
ret = ERR_PTR(-ENOMEM);
goto done;
}
/*
* Need to use vmalloc() if we want to support large #s of entries.
*/
srq->rq.size = srq_init_attr->attr.max_wr + 1;
srq->rq.max_sge = srq_init_attr->attr.max_sge;
sz = sizeof(struct ib_sge) * srq->rq.max_sge +
sizeof(struct ipath_rwqe);
srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
if (!srq->rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_srq;
}
/*
* Return the address of the RWQ as the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->outlen >= sizeof(__u64)) {
int err;
u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
srq->ip =
ipath_create_mmap_info(dev, s,
ibpd->uobject->context,
srq->rq.wq);
if (!srq->ip) {
ret = ERR_PTR(-ENOMEM);
goto bail_wq;
}
err = ib_copy_to_udata(udata, &srq->ip->offset,
sizeof(srq->ip->offset));
if (err) {
ret = ERR_PTR(err);
goto bail_ip;
}
} else
srq->ip = NULL;
/*
* ib_create_srq() will initialize srq->ibsrq.
*/
spin_lock_init(&srq->rq.lock);
srq->rq.wq->head = 0;
srq->rq.wq->tail = 0;
srq->limit = srq_init_attr->attr.srq_limit;
spin_lock(&dev->n_srqs_lock);
if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
spin_unlock(&dev->n_srqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
dev->n_srqs_allocated++;
spin_unlock(&dev->n_srqs_lock);
if (srq->ip) {
spin_lock_irq(&dev->pending_lock);
list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
ret = &srq->ibsrq;
goto done;
bail_ip:
kfree(srq->ip);
bail_wq:
vfree(srq->rq.wq);
bail_srq:
kfree(srq);
done:
return ret;
}
/**
* ipath_modify_srq - modify a shared receive queue
* @ibsrq: the SRQ to modify
* @attr: the new attributes of the SRQ
* @attr_mask: indicates which attributes to modify
* @udata: user data for ipathverbs.so
*/
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask,
struct ib_udata *udata)
{
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_rwq *wq;
int ret = 0;
if (attr_mask & IB_SRQ_MAX_WR) {
struct ipath_rwq *owq;
struct ipath_rwqe *p;
u32 sz, size, n, head, tail;
/* Check that the requested sizes are below the limits. */
if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
((attr_mask & IB_SRQ_LIMIT) ?
attr->srq_limit : srq->limit) > attr->max_wr) {
ret = -EINVAL;
goto bail;
}
sz = sizeof(struct ipath_rwqe) +
srq->rq.max_sge * sizeof(struct ib_sge);
size = attr->max_wr + 1;
wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
if (!wq) {
ret = -ENOMEM;
goto bail;
}
/* Check that we can write the offset to mmap. */
if (udata && udata->inlen >= sizeof(__u64)) {
__u64 offset_addr;
__u64 offset = 0;
ret = ib_copy_from_udata(&offset_addr, udata,
sizeof(offset_addr));
if (ret)
goto bail_free;
udata->outbuf =
(void __user *) (unsigned long) offset_addr;
ret = ib_copy_to_udata(udata, &offset,
sizeof(offset));
if (ret)
goto bail_free;
}
spin_lock_irq(&srq->rq.lock);
/*
* validate head pointer value and compute
* the number of remaining WQEs.
*/
owq = srq->rq.wq;
head = owq->head;
if (head >= srq->rq.size)
head = 0;
tail = owq->tail;
if (tail >= srq->rq.size)
tail = 0;
n = head;
if (n < tail)
n += srq->rq.size - tail;
else
n -= tail;
if (size <= n) {
ret = -EINVAL;
goto bail_unlock;
}
n = 0;
p = wq->wq;
while (tail != head) {
struct ipath_rwqe *wqe;
int i;
wqe = get_rwqe_ptr(&srq->rq, tail);
p->wr_id = wqe->wr_id;
p->num_sge = wqe->num_sge;
for (i = 0; i < wqe->num_sge; i++)
p->sg_list[i] = wqe->sg_list[i];
n++;
p = (struct ipath_rwqe *)((char *) p + sz);
if (++tail >= srq->rq.size)
tail = 0;
}
srq->rq.wq = wq;
srq->rq.size = size;
wq->head = n;
wq->tail = 0;
if (attr_mask & IB_SRQ_LIMIT)
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
vfree(owq);
if (srq->ip) {
struct ipath_mmap_info *ip = srq->ip;
struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
u32 s = sizeof(struct ipath_rwq) + size * sz;
ipath_update_mmap_info(dev, ip, s, wq);
/*
* Return the offset to mmap.
* See ipath_mmap() for details.
*/
if (udata && udata->inlen >= sizeof(__u64)) {
ret = ib_copy_to_udata(udata, &ip->offset,
sizeof(ip->offset));
if (ret)
goto bail;
}
spin_lock_irq(&dev->pending_lock);
if (list_empty(&ip->pending_mmaps))
list_add(&ip->pending_mmaps,
&dev->pending_mmaps);
spin_unlock_irq(&dev->pending_lock);
}
} else if (attr_mask & IB_SRQ_LIMIT) {
spin_lock_irq(&srq->rq.lock);
if (attr->srq_limit >= srq->rq.size)
ret = -EINVAL;
else
srq->limit = attr->srq_limit;
spin_unlock_irq(&srq->rq.lock);
}
goto bail;
bail_unlock:
spin_unlock_irq(&srq->rq.lock);
bail_free:
vfree(wq);
bail:
return ret;
}
int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
struct ipath_srq *srq = to_isrq(ibsrq);
attr->max_wr = srq->rq.size - 1;
attr->max_sge = srq->rq.max_sge;
attr->srq_limit = srq->limit;
return 0;
}
/**
* ipath_destroy_srq - destroy a shared receive queue
* @ibsrq: the SRQ to destroy
*/
int ipath_destroy_srq(struct ib_srq *ibsrq)
{
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
spin_unlock(&dev->n_srqs_lock);
if (srq->ip)
kref_put(&srq->ip->ref, ipath_release_mmap_info);
else
vfree(srq->rq.wq);
kfree(srq);
return 0;
}
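/*
 * Illustrative sketch, not part of the original driver: what a single
 * receive looks like by the time the verbs layer hands it to
 * ipath_post_srq_receive() above.  The wr_id and SGE are hypothetical;
 * real consumers go through ib_post_srq_recv().
 */
static int example_post_one_srq_recv(struct ib_srq *srq, u64 wr_id,
                                     struct ib_sge *sge)
{
        struct ib_recv_wr wr = {
                .next    = NULL,
                .wr_id   = wr_id,
                .sg_list = sge,
                .num_sge = 1,
        };
        struct ib_recv_wr *bad_wr;

        /* returns -EINVAL if num_sge > max_sge, -ENOMEM if the ring is full */
        return ipath_post_srq_receive(srq, &wr, &bad_wr);
}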


@@ -1,347 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ipath_kernel.h"
struct infinipath_stats ipath_stats;
/**
* ipath_snap_cntr - snapshot a chip counter
* @dd: the infinipath device
* @creg: the counter to snapshot
*
* called from add_timer and user counter read calls, to deal with
* counters that wrap in "human time". The words sent and received, and
* the packets sent and received are all that we worry about. For now,
* at least, we don't worry about error counters, because if they wrap
* that quickly, we probably don't care. We may eventually just make this
 * handle all the counters. Word counters can wrap in about 20 seconds
* of full bandwidth traffic, packet counters in a few hours.
*/
u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
{
u32 val, reg64 = 0;
u64 val64;
unsigned long t0, t1;
u64 ret;
t0 = jiffies;
/* If fast increment counters are only 32 bits, snapshot them,
* and maintain them as 64bit values in the driver */
if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
(creg == dd->ipath_cregs->cr_wordsendcnt ||
creg == dd->ipath_cregs->cr_wordrcvcnt ||
creg == dd->ipath_cregs->cr_pktsendcnt ||
creg == dd->ipath_cregs->cr_pktrcvcnt)) {
val64 = ipath_read_creg(dd, creg);
val = val64 == ~0ULL ? ~0U : 0;
reg64 = 1;
} else /* val64 just to keep gcc quiet... */
val64 = val = ipath_read_creg32(dd, creg);
/*
* See if a second has passed. This is just a way to detect things
* that are quite broken. Normally this should take just a few
* cycles (the check is for long enough that we don't care if we get
* pre-empted.) An Opteron HT O read timeout is 4 seconds with
* normal NB values
*/
t1 = jiffies;
if (time_before(t0 + HZ, t1) && val == -1) {
ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
creg);
ret = 0ULL;
goto bail;
}
if (reg64) {
ret = val64;
goto bail;
}
if (creg == dd->ipath_cregs->cr_wordsendcnt) {
if (val != dd->ipath_lastsword) {
dd->ipath_sword += val - dd->ipath_lastsword;
dd->ipath_lastsword = val;
}
val64 = dd->ipath_sword;
} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
if (val != dd->ipath_lastrword) {
dd->ipath_rword += val - dd->ipath_lastrword;
dd->ipath_lastrword = val;
}
val64 = dd->ipath_rword;
} else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
if (val != dd->ipath_lastspkts) {
dd->ipath_spkts += val - dd->ipath_lastspkts;
dd->ipath_lastspkts = val;
}
val64 = dd->ipath_spkts;
} else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
if (val != dd->ipath_lastrpkts) {
dd->ipath_rpkts += val - dd->ipath_lastrpkts;
dd->ipath_lastrpkts = val;
}
val64 = dd->ipath_rpkts;
} else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
if (dd->ibdeltainprog)
val64 -= val64 - dd->ibsymsnap;
val64 -= dd->ibsymdelta;
} else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
if (dd->ibdeltainprog)
val64 -= val64 - dd->iblnkerrsnap;
val64 -= dd->iblnkerrdelta;
} else
val64 = (u64) val;
ret = val64;
bail:
return ret;
}
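/*
 * Illustrative sketch, not part of the original driver: the 32-bit
 * wrap accumulation that ipath_snap_cntr() applies to the word and
 * packet counters, shown in isolation.  "last" and "total" stand in
 * for the driver's shadow state (e.g. ipath_lastsword / ipath_sword).
 */
static u64 accumulate_32bit_counter(u32 val, u32 *last, u64 *total)
{
        /*
         * Unsigned 32-bit subtraction yields the correct delta even if
         * the hardware counter wrapped once since the last snapshot.
         */
        if (val != *last) {
                *total += val - *last;
                *last = val;
        }
        return *total;
}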
/**
* ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
* @dd: the infinipath device
*
* print the delta of egrfull/hdrqfull errors for kernel ports no more than
* every 5 seconds. User processes are printed at close, but kernel doesn't
 * close, so... Separate routine so it may be called from other places someday,
 * and so the function name is meaningful when printed by _IPATH_INFO
*/
static void ipath_qcheck(struct ipath_devdata *dd)
{
static u64 last_tot_hdrqfull;
struct ipath_portdata *pd = dd->ipath_pd[0];
size_t blen = 0;
char buf[128];
u32 hdrqtail;
*buf = 0;
if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
pd->port_hdrqfull -
dd->ipath_p0_hdrqfull);
dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
}
if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
blen += snprintf(buf + blen, sizeof buf - blen,
"%srcvegrfull %llu",
blen ? ", " : "",
(unsigned long long)
(ipath_stats.sps_etidfull -
dd->ipath_last_tidfull));
dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
}
/*
* this is actually the number of hdrq full interrupts, not actual
* events, but at the moment that's mostly what I'm interested in.
* Actual count, etc. is in the counters, if needed. For production
* users this won't ordinarily be printed.
*/
if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
blen += snprintf(buf + blen, sizeof buf - blen,
"%shdrqfull %llu (all ports)",
blen ? ", " : "",
(unsigned long long)
(ipath_stats.sps_hdrqfull -
last_tot_hdrqfull));
last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
}
if (blen)
ipath_dbg("%s\n", buf);
hdrqtail = ipath_get_hdrqtail(pd);
if (pd->port_head != hdrqtail) {
if (dd->ipath_lastport0rcv_cnt ==
ipath_stats.sps_port0pkts) {
ipath_cdbg(PKT, "missing rcv interrupts? "
"port0 hd=%x tl=%x; port0pkts %llx; write"
" hd (w/intr)\n",
pd->port_head, hdrqtail,
(unsigned long long)
ipath_stats.sps_port0pkts);
ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
dd->ipath_rhdrhead_intr_off, pd->port_port);
}
dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
}
}
static void ipath_chk_errormask(struct ipath_devdata *dd)
{
static u32 fixed;
u32 ctrl;
unsigned long errormask;
unsigned long hwerrs;
if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
return;
errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
if (errormask == dd->ipath_errormask)
return;
fixed++;
hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
dd->ipath_errormask);
if ((hwerrs & dd->ipath_hwerrmask) ||
(ctrl & INFINIPATH_C_FREEZEMODE)) {
/* force re-interrupt of pending events, just in case */
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
dev_info(&dd->pcidev->dev,
"errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
fixed, errormask, (unsigned long)dd->ipath_errormask,
ctrl, hwerrs);
} else
ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
fixed, errormask,
(unsigned long)dd->ipath_errormask);
}
/**
* ipath_get_faststats - get word counters from chip before they overflow
 * @opaque: contains a pointer to the infinipath device ipath_devdata
*
* called from add_timer
*/
void ipath_get_faststats(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
int i;
static unsigned cnt;
unsigned long flags;
u64 traffic_wds;
/*
* don't access the chip while running diags, or memory diags can
* fail
*/
if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
ipath_diag_inuse)
/* but re-arm the timer, for diags case; won't hurt other */
goto done;
/*
* We now try to maintain a "active timer", based on traffic
* exceeding a threshold, so we need to check the word-counts
* even if they are 64-bit.
*/
traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
traffic_wds -= dd->ipath_traffic_wds;
dd->ipath_traffic_wds += traffic_wds;
if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
atomic_add(5, &dd->ipath_active_time); /* S/B #define */
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
}
ipath_qcheck(dd);
/*
* deal with repeat error suppression. Doesn't really matter if
* last error was almost a full interval ago, or just a few usecs
* ago; still won't get more than 2 per interval. We may want
* longer intervals for this eventually, could do with mod, counter
* or separate timer. Also see code in ipath_handle_errors() and
* ipath_handle_hwerrors().
*/
if (dd->ipath_lasterror)
dd->ipath_lasterror = 0;
if (dd->ipath_lasthwerror)
dd->ipath_lasthwerror = 0;
if (dd->ipath_maskederrs
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
int iserr;
iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
dd->ipath_maskederrs);
if (dd->ipath_maskederrs &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Re-enabling masked errors "
"(%s)\n", ebuf);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal", for some
* types of processes (mostly benchmarks) that send
* huge numbers of messages, while not processing
* them. So only complain about these at debug
* level.
*/
if (iserr)
ipath_dbg(
"Re-enabling queue full errors (%s)\n",
ebuf);
else
ipath_cdbg(ERRPKT, "Re-enabling packet"
" problem interrupt (%s)\n", ebuf);
}
/* re-enable masked errors */
dd->ipath_errormask |= dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
dd->ipath_errormask);
dd->ipath_maskederrs = 0;
}
/* limit qfull messages to ~one per minute per port */
if ((++cnt & 0x10)) {
for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (pd && pd->port_lastrcvhdrqtail != -1)
pd->port_lastrcvhdrqtail = -1;
}
}
ipath_chk_errormask(dd);
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}
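/*
 * Illustrative sketch, not part of the original driver: one way the
 * stats timer that fires ipath_get_faststats() could be armed with the
 * timer API of this kernel generation.  The driver does this in its
 * init path; the helper name here is hypothetical.
 */
static void example_start_faststats(struct ipath_devdata *dd)
{
        init_timer(&dd->ipath_stats_timer);
        dd->ipath_stats_timer.function = ipath_get_faststats;
        dd->ipath_stats_timer.data = (unsigned long)dd;
        dd->ipath_stats_timer.expires = jiffies + HZ * 5;
        add_timer(&dd->ipath_stats_timer);
        /* thereafter ipath_get_faststats() re-arms itself via mod_timer() */
}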

File diff suppressed because it is too large


@@ -1,547 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ipath_verbs.h"
#include "ipath_kernel.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
/**
* ipath_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
*
* Return 1 if constructed; otherwise, return 0.
*/
int ipath_make_uc_req(struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
struct ipath_swqe *wqe;
unsigned long flags;
u32 hwords;
u32 bth0;
u32 len;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
int ret = 0;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
ohdr = &qp->s_hdr.u.oth;
if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
ohdr = &qp->s_hdr.u.l.oth;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 1 << 22; /* Set M bit */
/* Get the next send request. */
wqe = get_swqe_ptr(qp, qp->s_cur);
qp->s_wqe = NULL;
switch (qp->s_state) {
default:
if (!(ib_ipath_state_ops[qp->state] &
IPATH_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
if (qp->s_cur == qp->s_head)
goto bail;
/*
* Start a new request.
*/
qp->s_psn = wqe->psn = qp->s_next_psn;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_len = len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_SEND)
qp->s_state = OP(SEND_ONLY);
else {
qp->s_state =
OP(SEND_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
ohdr->u.rc.reth.vaddr =
cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->wr.wr.rdma.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
hwords += sizeof(struct ib_reth) / 4;
if (len > pmtu) {
qp->s_state = OP(RDMA_WRITE_FIRST);
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_ONLY);
else {
qp->s_state =
OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
/* Immediate data comes after the RETH */
ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
default:
goto bail;
}
break;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_SEND)
qp->s_state = OP(SEND_LAST);
else {
qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
}
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
len = pmtu;
break;
}
if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
qp->s_state = OP(RDMA_WRITE_LAST);
else {
qp->s_state =
OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
/* Immediate data comes after the BTH */
ohdr->u.imm_data = wqe->wr.ex.imm_data;
hwords += 1;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
}
qp->s_wqe = wqe;
if (++qp->s_cur >= qp->s_size)
qp->s_cur = 0;
break;
}
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = &qp->s_sge;
qp->s_cur_size = len;
ipath_make_ruc_header(to_idev(qp->ibqp.device),
qp, ohdr, bth0 | (qp->s_state << 24),
qp->s_next_psn++ & IPATH_PSN_MASK);
done:
ret = 1;
goto unlock;
bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
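/*
 * Illustrative sketch, not part of the original driver: the
 * segmentation rule ipath_make_uc_req() implements.  A request of
 * "len" payload bytes that fits in one path MTU goes out as a single
 * *_ONLY packet; otherwise it becomes FIRST/MIDDLE/LAST packets of at
 * most one MTU each.
 */
static u32 example_uc_packet_count(u32 len, u32 pmtu)
{
        return len <= pmtu ? 1 : DIV_ROUND_UP(len, pmtu);
}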
/**
* ipath_uc_rcv - handle an incoming UC packet
* @dev: the device the packet came in on
* @hdr: the header of the packet
* @has_grh: true if the packet has a GRH
* @data: the packet data
* @tlen: the length of the packet
* @qp: the QP for this packet.
*
* This is called from ipath_qp_rcv() to process an incoming UC packet
* for the given QP.
* Called at interrupt level.
*/
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 psn;
u32 pad;
struct ib_wc wc;
u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
struct ib_reth *reth;
int header_in_data;
/* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
goto done;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
hdrsize = 8 + 12; /* LRH + BTH */
psn = be32_to_cpu(ohdr->bth[2]);
header_in_data = 0;
} else {
ohdr = &hdr->u.l.oth;
hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
/*
* The header with GRH is 60 bytes and the
* core driver sets the eager header buffer
* size to 56 bytes so the last 4 bytes of
 * the BTH header (PSN) are in the data buffer.
*/
header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
psn = be32_to_cpu(((__be32 *) data)[0]);
data += sizeof(__be32);
} else
psn = be32_to_cpu(ohdr->bth[2]);
}
/*
 * The opcode is in the low byte when it's in network order
* (top byte when in host order).
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
memset(&wc, 0, sizeof wc);
/* Compare the PSN versus the expected PSN. */
if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
/*
* Handle a sequence error.
* Silently drop any current message.
*/
qp->r_psn = psn;
inv:
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
goto send_first;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
goto rdma_first;
default:
dev->n_pkt_drops++;
goto done;
}
}
/* Check for opcode sequence errors. */
switch (qp->r_state) {
case OP(SEND_FIRST):
case OP(SEND_MIDDLE):
if (opcode == OP(SEND_MIDDLE) ||
opcode == OP(SEND_LAST) ||
opcode == OP(SEND_LAST_WITH_IMMEDIATE))
break;
goto inv;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_MIDDLE):
if (opcode == OP(RDMA_WRITE_MIDDLE) ||
opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
break;
goto inv;
default:
if (opcode == OP(SEND_FIRST) ||
opcode == OP(SEND_ONLY) ||
opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
opcode == OP(RDMA_WRITE_FIRST) ||
opcode == OP(RDMA_WRITE_ONLY) ||
opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
break;
goto inv;
}
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
if (qp->r_flags & IPATH_R_REUSE_SGE) {
qp->r_flags &= ~IPATH_R_REUSE_SGE;
qp->r_sge = qp->s_rdma_read_sge;
} else if (!ipath_get_rwqe(qp, 0)) {
dev->n_pkt_drops++;
goto done;
}
/* Save the WQE so we can reuse it in case of an error. */
qp->s_rdma_read_sge = qp->r_sge;
qp->r_rcv_len = 0;
if (opcode == OP(SEND_ONLY))
goto send_last;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
/* FALLTHROUGH */
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
ipath_copy_sge(&qp->r_sge, data, pmtu);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
if (header_in_data) {
wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
/* FALLTHROUGH */
case OP(SEND_LAST):
send_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) {
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto done;
}
wc.opcode = IB_WC_RECV;
last_imm:
ipath_copy_sge(&qp->r_sge, data, tlen);
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = qp->remote_ah_attr.dlid;
wc.sl = qp->remote_ah_attr.sl;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(1 << 23)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
/* RETH comes after BTH */
if (!header_in_data)
reth = &ohdr->u.rc.reth;
else {
reth = (struct ib_reth *)data;
data += sizeof(*reth);
}
hdrsize += sizeof(*reth);
qp->r_len = be32_to_cpu(reth->length);
qp->r_rcv_len = 0;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = be64_to_cpu(reth->vaddr);
int ok;
/* Check rkey */
ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok)) {
dev->n_pkt_drops++;
goto done;
}
} else {
qp->r_sge.sg_list = NULL;
qp->r_sge.sge.mr = NULL;
qp->r_sge.sge.vaddr = NULL;
qp->r_sge.sge.length = 0;
qp->r_sge.sge.sge_length = 0;
}
if (unlikely(!(qp->qp_access_flags &
IB_ACCESS_REMOTE_WRITE))) {
dev->n_pkt_drops++;
goto done;
}
if (opcode == OP(RDMA_WRITE_ONLY))
goto rdma_last;
else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
goto rdma_last_imm;
/* FALLTHROUGH */
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4))) {
dev->n_pkt_drops++;
goto done;
}
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) {
dev->n_pkt_drops++;
goto done;
}
ipath_copy_sge(&qp->r_sge, data, pmtu);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
rdma_last_imm:
if (header_in_data) {
wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else {
/* Immediate data comes after BTH */
wc.ex.imm_data = ohdr->u.imm_data;
}
hdrsize += 4;
wc.wc_flags = IB_WC_WITH_IMM;
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
dev->n_pkt_drops++;
goto done;
}
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
dev->n_pkt_drops++;
goto done;
}
if (qp->r_flags & IPATH_R_REUSE_SGE)
qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 1)) {
dev->n_pkt_drops++;
goto done;
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
goto last_imm;
case OP(RDMA_WRITE_LAST):
rdma_last:
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* XXX LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4))) {
dev->n_pkt_drops++;
goto done;
}
/* Don't count the CRC. */
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
dev->n_pkt_drops++;
goto done;
}
ipath_copy_sge(&qp->r_sge, data, tlen);
break;
default:
/* Drop packet for unknown opcodes. */
dev->n_pkt_drops++;
goto done;
}
qp->r_psn++;
qp->r_state = opcode;
done:
return;
}


@@ -1,580 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/sched.h>
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
#include "ipath_kernel.h"
/**
* ipath_ud_loopback - handle send on loopback QPs
* @sqp: the sending QP
* @swqe: the send work request
*
* This is called from ipath_make_ud_req() to forward a WQE addressed
* to the same HCA.
* Note that the receive interrupt handler may be calling ipath_ud_rcv()
* while this is being called.
*/
static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
{
struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
struct ipath_qp *qp;
struct ib_ah_attr *ah_attr;
unsigned long flags;
struct ipath_rq *rq;
struct ipath_srq *srq;
struct ipath_sge_state rsge;
struct ipath_sge *sge;
struct ipath_rwq *wq;
struct ipath_rwqe *wqe;
void (*handler)(struct ib_event *, void *);
struct ib_wc wc;
u32 tail;
u32 rlen;
u32 length;
qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
dev->n_pkt_drops++;
goto done;
}
/*
* Check that the qkey matches (except for QP0, see 9.6.1.4.1).
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
if (unlikely(qp->ibqp.qp_num &&
((int) swqe->wr.wr.ud.remote_qkey < 0 ?
sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
goto drop;
}
/*
* A GRH is expected to precede the data even if not
* present on the wire.
*/
length = swqe->length;
memset(&wc, 0, sizeof wc);
wc.byte_len = length + sizeof(struct ib_grh);
if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = swqe->wr.ex.imm_data;
}
/*
* This would be a lot simpler if we could call ipath_get_rwqe()
* but that uses state that the receive interrupt handler uses
* so we would need to lock out receive interrupts while doing
* local loopback.
*/
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
rq = &srq->rq;
} else {
srq = NULL;
handler = NULL;
rq = &qp->r_rq;
}
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
* since ipath_post_receive() won't fill the empty slot.
*/
spin_lock_irqsave(&rq->lock, flags);
wq = rq->wq;
tail = wq->tail;
/* Validate tail before using it since it is user writable. */
if (tail >= rq->size)
tail = 0;
if (unlikely(tail == wq->head)) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto drop;
}
wqe = get_rwqe_ptr(rq, tail);
rsge.sg_list = qp->r_ud_sg_list;
if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto drop;
}
/* Silently drop packets which are too big. */
if (wc.byte_len > rlen) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
goto drop;
}
if (++tail >= rq->size)
tail = 0;
wq->tail = tail;
wc.wr_id = wqe->wr_id;
if (handler) {
u32 n;
/*
* validate head pointer value and compute
* the number of remaining WQEs.
*/
n = wq->head;
if (n >= rq->size)
n = 0;
if (n < tail)
n += rq->size - tail;
else
n -= tail;
if (n < srq->limit) {
struct ib_event ev;
srq->limit = 0;
spin_unlock_irqrestore(&rq->lock, flags);
ev.device = qp->ibqp.device;
ev.element.srq = qp->ibqp.srq;
ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
handler(&ev, srq->ibsrq.srq_context);
} else
spin_unlock_irqrestore(&rq->lock, flags);
} else
spin_unlock_irqrestore(&rq->lock, flags);
ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
if (ah_attr->ah_flags & IB_AH_GRH) {
ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
wc.wc_flags |= IB_WC_GRH;
} else
ipath_skip_sge(&rsge, sizeof(struct ib_grh));
sge = swqe->sg_list;
while (length) {
u32 len = sge->length;
if (len > length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
BUG_ON(len == 0);
ipath_copy_sge(&rsge, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (--swqe->wr.num_sge)
sge++;
} else if (sge->length == 0 && sge->mr != NULL) {
if (++sge->n >= IPATH_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
length -= len;
}
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = sqp->ibqp.qp_num;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
wc.slid = dev->dd->ipath_lid |
(ah_attr->src_path_bits &
((1 << dev->dd->ipath_lmc) - 1));
wc.sl = ah_attr->sl;
wc.dlid_path_bits =
ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
swqe->wr.send_flags & IB_SEND_SOLICITED);
drop:
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
done:;
}
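/*
 * Illustrative sketch, not part of the original driver: the qkey
 * selection rule noted in ipath_ud_loopback() and ipath_make_ud_req().
 * A WR qkey with the high-order bit set means "use the qkey from the
 * QP context instead of the one in the WR" (see 10.2.5).
 */
static u32 example_effective_qkey(u32 wr_qkey, u32 qp_qkey)
{
        return (int)wr_qkey < 0 ? qp_qkey : wr_qkey;
}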
/**
* ipath_make_ud_req - construct a UD request packet
* @qp: the QP
*
* Return 1 if constructed; otherwise, return 0.
*/
int ipath_make_ud_req(struct ipath_qp *qp)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct ipath_swqe *wqe;
unsigned long flags;
u32 nwords;
u32 extra_bytes;
u32 bth0;
u16 lrh0;
u16 lid;
int ret = 0;
int next_cur;
spin_lock_irqsave(&qp->s_lock, flags);
if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
if (qp->s_last == qp->s_head)
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
wqe = get_swqe_ptr(qp, qp->s_last);
ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done;
}
if (qp->s_cur == qp->s_head)
goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur);
next_cur = qp->s_cur + 1;
if (next_cur >= qp->s_size)
next_cur = 0;
/* Construct the header. */
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
dev->n_multicast_xmit++;
else
dev->n_unicast_xmit++;
} else {
dev->n_unicast_xmit++;
lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
if (unlikely(lid == dev->dd->ipath_lid)) {
/*
* If DMAs are in progress, we can't generate
* a completion for the loopback packet since
* it would be out of order.
* XXX Instead of waiting, we could queue a
* zero length descriptor so we get a callback.
*/
if (atomic_read(&qp->s_dma_busy)) {
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
qp->s_cur = next_cur;
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done;
}
}
qp->s_cur = next_cur;
extra_bytes = -wqe->length & 3;
nwords = (wqe->length + extra_bytes) >> 2;
/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
qp->s_hdrwords = 7;
qp->s_cur_size = wqe->length;
qp->s_cur_sge = &qp->s_sge;
qp->s_dmult = ah_attr->static_rate;
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
qp->s_sge.sg_list = wqe->sg_list + 1;
qp->s_sge.num_sge = wqe->wr.num_sge;
if (ah_attr->ah_flags & IB_AH_GRH) {
/* Header size in 32-bit words. */
qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
&ah_attr->grh,
qp->s_hdrwords, nwords);
lrh0 = IPATH_LRH_GRH;
ohdr = &qp->s_hdr.u.l.oth;
/*
* Don't worry about sending to locally attached multicast
 * QPs; what happens in that case is left unspecified by the spec.
*/
} else {
/* Header size in 32-bit words. */
lrh0 = IPATH_LRH_BTH;
ohdr = &qp->s_hdr.u.oth;
}
if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
qp->s_hdrwords++;
ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
} else
bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
lrh0 |= ah_attr->sl << 4;
if (qp->ibqp.qp_type == IB_QPT_SMI)
lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
SIZE_OF_CRC);
lid = dev->dd->ipath_lid;
if (lid) {
lid |= ah_attr->src_path_bits &
((1 << dev->dd->ipath_lmc) - 1);
qp->s_hdr.lrh[3] = cpu_to_be16(lid);
} else
qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
if (wqe->wr.send_flags & IB_SEND_SOLICITED)
bth0 |= 1 << 23;
bth0 |= extra_bytes << 20;
bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
ipath_get_pkey(dev->dd, qp->s_pkey_index);
ohdr->bth[0] = cpu_to_be32(bth0);
/*
* Use the multicast QP if the destination LID is a multicast LID.
*/
ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
ah_attr->dlid != IPATH_PERMISSIVE_LID ?
cpu_to_be32(IPATH_MULTICAST_QPN) :
cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
qp->qkey : wqe->wr.wr.ud.remote_qkey);
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
done:
ret = 1;
goto unlock;
bail:
qp->s_flags &= ~IPATH_S_BUSY;
unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
return ret;
}
/**
* ipath_ud_rcv - receive an incoming UD packet
* @dev: the device the packet came in on
* @hdr: the packet header
* @has_grh: true if the packet has a GRH
* @data: the packet data
* @tlen: the packet length
* @qp: the QP the packet came on
*
* This is called from ipath_qp_rcv() to process an incoming UD packet
* for the given QP.
* Called at interrupt level.
*/
void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
struct ipath_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 pad;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
u16 dlid;
int header_in_data;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
header_in_data = 0;
} else {
ohdr = &hdr->u.l.oth;
hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
/*
* The header with GRH is 68 bytes and the core driver sets
* the eager header buffer size to 56 bytes so the last 12
 * bytes of the IB header are in the data buffer.
*/
header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
if (header_in_data) {
qkey = be32_to_cpu(((__be32 *) data)[1]);
src_qp = be32_to_cpu(((__be32 *) data)[2]);
data += 12;
} else {
qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
}
}
src_qp &= IPATH_QPN_MASK;
/*
* Check that the permissive LID is only used on QP0
* and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
*/
if (qp->ibqp.qp_num) {
if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE)) {
dev->n_pkt_drops++;
goto bail;
}
if (unlikely(qkey != qp->qkey)) {
/* XXX OK to lose a count once in a while. */
dev->qkey_violations++;
dev->n_pkt_drops++;
goto bail;
}
} else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
hdr->lrh[3] == IB_LID_PERMISSIVE) {
struct ib_smp *smp = (struct ib_smp *) data;
if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
dev->n_pkt_drops++;
goto bail;
}
}
/*
* The opcode is in the low byte when it's in network order
* (top byte when in host order).
*/
opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
if (header_in_data) {
wc.ex.imm_data = *(__be32 *) data;
data += sizeof(__be32);
} else
wc.ex.imm_data = ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
hdrsize += sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
} else {
dev->n_pkt_drops++;
goto bail;
}
/* Get the number of bytes the message was padded by. */
pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
if (unlikely(tlen < (hdrsize + pad + 4))) {
/* Drop incomplete packets. */
dev->n_pkt_drops++;
goto bail;
}
tlen -= hdrsize + pad + 4;
/* Drop invalid MAD packets (see 13.5.3.1). */
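/*
* At this point tlen is the MAD payload length: QP0 traffic must be a
* 256-byte SMP arriving on VL15, and QP1 traffic must be a 256-byte
* GMP arriving on any VL other than 15 (the VL is the top nibble of
* lrh[0]).
*/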
if (unlikely((qp->ibqp.qp_num == 0 &&
(tlen != 256 ||
(be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
(qp->ibqp.qp_num == 1 &&
(tlen != 256 ||
(be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
dev->n_pkt_drops++;
goto bail;
}
/*
* A GRH is expected to precede the data even if not
* present on the wire.
*/
wc.byte_len = tlen + sizeof(struct ib_grh);
/*
* Get the next work request entry to find where to put the data.
*/
if (qp->r_flags & IPATH_R_REUSE_SGE)
qp->r_flags &= ~IPATH_R_REUSE_SGE;
else if (!ipath_get_rwqe(qp, 0)) {
/*
* Count VL15 packets dropped due to no receive buffer.
* Otherwise, count them as buffer overruns since usually,
* the HW will be able to receive packets even if there are
* no QPs with posted receive buffers.
*/
if (qp->ibqp.qp_num == 0)
dev->n_vl15_dropped++;
else
dev->rcv_errors++;
goto bail;
}
/* Silently drop packets which are too big. */
if (wc.byte_len > qp->r_len) {
qp->r_flags |= IPATH_R_REUSE_SGE;
dev->n_pkt_drops++;
goto bail;
}
if (has_grh) {
ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh));
wc.wc_flags |= IB_WC_GRH;
} else
ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
ipath_copy_sge(&qp->r_sge, data,
wc.byte_len - sizeof(struct ib_grh));
if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
goto bail;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
wc.vendor_err = 0;
wc.qp = &qp->ibqp;
wc.src_qp = src_qp;
/* XXX do we know which pkey matched? Only needed for GSI. */
wc.pkey_index = 0;
wc.slid = be16_to_cpu(hdr->lrh[3]);
wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
dlid = be16_to_cpu(hdr->lrh[1]);
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
dlid & ((1 << dev->dd->ipath_lmc) - 1);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
cpu_to_be32(1 << 23)) != 0);
bail:;
}

@@ -1,229 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include "ipath_kernel.h"
static void __ipath_release_user_pages(struct page **p, size_t num_pages,
int dirty)
{
size_t i;
for (i = 0; i < num_pages; i++) {
ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
(unsigned long) num_pages, p[i]);
if (dirty)
set_page_dirty_lock(p[i]);
put_page(p[i]);
}
}
/* call with current->mm->mmap_sem held */
static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
unsigned long lock_limit;
size_t got;
int ret;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (num_pages > lock_limit) {
ret = -ENOMEM;
goto bail;
}
ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
(unsigned long) num_pages, start_page);
for (got = 0; got < num_pages; got += ret) {
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
p + got, NULL);
if (ret < 0)
goto bail_release;
}
current->mm->pinned_vm += num_pages;
ret = 0;
goto bail;
bail_release:
__ipath_release_user_pages(p, got, 0);
bail:
return ret;
}
/**
* ipath_map_page - a safety wrapper around pci_map_page()
*
* A dma_addr of all 0's is interpreted by the chip as "disabled".
* Unfortunately, it can also be a valid dma_addr returned on some
* architectures.
*
* The powerpc iommu assigns dma_addrs in ascending order, so we don't
* have to bother with retries or mapping a dummy page to ensure we
* don't just get the same mapping again.
*
* I'm sure we won't be so lucky with other IOMMUs, so FIXME.
*/
dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
unsigned long offset, size_t size, int direction)
{
dma_addr_t phys;
phys = pci_map_page(hwdev, page, offset, size, direction);
if (phys == 0) {
pci_unmap_page(hwdev, phys, size, direction);
phys = pci_map_page(hwdev, page, offset, size, direction);
/*
* FIXME: If we get 0 again, we should keep this page,
* map another, then free the 0 page.
*/
}
return phys;
}
/**
* ipath_map_single - a safety wrapper around pci_map_single()
*
* Same idea as ipath_map_page().
*/
dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
int direction)
{
dma_addr_t phys;
phys = pci_map_single(hwdev, ptr, size, direction);
if (phys == 0) {
pci_unmap_single(hwdev, phys, size, direction);
phys = pci_map_single(hwdev, ptr, size, direction);
/*
* FIXME: If we get 0 again, we should keep this page,
* map another, then free the 0 page.
*/
}
return phys;
}
/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
* @num_pages: the number of pages
* @p: the output page structures
*
* This function takes a given start page (a page-aligned user virtual
* address) and pins that page and the pages that follow it, num_pages in
* total. For now, num_pages is always 1, but that will probably change at
* some point (the caller is doing expected sends on a single virtually
* contiguous buffer, so all of its pages could be pinned at once).
*/
int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p)
{
int ret;
down_write(&current->mm->mmap_sem);
ret = __ipath_get_user_pages(start_page, num_pages, p);
up_write(&current->mm->mmap_sem);
return ret;
}
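/*
 * Illustrative (hypothetical) caller -- not from the original source --
 * showing the intended pairing with ipath_release_user_pages() below:
 *
 *	struct page *pg;
 *
 *	if (!ipath_get_user_pages(uaddr & PAGE_MASK, 1, &pg)) {
 *		... DMA-map or otherwise use pg ...
 *		ipath_release_user_pages(&pg, 1);
 *	}
 */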
void ipath_release_user_pages(struct page **p, size_t num_pages)
{
down_write(&current->mm->mmap_sem);
__ipath_release_user_pages(p, num_pages, 1);
current->mm->pinned_vm -= num_pages;
up_write(&current->mm->mmap_sem);
}
struct ipath_user_pages_work {
struct work_struct work;
struct mm_struct *mm;
unsigned long num_pages;
};
static void user_pages_account(struct work_struct *_work)
{
struct ipath_user_pages_work *work =
container_of(_work, struct ipath_user_pages_work, work);
down_write(&work->mm->mmap_sem);
work->mm->pinned_vm -= work->num_pages;
up_write(&work->mm->mmap_sem);
mmput(work->mm);
kfree(work);
}
void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
{
struct ipath_user_pages_work *work;
struct mm_struct *mm;
__ipath_release_user_pages(p, num_pages, 1);
mm = get_task_mm(current);
if (!mm)
return;
work = kmalloc(sizeof(*work), GFP_KERNEL);
if (!work)
goto bail_mm;
INIT_WORK(&work->work, user_pages_account);
work->mm = mm;
work->num_pages = num_pages;
queue_work(ib_wq, &work->work);
return;
bail_mm:
mmput(mm);
return;
}

@@ -1,875 +0,0 @@
/*
* Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include "ipath_kernel.h"
#include "ipath_user_sdma.h"
/* minimum size of header */
#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64
/* expected size of headers (for dma_pool) */
#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64
/* length mask in PBC (lower 11 bits) */
#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1)
struct ipath_user_sdma_pkt {
u8 naddr; /* dimension of addr (1..3) ... */
u32 counter; /* sdma pkts queued counter for this entry */
u64 added; /* global descq number of entries */
struct {
u32 offset; /* offset for kvaddr, addr */
u32 length; /* length in page */
u8 put_page; /* should we put_page? */
u8 dma_mapped; /* is page dma_mapped? */
struct page *page; /* may be NULL (coherent mem) */
void *kvaddr; /* FIXME: only for pio hack */
dma_addr_t addr;
} addr[4]; /* max pages, any more and we coalesce */
struct list_head list; /* list element */
};
struct ipath_user_sdma_queue {
/*
* pkts sent to dma engine are queued on this
* list head. the type of the elements of this
* list are struct ipath_user_sdma_pkt...
*/
struct list_head sent;
/* headers with expected length are allocated from here... */
char header_cache_name[64];
struct dma_pool *header_cache;
/* packets are allocated from the slab cache... */
char pkt_slab_name[64];
struct kmem_cache *pkt_slab;
/* as packets go on the queued queue, they are counted... */
u32 counter;
u32 sent_counter;
/* dma page table */
struct rb_root dma_pages_root;
/* protect everything above... */
struct mutex lock;
};
struct ipath_user_sdma_queue *
ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
{
struct ipath_user_sdma_queue *pq =
kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
if (!pq)
goto done;
pq->counter = 0;
pq->sent_counter = 0;
INIT_LIST_HEAD(&pq->sent);
mutex_init(&pq->lock);
snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
"ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
sizeof(struct ipath_user_sdma_pkt),
0, 0, NULL);
if (!pq->pkt_slab)
goto err_kfree;
snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
"ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
pq->header_cache = dma_pool_create(pq->header_cache_name,
dev,
IPATH_USER_SDMA_EXP_HEADER_LENGTH,
4, 0);
if (!pq->header_cache)
goto err_slab;
pq->dma_pages_root = RB_ROOT;
goto done;
err_slab:
kmem_cache_destroy(pq->pkt_slab);
err_kfree:
kfree(pq);
pq = NULL;
done:
return pq;
}
static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
int i, size_t offset, size_t len,
int put_page, int dma_mapped,
struct page *page,
void *kvaddr, dma_addr_t dma_addr)
{
pkt->addr[i].offset = offset;
pkt->addr[i].length = len;
pkt->addr[i].put_page = put_page;
pkt->addr[i].dma_mapped = dma_mapped;
pkt->addr[i].page = page;
pkt->addr[i].kvaddr = kvaddr;
pkt->addr[i].addr = dma_addr;
}
static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
u32 counter, size_t offset,
size_t len, int dma_mapped,
struct page *page,
void *kvaddr, dma_addr_t dma_addr)
{
pkt->naddr = 1;
pkt->counter = counter;
ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
kvaddr, dma_addr);
}
/* we have too many pages in the iovec -- coalesce them into a single page */
static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
struct ipath_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov)
{
int ret = 0;
struct page *page = alloc_page(GFP_KERNEL);
void *mpage_save;
char *mpage;
int i;
int len = 0;
dma_addr_t dma_addr;
if (!page) {
ret = -ENOMEM;
goto done;
}
mpage = kmap(page);
mpage_save = mpage;
for (i = 0; i < niov; i++) {
int cfur;
cfur = copy_from_user(mpage,
iov[i].iov_base, iov[i].iov_len);
if (cfur) {
ret = -EFAULT;
goto free_unmap;
}
mpage += iov[i].iov_len;
len += iov[i].iov_len;
}
dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
DMA_TO_DEVICE);
if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto free_unmap;
}
ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
dma_addr);
pkt->naddr = 2;
goto done;
free_unmap:
kunmap(page);
__free_page(page);
done:
return ret;
}
/* how many pages in this iovec element? */
static int ipath_user_sdma_num_pages(const struct iovec *iov)
{
const unsigned long addr = (unsigned long) iov->iov_base;
const unsigned long len = iov->iov_len;
const unsigned long spage = addr & PAGE_MASK;
const unsigned long epage = (addr + len - 1) & PAGE_MASK;
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
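/*
 * e.g. with 4 KiB pages, a 0x40-byte iovec starting 0xff0 bytes into a
 * page straddles a page boundary: epage - spage == PAGE_SIZE, so this
 * returns 2.
 */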
/* truncate length to page boundary */
static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
{
const unsigned long offset = addr & ~PAGE_MASK;
return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
}
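/*
 * e.g. for that same 0x40-byte run starting 0xff0 bytes into a 4 KiB
 * page, offset + len == 0x1030 > PAGE_SIZE, so only the 0x10 bytes that
 * fit in the first page are counted here.
 */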
static void ipath_user_sdma_free_pkt_frag(struct device *dev,
struct ipath_user_sdma_queue *pq,
struct ipath_user_sdma_pkt *pkt,
int frag)
{
const int i = frag;
if (pkt->addr[i].page) {
if (pkt->addr[i].dma_mapped)
dma_unmap_page(dev,
pkt->addr[i].addr,
pkt->addr[i].length,
DMA_TO_DEVICE);
if (pkt->addr[i].kvaddr)
kunmap(pkt->addr[i].page);
if (pkt->addr[i].put_page)
put_page(pkt->addr[i].page);
else
__free_page(pkt->addr[i].page);
} else if (pkt->addr[i].kvaddr)
/* free coherent mem from cache... */
dma_pool_free(pq->header_cache,
pkt->addr[i].kvaddr, pkt->addr[i].addr);
}
/* return number of pages pinned... */
static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
struct ipath_user_sdma_pkt *pkt,
unsigned long addr, int tlen, int npages)
{
struct page *pages[2];
int j;
int ret;
ret = get_user_pages_fast(addr, npages, 0, pages);
if (ret != npages) {
int i;
for (i = 0; i < ret; i++)
put_page(pages[i]);
ret = -ENOMEM;
goto done;
}
for (j = 0; j < npages; j++) {
/* map the pages... */
const int flen =
ipath_user_sdma_page_length(addr, tlen);
dma_addr_t dma_addr =
dma_map_page(&dd->pcidev->dev,
pages[j], 0, flen, DMA_TO_DEVICE);
unsigned long fofs = addr & ~PAGE_MASK;
if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto done;
}
ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
pages[j], kmap(pages[j]),
dma_addr);
pkt->naddr++;
addr += flen;
tlen -= flen;
}
done:
return ret;
}
static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq,
struct ipath_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov)
{
int ret = 0;
unsigned long idx;
for (idx = 0; idx < niov; idx++) {
const int npages = ipath_user_sdma_num_pages(iov + idx);
const unsigned long addr = (unsigned long) iov[idx].iov_base;
ret = ipath_user_sdma_pin_pages(dd, pkt,
addr, iov[idx].iov_len,
npages);
if (ret < 0)
goto free_pkt;
}
goto done;
free_pkt:
for (idx = 0; idx < pkt->naddr; idx++)
ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
done:
return ret;
}
static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq,
struct ipath_user_sdma_pkt *pkt,
const struct iovec *iov,
unsigned long niov, int npages)
{
int ret = 0;
if (npages >= ARRAY_SIZE(pkt->addr))
ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
else
ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
return ret;
}
/* free a packet list, releasing each packet's fragments and the packet itself */
static void ipath_user_sdma_free_pkt_list(struct device *dev,
struct ipath_user_sdma_queue *pq,
struct list_head *list)
{
struct ipath_user_sdma_pkt *pkt, *pkt_next;
list_for_each_entry_safe(pkt, pkt_next, list, list) {
int i;
for (i = 0; i < pkt->naddr; i++)
ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
kmem_cache_free(pq->pkt_slab, pkt);
}
}
/*
* copy headers, coalesce etc. -- pq->lock must be held
*
* we queue all the packets on list, returning the number of iovec
* entries consumed. list must be empty initially, because on error
* we clean it out...
*/
static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq,
struct list_head *list,
const struct iovec *iov,
unsigned long niov,
int maxpkts)
{
unsigned long idx = 0;
int ret = 0;
int npkts = 0;
struct page *page = NULL;
__le32 *pbc;
dma_addr_t dma_addr;
struct ipath_user_sdma_pkt *pkt = NULL;
size_t len;
size_t nw;
u32 counter = pq->counter;
int dma_mapped = 0;
while (idx < niov && npkts < maxpkts) {
const unsigned long addr = (unsigned long) iov[idx].iov_base;
const unsigned long idx_save = idx;
unsigned pktnw;
unsigned pktnwc;
int nfrags = 0;
int npages = 0;
int cfur;
dma_mapped = 0;
len = iov[idx].iov_len;
nw = len >> 2;
page = NULL;
pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
if (!pkt) {
ret = -ENOMEM;
goto free_list;
}
if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
len > PAGE_SIZE || len & 3 || addr & 3) {
ret = -EINVAL;
goto free_pkt;
}
if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
&dma_addr);
else
pbc = NULL;
if (!pbc) {
page = alloc_page(GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
goto free_pkt;
}
pbc = kmap(page);
}
cfur = copy_from_user(pbc, iov[idx].iov_base, len);
if (cfur) {
ret = -EFAULT;
goto free_pbc;
}
/*
* this assignment is a bit strange. it's because the
* pbc counts the number of 32 bit words in the full
* packet _except_ the first word of the pbc itself...
*/
pktnwc = nw - 1;
/*
* pktnw computation yields the number of 32 bit words
* that the caller has indicated in the PBC. note that
* this is one less than the total number of words that
* goes to the send DMA engine as the first 32 bit word
* of the PBC itself is not counted. Armed with this count,
* we can verify that the packet is consistent with the
* iovec lengths.
*/
pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
ret = -EINVAL;
goto free_pbc;
}
idx++;
while (pktnwc < pktnw && idx < niov) {
const size_t slen = iov[idx].iov_len;
const unsigned long faddr =
(unsigned long) iov[idx].iov_base;
if (slen & 3 || faddr & 3 || !slen ||
slen > PAGE_SIZE) {
ret = -EINVAL;
goto free_pbc;
}
npages++;
if ((faddr & PAGE_MASK) !=
((faddr + slen - 1) & PAGE_MASK))
npages++;
pktnwc += slen >> 2;
idx++;
nfrags++;
}
if (pktnwc != pktnw) {
ret = -EINVAL;
goto free_pbc;
}
if (page) {
dma_addr = dma_map_page(&dd->pcidev->dev,
page, 0, len, DMA_TO_DEVICE);
if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto free_pbc;
}
dma_mapped = 1;
}
ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
page, pbc, dma_addr);
if (nfrags) {
ret = ipath_user_sdma_init_payload(dd, pq, pkt,
iov + idx_save + 1,
nfrags, npages);
if (ret < 0)
goto free_pbc_dma;
}
counter++;
npkts++;
list_add_tail(&pkt->list, list);
}
ret = idx;
goto done;
free_pbc_dma:
if (dma_mapped)
dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
free_pbc:
if (page) {
kunmap(page);
__free_page(page);
} else
dma_pool_free(pq->header_cache, pbc, dma_addr);
free_pkt:
kmem_cache_free(pq->pkt_slab, pkt);
free_list:
ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
return ret;
}
static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
u32 c)
{
pq->sent_counter = c;
}
/* try to clean out queue -- needs pq->lock */
static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq)
{
struct list_head free_list;
struct ipath_user_sdma_pkt *pkt;
struct ipath_user_sdma_pkt *pkt_prev;
int ret = 0;
INIT_LIST_HEAD(&free_list);
list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
if (descd < 0)
break;
list_move_tail(&pkt->list, &free_list);
/* one more packet cleaned */
ret++;
}
if (!list_empty(&free_list)) {
u32 counter;
pkt = list_entry(free_list.prev,
struct ipath_user_sdma_pkt, list);
counter = pkt->counter;
ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
ipath_user_sdma_set_complete_counter(pq, counter);
}
return ret;
}
void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
{
if (!pq)
return;
kmem_cache_destroy(pq->pkt_slab);
dma_pool_destroy(pq->header_cache);
kfree(pq);
}
/* clean descriptor queue, returns > 0 if some elements cleaned */
static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
{
int ret;
unsigned long flags;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
ret = ipath_sdma_make_progress(dd);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
return ret;
}
/* we're in close, drain packets so that we can clean up successfully... */
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq)
{
int i;
if (!pq)
return;
for (i = 0; i < 100; i++) {
mutex_lock(&pq->lock);
if (list_empty(&pq->sent)) {
mutex_unlock(&pq->lock);
break;
}
ipath_user_sdma_hwqueue_clean(dd);
ipath_user_sdma_queue_clean(dd, pq);
mutex_unlock(&pq->lock);
msleep(10);
}
if (!list_empty(&pq->sent)) {
struct list_head free_list;
printk(KERN_INFO "drain: lists not empty: forcing!\n");
INIT_LIST_HEAD(&free_list);
mutex_lock(&pq->lock);
list_splice_init(&pq->sent, &free_list);
ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
mutex_unlock(&pq->lock);
}
}
static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
u64 addr, u64 dwlen, u64 dwoffset)
{
return cpu_to_le64(/* SDmaPhyAddr[31:0] */
((addr & 0xfffffffcULL) << 32) |
/* SDmaGeneration[1:0] */
((dd->ipath_sdma_generation & 3ULL) << 30) |
/* SDmaDwordCount[10:0] */
((dwlen & 0x7ffULL) << 16) |
/* SDmaBufOffset[12:2] */
(dwoffset & 0x7ffULL));
}
static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
{
return descq | cpu_to_le64(1ULL << 12);
}
static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
{
/* last */ /* dma head */
return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}
static inline __le64 ipath_sdma_make_desc1(u64 addr)
{
/* SDmaPhyAddr[47:32] */
return cpu_to_le64(addr >> 32);
}
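/*
 * Putting the helpers above together, qw[0] of a descriptor is encoded as:
 *   [63:32] SDmaPhyAddr[31:0] (low address bits, 4-byte aligned)
 *   [31:30] SDmaGeneration[1:0]
 *   [26:16] SDmaDwordCount[10:0]
 *   [14]    large-send-buffer flag (set in ipath_user_sdma_push_pkts())
 *   [13]    "dma head" (see ipath_sdma_make_last_desc0())
 *   [12]    first descriptor of a packet
 *   [11]    last descriptor of a packet
 *   [10:0]  SDmaBufOffset[12:2]
 * and qw[1] carries SDmaPhyAddr[47:32].
 */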
static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
struct ipath_user_sdma_pkt *pkt, int idx,
unsigned ofs, u16 tail)
{
const u64 addr = (u64) pkt->addr[idx].addr +
(u64) pkt->addr[idx].offset;
const u64 dwlen = (u64) pkt->addr[idx].length / 4;
__le64 *descqp;
__le64 descq0;
descqp = &dd->ipath_sdma_descq[tail].qw[0];
descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
if (idx == 0)
descq0 = ipath_sdma_make_first_desc0(descq0);
if (idx == pkt->naddr - 1)
descq0 = ipath_sdma_make_last_desc0(descq0);
descqp[0] = descq0;
descqp[1] = ipath_sdma_make_desc1(addr);
}
/* pq->lock must be held, get packets on the wire... */
static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq,
struct list_head *pktlist)
{
int ret = 0;
unsigned long flags;
u16 tail;
if (list_empty(pktlist))
return 0;
if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
return -ECOMM;
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
ret = -ECOMM;
goto unlock;
}
tail = dd->ipath_sdma_descq_tail;
while (!list_empty(pktlist)) {
struct ipath_user_sdma_pkt *pkt =
list_entry(pktlist->next, struct ipath_user_sdma_pkt,
list);
int i;
unsigned ofs = 0;
u16 dtail = tail;
if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
goto unlock_check_tail;
for (i = 0; i < pkt->naddr; i++) {
ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
ofs += pkt->addr[i].length >> 2;
if (++tail == dd->ipath_sdma_descq_cnt) {
tail = 0;
++dd->ipath_sdma_generation;
}
}
if ((ofs<<2) > dd->ipath_ibmaxlen) {
ipath_dbg("packet size %X > ibmax %X, fail\n",
ofs<<2, dd->ipath_ibmaxlen);
ret = -EMSGSIZE;
goto unlock;
}
/*
* if the packet is >= 2KB mtu equivalent, we have to use
* the large buffers, and have to mark each descriptor as
* part of a large buffer packet.
*/
if (ofs >= IPATH_SMALLBUF_DWORDS) {
for (i = 0; i < pkt->naddr; i++) {
dd->ipath_sdma_descq[dtail].qw[0] |=
cpu_to_le64(1ULL << 14);
if (++dtail == dd->ipath_sdma_descq_cnt)
dtail = 0;
}
}
dd->ipath_sdma_descq_added += pkt->naddr;
pkt->added = dd->ipath_sdma_descq_added;
list_move_tail(&pkt->list, &pq->sent);
ret++;
}
unlock_check_tail:
/* advance the tail on the chip if necessary */
if (dd->ipath_sdma_descq_tail != tail) {
wmb();
ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
dd->ipath_sdma_descq_tail = tail;
}
unlock:
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
return ret;
}
int ipath_user_sdma_writev(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq,
const struct iovec *iov,
unsigned long dim)
{
int ret = 0;
struct list_head list;
int npkts = 0;
INIT_LIST_HEAD(&list);
mutex_lock(&pq->lock);
if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
ipath_user_sdma_hwqueue_clean(dd);
ipath_user_sdma_queue_clean(dd, pq);
}
while (dim) {
const int mxp = 8;
ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
if (ret <= 0)
goto done_unlock;
else {
dim -= ret;
iov += ret;
}
/* force packets onto the sdma hw queue... */
if (!list_empty(&list)) {
/*
* lazily clean hw queue. the 4 is a guess of about
* how many sdma descriptors a packet will take (it
* doesn't have to be perfect).
*/
if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
ipath_user_sdma_hwqueue_clean(dd);
ipath_user_sdma_queue_clean(dd, pq);
}
ret = ipath_user_sdma_push_pkts(dd, pq, &list);
if (ret < 0)
goto done_unlock;
else {
npkts += ret;
pq->counter += ret;
if (!list_empty(&list))
goto done_unlock;
}
}
}
done_unlock:
if (!list_empty(&list))
ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
mutex_unlock(&pq->lock);
return (ret < 0) ? ret : npkts;
}
int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq)
{
int ret = 0;
mutex_lock(&pq->lock);
ipath_user_sdma_hwqueue_clean(dd);
ret = ipath_user_sdma_queue_clean(dd, pq);
mutex_unlock(&pq->lock);
return ret;
}
u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
{
return pq->sent_counter;
}
u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
{
return pq->counter;
}

@@ -1,52 +0,0 @@
/*
* Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/device.h>
struct ipath_user_sdma_queue;
struct ipath_user_sdma_queue *
ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
int ipath_user_sdma_writev(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq,
const struct iovec *iov,
unsigned long dim);
int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
struct ipath_user_sdma_queue *pq);
u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);

File diff suppressed because it is too large

@@ -1,939 +0,0 @@
/*
* Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IPATH_VERBS_H
#define IPATH_VERBS_H
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include "ipath_kernel.h"
#define IPATH_MAX_RDMA_ATOMIC 4
#define QPN_MAX (1 << 24)
#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define IPATH_UVERBS_ABI_VERSION 2
/*
* Define an ib_cq_notify value that is not valid so we know when CQ
* notifications are armed.
*/
#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
/* AETH NAK opcode values */
#define IB_RNR_NAK 0x20
#define IB_NAK_PSN_ERROR 0x60
#define IB_NAK_INVALID_REQUEST 0x61
#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
#define IB_NAK_INVALID_RD_REQUEST 0x64
/* Flags for checking QP state (see ib_ipath_state_ops[]) */
#define IPATH_POST_SEND_OK 0x01
#define IPATH_POST_RECV_OK 0x02
#define IPATH_PROCESS_RECV_OK 0x04
#define IPATH_PROCESS_SEND_OK 0x08
#define IPATH_PROCESS_NEXT_SEND_OK 0x10
#define IPATH_FLUSH_SEND 0x20
#define IPATH_FLUSH_RECV 0x40
#define IPATH_PROCESS_OR_FLUSH_SEND \
(IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
/* Mandatory IB performance counter select values. */
#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
} __attribute__ ((packed));
struct ib_atomic_eth {
__be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
} __attribute__ ((packed));
struct ipath_other_headers {
__be32 bth[3];
union {
struct {
__be32 deth[2];
__be32 imm_data;
} ud;
struct {
struct ib_reth reth;
__be32 imm_data;
} rc;
struct {
__be32 aeth;
__be32 atomic_ack_eth[2];
} at;
__be32 imm_data;
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
} __attribute__ ((packed));
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
* long (72 w/ imm_data). Only the first 56 bytes of the IB header
* will be in the eager header buffer. The remaining 12 or 16 bytes
* are in the data buffer.
*/
struct ipath_ib_header {
__be16 lrh[4];
union {
struct {
struct ib_grh grh;
struct ipath_other_headers oth;
} l;
struct ipath_other_headers oth;
} u;
} __attribute__ ((packed));
struct ipath_pio_header {
__le32 pbc[2];
struct ipath_ib_header hdr;
} __attribute__ ((packed));
/*
* There is one struct ipath_mcast for each multicast GID.
* All attached QPs are then stored as a list of
* struct ipath_mcast_qp.
*/
struct ipath_mcast_qp {
struct list_head list;
struct ipath_qp *qp;
};
struct ipath_mcast {
struct rb_node rb_node;
union ib_gid mgid;
struct list_head qp_list;
wait_queue_head_t wait;
atomic_t refcount;
int n_attached;
};
/* Protection domain */
struct ipath_pd {
struct ib_pd ibpd;
int user; /* non-zero if created from user space */
};
/* Address Handle */
struct ipath_ah {
struct ib_ah ibah;
struct ib_ah_attr attr;
};
/*
* This structure is used by ipath_mmap() to validate an offset
* when an mmap() request is made. The vm_area_struct then uses
* this as its vm_private_data.
*/
struct ipath_mmap_info {
struct list_head pending_mmaps;
struct ib_ucontext *context;
void *obj;
__u64 offset;
struct kref ref;
unsigned size;
};
/*
* This structure is used to contain the head pointer, tail pointer,
* and completion queue entries as a single memory allocation so
* it can be mmap'ed into user space.
*/
struct ipath_cq_wc {
u32 head; /* index of next entry to fill */
u32 tail; /* index of next ib_poll_cq() entry */
union {
/* these are actually size ibcq.cqe + 1 */
struct ib_uverbs_wc uqueue[0];
struct ib_wc kqueue[0];
};
};
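/*
 * head == tail means the ring is empty; sizing the arrays to
 * ibcq.cqe + 1 entries presumably keeps one slot unused so a full
 * queue can be distinguished from an empty one.
 */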
/*
* The completion queue structure.
*/
struct ipath_cq {
struct ib_cq ibcq;
struct tasklet_struct comptask;
spinlock_t lock;
u8 notify;
u8 triggered;
struct ipath_cq_wc *queue;
struct ipath_mmap_info *ip;
};
/*
* A segment is a linear region of low physical memory.
* XXX Maybe we should use phys addr here and kmap()/kunmap().
* Used by the verbs layer.
*/
struct ipath_seg {
void *vaddr;
size_t length;
};
/* The number of ipath_segs that fit in a page. */
#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
struct ipath_segarray {
struct ipath_seg segs[IPATH_SEGSZ];
};
struct ipath_mregion {
struct ib_pd *pd; /* shares refcnt of ibmr.pd */
u64 user_base; /* User's address for this region */
u64 iova; /* IB start address of this region */
size_t length;
u32 lkey;
u32 offset; /* offset (bytes) to start of region */
int access_flags;
u32 max_segs; /* number of ipath_segs in all the arrays */
u32 mapsz; /* size of the map array */
struct ipath_segarray *map[0]; /* the segments */
};
/*
* These keep track of the copy progress within a memory region.
* Used by the verbs layer.
*/
struct ipath_sge {
struct ipath_mregion *mr;
void *vaddr; /* kernel virtual address of segment */
u32 sge_length; /* length of the SGE */
u32 length; /* remaining length of the segment */
u16 m; /* current index: mr->map[m] */
u16 n; /* current index: mr->map[m]->segs[n] */
};
/* Memory region */
struct ipath_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
struct ipath_mregion mr; /* must be last */
};
/*
* Send work request queue entry.
* The size of the sg_list is determined when the QP is created and stored
* in qp->s_max_sge.
*/
struct ipath_swqe {
struct ib_send_wr wr; /* don't use wr.sg_list */
u32 psn; /* first packet sequence number */
u32 lpsn; /* last packet sequence number */
u32 ssn; /* send sequence number */
u32 length; /* total length of data in sg_list */
struct ipath_sge sg_list[0];
};
/*
* Receive work request queue entry.
* The size of the sg_list is determined when the QP (or SRQ) is created
* and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
*/
struct ipath_rwqe {
u64 wr_id;
u8 num_sge;
struct ib_sge sg_list[0];
};
/*
* This structure is used to contain the head pointer, tail pointer,
* and receive work queue entries as a single memory allocation so
* it can be mmap'ed into user space.
* Note that the wq array elements are variable size so you can't
* just index into the array to get the N'th element;
* use get_rwqe_ptr() instead.
*/
struct ipath_rwq {
u32 head; /* new work requests posted to the head */
u32 tail; /* receives pull requests from here. */
struct ipath_rwqe wq[0];
};
struct ipath_rq {
struct ipath_rwq *wq;
spinlock_t lock;
u32 size; /* size of RWQE array */
u8 max_sge;
};
struct ipath_srq {
struct ib_srq ibsrq;
struct ipath_rq rq;
struct ipath_mmap_info *ip;
/* send signal when number of RWQEs < limit */
u32 limit;
};
struct ipath_sge_state {
struct ipath_sge *sg_list; /* next SGE to be used if any */
struct ipath_sge sge; /* progress state for the current SGE */
u8 num_sge;
u8 static_rate;
};
/*
* This structure holds the information that the send tasklet needs
* to send a RDMA read response or atomic operation.
*/
struct ipath_ack_entry {
u8 opcode;
u8 sent;
u32 psn;
union {
struct ipath_sge_state rdma_sge;
u64 atomic_data;
};
};
/*
* Variables prefixed with s_ are for the requester (sender).
* Variables prefixed with r_ are for the responder (receiver).
* Variables prefixed with ack_ are for responder replies.
*
* Common variables are protected by both r_rq.lock and s_lock, taken in that
* order, which only happens in modify_qp() or when changing the QP 'state'.
*/
struct ipath_qp {
struct ib_qp ibqp;
struct ipath_qp *next; /* link list for QPN hash table */
struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
struct list_head piowait; /* link for wait PIO buf */
struct list_head timerwait; /* link for waiting for timeouts */
struct ib_ah_attr remote_ah_attr;
struct ipath_ib_header s_hdr; /* next packet header to send */
atomic_t refcount;
wait_queue_head_t wait;
wait_queue_head_t wait_dma;
struct tasklet_struct s_task;
struct ipath_mmap_info *ip;
struct ipath_sge_state *s_cur_sge;
struct ipath_verbs_txreq *s_tx;
struct ipath_sge_state s_sge; /* current send request data */
struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
struct ipath_sge_state s_ack_rdma_sge;
struct ipath_sge_state s_rdma_read_sge;
struct ipath_sge_state r_sge; /* current receive data */
spinlock_t s_lock;
atomic_t s_dma_busy;
u16 s_pkt_delay;
u16 s_hdrwords; /* size of s_hdr in 32 bit words */
u32 s_cur_size; /* size of send packet in bytes */
u32 s_len; /* total length of s_sge */
u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
u32 s_next_psn; /* PSN for next request */
u32 s_last_psn; /* last response PSN processed */
u32 s_psn; /* current packet sequence number */
u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
u64 r_wr_id; /* ID for current receive WQE */
unsigned long r_aflags;
u32 r_len; /* total length of r_sge */
u32 r_rcv_len; /* receive data len processed */
u32 r_psn; /* expected rcv packet sequence number */
u32 r_msn; /* message sequence number */
u8 state; /* QP state */
u8 s_state; /* opcode of last packet sent */
u8 s_ack_state; /* opcode of packet to ACK */
u8 s_nak_state; /* non-zero if NAK is pending */
u8 r_state; /* opcode of last packet received */
u8 r_nak_state; /* non-zero if NAK is pending */
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_flags;
u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
u8 r_head_ack_queue; /* index into s_ack_queue[] */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_pkey_index; /* PKEY index to use */
u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
u8 s_tail_ack_queue; /* index into s_ack_queue[] */
u8 s_flags;
u8 s_dmult;
u8 s_draining;
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
u32 remote_qpn;
u32 qkey; /* QKEY for this QP (for UD or RD) */
u32 s_size; /* send work queue size */
u32 s_head; /* new entries added here */
u32 s_tail; /* next entry to process */
u32 s_cur; /* current work queue entry */
u32 s_last; /* last un-ACK'ed entry */
u32 s_ssn; /* SSN of tail entry */
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
struct ipath_swqe *s_wqe;
struct ipath_sge *r_ud_sg_list;
struct ipath_rq r_rq; /* receive work queue */
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
/*
* Atomic bit definitions for r_aflags.
*/
#define IPATH_R_WRID_VALID 0
/*
* Bit definitions for r_flags.
*/
#define IPATH_R_REUSE_SGE 0x01
#define IPATH_R_RDMAR_SEQ 0x02
/*
* Bit definitions for s_flags.
*
* IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
* before processing the next SWQE
* IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
* before processing the next SWQE
* IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
* IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
* IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
* next send completion entry not via send DMA.
*/
#define IPATH_S_SIGNAL_REQ_WR 0x01
#define IPATH_S_FENCE_PENDING 0x02
#define IPATH_S_RDMAR_PENDING 0x04
#define IPATH_S_ACK_PENDING 0x08
#define IPATH_S_BUSY 0x10
#define IPATH_S_WAITING 0x20
#define IPATH_S_WAIT_SSN_CREDIT 0x40
#define IPATH_S_WAIT_DMA 0x80
#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
#define IPATH_PSN_CREDIT 512
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
* struct ipath_qp.s_wq. This function does the array index computation.
*/
static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
unsigned n)
{
return (struct ipath_swqe *)((char *)qp->s_wq +
(sizeof(struct ipath_swqe) +
qp->s_max_sge *
sizeof(struct ipath_sge)) * n);
}
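/*
 * e.g. with qp->s_max_sge == 2, entry n starts
 * sizeof(struct ipath_swqe) + 2 * sizeof(struct ipath_sge) bytes after
 * entry n - 1, since each entry carries its sg_list[] inline.
 */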
/*
* Since struct ipath_rwqe is not a fixed size, we can't simply index into
* struct ipath_rwq.wq. This function does the array index computation.
*/
static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
unsigned n)
{
return (struct ipath_rwqe *)
((char *) rq->wq->wq +
(sizeof(struct ipath_rwqe) +
rq->max_sge * sizeof(struct ib_sge)) * n);
}
/*
* QPN-map pages start out as NULL; they get allocated upon
* first use and are never deallocated. This way, large bitmaps
* are not allocated unless large numbers of QPs are used.
*/
struct qpn_map {
atomic_t n_free;
void *page;
};
struct ipath_qp_table {
spinlock_t lock;
u32 last; /* last QP number allocated */
u32 max; /* size of the hash table */
u32 nmaps; /* size of the map table */
struct ipath_qp **table;
/* bit map of free numbers */
struct qpn_map map[QPNMAP_ENTRIES];
};
struct ipath_lkey_table {
spinlock_t lock;
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
u32 max; /* size of the table */
struct ipath_mregion **table;
};
struct ipath_opcode_stats {
u64 n_packets; /* number of packets */
u64 n_bytes; /* total number of bytes */
};
struct ipath_ibdev {
struct ib_device ibdev;
struct ipath_devdata *dd;
struct list_head pending_mmaps;
spinlock_t mmap_offset_lock;
u32 mmap_offset;
int ib_unit; /* This is the device number */
u16 sm_lid; /* in host order */
u8 sm_sl;
u8 mkeyprot;
/* non-zero when timer is set */
unsigned long mkey_lease_timeout;
/* The following fields are really per port. */
struct ipath_qp_table qp_table;
struct ipath_lkey_table lk_table;
struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */
struct list_head piowait; /* list for wait PIO buf */
struct list_head txreq_free;
void *txreq_bufs;
/* list of QPs waiting for RNR timer */
struct list_head rnrwait;
spinlock_t pending_lock;
__be64 sys_image_guid; /* in network order */
__be64 gid_prefix; /* in network order */
__be64 mkey;
u32 n_pds_allocated; /* number of PDs allocated for device */
spinlock_t n_pds_lock;
u32 n_ahs_allocated; /* number of AHs allocated for device */
spinlock_t n_ahs_lock;
u32 n_cqs_allocated; /* number of CQs allocated for device */
spinlock_t n_cqs_lock;
u32 n_qps_allocated; /* number of QPs allocated for device */
spinlock_t n_qps_lock;
u32 n_srqs_allocated; /* number of SRQs allocated for device */
spinlock_t n_srqs_lock;
u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
spinlock_t n_mcast_grps_lock;
u64 ipath_sword; /* total dwords sent (sample result) */
u64 ipath_rword; /* total dwords received (sample result) */
u64 ipath_spkts; /* total packets sent (sample result) */
u64 ipath_rpkts; /* total packets received (sample result) */
/* # of ticks no data sent (sample result) */
u64 ipath_xmit_wait;
u64 rcv_errors; /* # of packets with SW detected rcv errs */
u64 n_unicast_xmit; /* total unicast packets sent */
u64 n_unicast_rcv; /* total unicast packets received */
u64 n_multicast_xmit; /* total multicast packets sent */
u64 n_multicast_rcv; /* total multicast packets received */
u64 z_symbol_error_counter; /* starting count for PMA */
u64 z_link_error_recovery_counter; /* starting count for PMA */
u64 z_link_downed_counter; /* starting count for PMA */
u64 z_port_rcv_errors; /* starting count for PMA */
u64 z_port_rcv_remphys_errors; /* starting count for PMA */
u64 z_port_xmit_discards; /* starting count for PMA */
u64 z_port_xmit_data; /* starting count for PMA */
u64 z_port_rcv_data; /* starting count for PMA */
u64 z_port_xmit_packets; /* starting count for PMA */
u64 z_port_rcv_packets; /* starting count for PMA */
u32 z_pkey_violations; /* starting count for PMA */
u32 z_local_link_integrity_errors; /* starting count for PMA */
u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
u32 z_vl15_dropped; /* starting count for PMA */
u32 n_rc_resends;
u32 n_rc_acks;
u32 n_rc_qacks;
u32 n_seq_naks;
u32 n_rdma_seq;
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
u32 n_rdma_dup_busy;
u32 n_piowait;
u32 n_unaligned;
u32 port_cap_flags;
u32 pma_sample_start;
u32 pma_sample_interval;
__be16 pma_counter_select[5];
u16 pma_tag;
u16 qkey_violations;
u16 mkey_violations;
u16 mkey_lease_period;
u16 pending_index; /* which pending queue is active */
u8 pma_sample_status;
u8 subnet_timeout;
u8 vl_high_limit;
struct ipath_opcode_stats opstats[128];
};
struct ipath_verbs_counters {
u64 symbol_error_counter;
u64 link_error_recovery_counter;
u64 link_downed_counter;
u64 port_rcv_errors;
u64 port_rcv_remphys_errors;
u64 port_xmit_discards;
u64 port_xmit_data;
u64 port_rcv_data;
u64 port_xmit_packets;
u64 port_rcv_packets;
u32 local_link_integrity_errors;
u32 excessive_buffer_overrun_errors;
u32 vl15_dropped;
};
struct ipath_verbs_txreq {
struct ipath_qp *qp;
struct ipath_swqe *wqe;
u32 map_len;
u32 len;
struct ipath_sge_state *ss;
struct ipath_pio_header hdr;
struct ipath_sdma_txreq txreq;
};
static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct ipath_mr, ibmr);
}
static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct ipath_pd, ibpd);
}
static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
{
return container_of(ibah, struct ipath_ah, ibah);
}
static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct ipath_cq, ibcq);
}
static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
{
return container_of(ibsrq, struct ipath_srq, ibsrq);
}
static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct ipath_qp, ibqp);
}
static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
{
return container_of(ibdev, struct ipath_ibdev, ibdev);
}
/*
* This must be called with s_lock held.
*/
static inline void ipath_schedule_send(struct ipath_qp *qp)
{
if (qp->s_flags & IPATH_S_ANY_WAIT)
qp->s_flags &= ~IPATH_S_ANY_WAIT;
if (!(qp->s_flags & IPATH_S_BUSY))
tasklet_hi_schedule(&qp->s_task);
}
int ipath_process_mad(struct ib_device *ibdev,
int mad_flags,
u8 port_num,
const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
struct ib_mad_hdr *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
/*
* Compare the lower 24 bits of the two values.
* Returns an integer less than, equal to, or greater than zero.
*/
static inline int ipath_cmp24(u32 a, u32 b)
{
return (((int) a) - ((int) b)) << 8;
}
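/*
 * e.g. ipath_cmp24(2, 0xfffffe) == (2 - 0xfffffe) << 8 == 0x400 > 0, so
 * PSN 2 compares as "ahead of" PSN 0xfffffe across the 24-bit wrap; the
 * << 8 discards the difference's upper bits and uses bit 23 as the sign.
 */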
struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
u64 *rwords, u64 *spkts, u64 *rpkts,
u64 *xmit_wait);
int ipath_get_counters(struct ipath_devdata *dd,
struct ipath_verbs_counters *cntrs);
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int ipath_mcast_tree_empty(void);
__be32 ipath_compute_aeth(struct ipath_qp *qp);
struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int ipath_destroy_qp(struct ib_qp *ibqp);
int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr);
unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
u32 hdrwords, struct ipath_sge_state *ss, u32 len);
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
struct ipath_mregion *mr);
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask,
struct ib_udata *udata);
int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
int ipath_destroy_srq(struct ib_srq *ibsrq);
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
struct ib_cq *ipath_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int ipath_destroy_cq(struct ib_cq *ibcq);
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *buffer_list,
int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
int ipath_dereg_mr(struct ib_mr *ibmr);
struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr);
int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
int list_len, u64 iova);
int ipath_unmap_fmr(struct list_head *fmr_list);
int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
void ipath_release_mmap_info(struct kref *ref);
struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
u32 size,
struct ib_ucontext *context,
void *obj);
void ipath_update_mmap_info(struct ipath_ibdev *dev,
struct ipath_mmap_info *ip,
u32 size, void *obj);
int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void ipath_insert_rnr_queue(struct ipath_qp *qp);
int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
u32 *lengthp, struct ipath_sge_state *ss);
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
struct ipath_other_headers *ohdr,
u32 bth0, u32 bth2);
void ipath_do_send(unsigned long data);
void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
enum ib_wc_status status);
int ipath_make_rc_req(struct ipath_qp *qp);
int ipath_make_uc_req(struct ipath_qp *qp);
int ipath_make_ud_req(struct ipath_qp *qp);
int ipath_register_ib_device(struct ipath_devdata *);
void ipath_unregister_ib_device(struct ipath_ibdev *);
void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
int ipath_ib_piobufavail(struct ipath_ibdev *);
unsigned ipath_get_npkeys(struct ipath_devdata *);
u32 ipath_get_cr_errpkey(struct ipath_devdata *);
unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
/*
* Below converts HCA-specific LinkTrainingState to IB PhysPortState
* values.
*/
extern const u8 ipath_cvt_physportstate[];
#define IB_PHYSPORTSTATE_SLEEP 1
#define IB_PHYSPORTSTATE_POLL 2
#define IB_PHYSPORTSTATE_DISABLED 3
#define IB_PHYSPORTSTATE_CFG_TRAIN 4
#define IB_PHYSPORTSTATE_LINKUP 5
#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
extern const int ib_ipath_state_ops[];
extern unsigned int ib_ipath_lkey_table_size;
extern unsigned int ib_ipath_max_cqes;
extern unsigned int ib_ipath_max_cqs;
extern unsigned int ib_ipath_max_qp_wrs;
extern unsigned int ib_ipath_max_qps;
extern unsigned int ib_ipath_max_sges;
extern unsigned int ib_ipath_max_mcast_grps;
extern unsigned int ib_ipath_max_mcast_qp_attached;
extern unsigned int ib_ipath_max_srqs;
extern unsigned int ib_ipath_max_srq_sges;
extern unsigned int ib_ipath_max_srq_wrs;
extern const u32 ib_ipath_rnr_table[];
extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
#endif /* IPATH_VERBS_H */


@@ -1,364 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/rculist.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "ipath_verbs.h"
/*
* Global table of GID to attached QPs.
* The table is global to all ipath devices since a send from one QP/device
* needs to be locally routed to any locally attached QPs on the same
* or different device.
*/
static struct rb_root mcast_tree;
static DEFINE_SPINLOCK(mcast_lock);
/**
* ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
* @qp: the QP to link
*/
static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
{
struct ipath_mcast_qp *mqp;
mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
if (!mqp)
goto bail;
mqp->qp = qp;
atomic_inc(&qp->refcount);
bail:
return mqp;
}
static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
{
struct ipath_qp *qp = mqp->qp;
/* Notify ipath_destroy_qp() if it is waiting. */
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
kfree(mqp);
}
/**
* ipath_mcast_alloc - allocate the multicast GID structure
* @mgid: the multicast GID
*
* A list of QPs will be attached to this structure.
*/
static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
{
struct ipath_mcast *mcast;
mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
if (!mcast)
goto bail;
mcast->mgid = *mgid;
INIT_LIST_HEAD(&mcast->qp_list);
init_waitqueue_head(&mcast->wait);
atomic_set(&mcast->refcount, 0);
mcast->n_attached = 0;
bail:
return mcast;
}
static void ipath_mcast_free(struct ipath_mcast *mcast)
{
struct ipath_mcast_qp *p, *tmp;
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
ipath_mcast_qp_free(p);
kfree(mcast);
}
/**
* ipath_mcast_find - search the global table for the given multicast GID
* @mgid: the multicast GID to search for
*
* Returns NULL if not found.
*
* The caller is responsible for decrementing the reference count if found.
*/
struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
{
struct rb_node *n;
unsigned long flags;
struct ipath_mcast *mcast;
spin_lock_irqsave(&mcast_lock, flags);
n = mcast_tree.rb_node;
while (n) {
int ret;
mcast = rb_entry(n, struct ipath_mcast, rb_node);
ret = memcmp(mgid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else {
atomic_inc(&mcast->refcount);
spin_unlock_irqrestore(&mcast_lock, flags);
goto bail;
}
}
spin_unlock_irqrestore(&mcast_lock, flags);
mcast = NULL;
bail:
return mcast;
}
/**
* ipath_mcast_add - insert mcast GID into table and attach QP struct
* @mcast: the mcast GID table
* @mqp: the QP to attach
*
* Return zero if both were added. Return EEXIST if the GID was already in
* the table but the QP was added. Return ESRCH if the QP was already
* attached and neither structure was added.
*/
static int ipath_mcast_add(struct ipath_ibdev *dev,
struct ipath_mcast *mcast,
struct ipath_mcast_qp *mqp)
{
struct rb_node **n = &mcast_tree.rb_node;
struct rb_node *pn = NULL;
int ret;
spin_lock_irq(&mcast_lock);
while (*n) {
struct ipath_mcast *tmcast;
struct ipath_mcast_qp *p;
pn = *n;
tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0) {
n = &pn->rb_left;
continue;
}
if (ret > 0) {
n = &pn->rb_right;
continue;
}
/* Search the QP list to see if this is already there. */
list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
if (p->qp == mqp->qp) {
ret = ESRCH;
goto bail;
}
}
if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
ret = ENOMEM;
goto bail;
}
tmcast->n_attached++;
list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
ret = EEXIST;
goto bail;
}
spin_lock(&dev->n_mcast_grps_lock);
if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
spin_unlock(&dev->n_mcast_grps_lock);
ret = ENOMEM;
goto bail;
}
dev->n_mcast_grps_allocated++;
spin_unlock(&dev->n_mcast_grps_lock);
mcast->n_attached++;
list_add_tail_rcu(&mqp->list, &mcast->qp_list);
atomic_inc(&mcast->refcount);
rb_link_node(&mcast->rb_node, pn, n);
rb_insert_color(&mcast->rb_node, &mcast_tree);
ret = 0;
bail:
spin_unlock_irq(&mcast_lock);
return ret;
}
int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_mcast *mcast;
struct ipath_mcast_qp *mqp;
int ret;
/*
* Allocate data structures since it's better to do this outside of
* spin locks and it will most likely be needed.
*/
mcast = ipath_mcast_alloc(gid);
if (mcast == NULL) {
ret = -ENOMEM;
goto bail;
}
mqp = ipath_mcast_qp_alloc(qp);
if (mqp == NULL) {
ipath_mcast_free(mcast);
ret = -ENOMEM;
goto bail;
}
switch (ipath_mcast_add(dev, mcast, mqp)) {
case ESRCH:
/* Neither was used: can't attach the same QP twice. */
ipath_mcast_qp_free(mqp);
ipath_mcast_free(mcast);
ret = -EINVAL;
goto bail;
case EEXIST: /* The mcast wasn't used */
ipath_mcast_free(mcast);
break;
case ENOMEM:
/* Exceeded the maximum number of mcast groups. */
ipath_mcast_qp_free(mqp);
ipath_mcast_free(mcast);
ret = -ENOMEM;
goto bail;
default:
break;
}
ret = 0;
bail:
return ret;
}
int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct ipath_qp *qp = to_iqp(ibqp);
struct ipath_ibdev *dev = to_idev(ibqp->device);
struct ipath_mcast *mcast = NULL;
struct ipath_mcast_qp *p, *tmp;
struct rb_node *n;
int last = 0;
int ret;
spin_lock_irq(&mcast_lock);
/* Find the GID in the mcast table. */
n = mcast_tree.rb_node;
while (1) {
if (n == NULL) {
spin_unlock_irq(&mcast_lock);
ret = -EINVAL;
goto bail;
}
mcast = rb_entry(n, struct ipath_mcast, rb_node);
ret = memcmp(gid->raw, mcast->mgid.raw,
sizeof(union ib_gid));
if (ret < 0)
n = n->rb_left;
else if (ret > 0)
n = n->rb_right;
else
break;
}
/* Search the QP list. */
list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
if (p->qp != qp)
continue;
/*
* We found it, so remove it, but don't poison the forward
* link until we are sure there are no list walkers.
*/
list_del_rcu(&p->list);
mcast->n_attached--;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
rb_erase(&mcast->rb_node, &mcast_tree);
last = 1;
}
break;
}
spin_unlock_irq(&mcast_lock);
if (p) {
/*
* Wait for any list walkers to finish before freeing the
* list element.
*/
wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
ipath_mcast_qp_free(p);
}
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
ipath_mcast_free(mcast);
spin_lock_irq(&dev->n_mcast_grps_lock);
dev->n_mcast_grps_allocated--;
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
ret = 0;
bail:
return ret;
}
int ipath_mcast_tree_empty(void)
{
return mcast_tree.rb_node == NULL;
}
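
The lookups above key the global red-black tree on the raw 16-byte multicast GID and walk it with a three-way memcmp(), as in ipath_mcast_find() and ipath_mcast_add(). A minimal user-space sketch of that comparator follows (illustration only, not part of this diff; the GID values are made up and a sorted array plus bsearch() stands in for the kernel rb-tree):

/*
 * Illustration only: byte-wise three-way GID comparison, as used by the
 * rb-tree walk above, here driving a sorted array + bsearch().
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_GID_LEN 16

struct demo_gid {
	unsigned char raw[DEMO_GID_LEN];	/* stand-in for union ib_gid */
};

static int gid_cmp(const void *a, const void *b)
{
	return memcmp(((const struct demo_gid *)a)->raw,
		      ((const struct demo_gid *)b)->raw, DEMO_GID_LEN);
}

int main(void)
{
	struct demo_gid table[] = {
		{ { 0xff, 0x12, 0x40, 0x1b } },	/* made-up multicast GIDs */
		{ { 0xff, 0x12, 0x60, 0x1b } },
		{ { 0xff, 0x12, 0xa0, 0x1b } },
	};
	struct demo_gid key = { { 0xff, 0x12, 0x60, 0x1b } };

	qsort(table, 3, sizeof(table[0]), gid_cmp);
	printf("found: %s\n",
	       bsearch(&key, table, 3, sizeof(table[0]), gid_cmp) ? "yes" : "no");
	return 0;
}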


@@ -1,49 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* This file is conditionally built on PowerPC only. Otherwise weak symbol
* versions of the functions exported from here are used.
*/
#include "ipath_kernel.h"
/**
* ipath_enable_wc - enable write combining for MMIO writes to the device
* @dd: infinipath device
*
* Nothing to do on PowerPC, so just return without error.
*/
int ipath_enable_wc(struct ipath_devdata *dd)
{
return 0;
}
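
The header comment above relies on the kernel's weak-symbol convention: a generic weak definition is used unless an architecture-specific strong one is linked in. A rough standalone sketch of that pattern (hypothetical names, not the kernel's actual build wiring):

/*
 * Illustration only: a weak default that an arch-specific strong
 * definition of the same function would override at link time.
 */
#include <stdio.h>

struct demo_devdata {
	int dummy;
};

__attribute__((weak)) int demo_enable_wc(struct demo_devdata *dd)
{
	(void)dd;
	printf("generic demo_enable_wc: nothing to do\n");
	return 0;
}

/*
 * An x86_64- or ppc64-specific object file could provide a strong
 * demo_enable_wc(); the linker would then pick that one instead.
 */
int main(void)
{
	struct demo_devdata dd = { 0 };

	return demo_enable_wc(&dd);
}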


@@ -1,144 +0,0 @@
/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*
* This file is conditionally built on x86_64 only. Otherwise weak symbol
* versions of the functions exported from here are used.
*/
#include <linux/pci.h>
#include <asm/processor.h>
#include "ipath_kernel.h"
/**
* ipath_enable_wc - enable write combining for MMIO writes to the device
* @dd: infinipath device
*
* This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
* write combining.
*/
int ipath_enable_wc(struct ipath_devdata *dd)
{
int ret = 0;
u64 pioaddr, piolen;
unsigned bits;
const unsigned long addr = pci_resource_start(dd->pcidev, 0);
const size_t len = pci_resource_len(dd->pcidev, 0);
/*
* Set the PIO buffers to be WCCOMB, so we get HT bursts to the
* chip. Linux (possibly the hardware) requires it to be on a power
* of 2 address matching the length (which has to be a power of 2).
* For rev1, that means the base address; for rev2, it will be just
* the PIO buffers themselves.
* For chips with two sets of buffers, the calculations are
* somewhat more complicated; we need to sum, and the piobufbase
* register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
* The buffers are still packed, so a single range covers both.
*/
if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
unsigned long pio2kbase, pio4kbase;
pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
if (pio2kbase < pio4kbase) { /* all, for now */
pioaddr = addr + pio2kbase;
piolen = pio4kbase - pio2kbase +
dd->ipath_piobcnt4k * dd->ipath_4kalign;
} else {
pioaddr = addr + pio4kbase;
piolen = pio2kbase - pio4kbase +
dd->ipath_piobcnt2k * dd->ipath_palign;
}
} else { /* single buffer size (2K, currently) */
pioaddr = addr + dd->ipath_piobufbase;
piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
dd->ipath_piobcnt4k * dd->ipath_4kalign;
}
for (bits = 0; !(piolen & (1ULL << bits)); bits++)
/* do nothing */ ;
if (piolen != (1ULL << bits)) {
piolen >>= bits;
while (piolen >>= 1)
bits++;
piolen = 1ULL << (bits + 1);
}
if (pioaddr & (piolen - 1)) {
u64 atmp;
ipath_dbg("pioaddr %llx not on right boundary for size "
"%llx, fixing\n",
(unsigned long long) pioaddr,
(unsigned long long) piolen);
atmp = pioaddr & ~(piolen - 1);
if (atmp < addr || (atmp + piolen) > (addr + len)) {
ipath_dev_err(dd, "No way to align address/size "
"(%llx/%llx), no WC mtrr\n",
(unsigned long long) atmp,
(unsigned long long) piolen << 1);
ret = -ENODEV;
} else {
ipath_dbg("changing WC base from %llx to %llx, "
"len from %llx to %llx\n",
(unsigned long long) pioaddr,
(unsigned long long) atmp,
(unsigned long long) piolen,
(unsigned long long) piolen << 1);
pioaddr = atmp;
piolen <<= 1;
}
}
if (!ret) {
dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
if (dd->wc_cookie < 0) {
ipath_dev_err(dd, "Setting mtrr failed on PIO buffers\n");
ret = -ENODEV;
} else if (dd->wc_cookie == 0)
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC not needed\n");
else
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC\n");
}
return ret;
}
/**
* ipath_disable_wc - disable write combining for MMIO writes to the device
* @dd: infinipath device
*/
void ipath_disable_wc(struct ipath_devdata *dd)
{
arch_phys_wc_del(dd->wc_cookie);
}
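
ipath_enable_wc() above must hand arch_phys_wc_add() a region whose length is a power of two and whose base is aligned to that length; when the raw PIO span is not, it rounds the length up and, if necessary, aligns the base down and doubles the length (subject to the BAR-bounds check in the function). A small standalone sketch of just that arithmetic, with made-up base and length values:

/*
 * Illustration only: the power-of-two sizing and alignment fixup done
 * in ipath_enable_wc() above, with hypothetical base/length values.
 * The driver's additional check that the result stays inside the PCI
 * BAR is omitted here.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pioaddr = 0xd0005000ULL;	/* hypothetical PIO base   */
	uint64_t piolen  = 0x00003000ULL;	/* hypothetical PIO length */
	unsigned bits;

	/* Round the length up to the next power of two, as the driver does. */
	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
		;
	if (piolen != (1ULL << bits)) {
		piolen >>= bits;
		while (piolen >>= 1)
			bits++;
		piolen = 1ULL << (bits + 1);
	}

	/* If the base is not aligned to the length, align down and double. */
	if (pioaddr & (piolen - 1)) {
		pioaddr &= ~(piolen - 1);
		piolen <<= 1;
	}

	printf("WC region: base 0x%llx len 0x%llx\n",
	       (unsigned long long)pioaddr, (unsigned long long)piolen);
	return 0;
}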


@@ -89,7 +89,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = ah_attr->grh.sgid_index;
ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
@@ -148,9 +148,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
enum rdma_link_layer ll;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
if (ll == IB_LINK_LAYER_ETHERNET)
ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
else
ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
if (ah->av.ib.stat_rate)
ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;


@@ -638,7 +638,7 @@ static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries,
* simulated FLUSH_ERR completions
*/
list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) {
mlx4_ib_qp_sw_comp(qp, num_entries, wc, npolled, 1);
mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 1);
if (*npolled >= num_entries)
goto out;
}


@@ -580,7 +580,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
list.length = sizeof (struct mlx4_rcv_tunnel_mad);
list.lkey = tun_ctx->mr->lkey;
list.lkey = tun_ctx->pd->local_dma_lkey;
wr.wr.ud.ah = ah;
wr.wr.ud.port_num = port;
@@ -1133,7 +1133,7 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
sg_list.addr = tun_qp->ring[index].map;
sg_list.length = size;
sg_list.lkey = ctx->mr->lkey;
sg_list.lkey = ctx->pd->local_dma_lkey;
recv_wr.next = NULL;
recv_wr.sg_list = &sg_list;
@@ -1244,7 +1244,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
list.length = sizeof (struct mlx4_mad_snd_buf);
list.lkey = sqp_ctx->mr->lkey;
list.lkey = sqp_ctx->pd->local_dma_lkey;
wr.wr.ud.ah = ah;
wr.wr.ud.port_num = port;
@@ -1827,19 +1827,12 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
goto err_cq;
}
ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(ctx->mr)) {
ret = PTR_ERR(ctx->mr);
pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
goto err_pd;
}
if (ctx->has_smi) {
ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
if (ret) {
pr_err("Couldn't create %s QP0 (%d)\n",
create_tun ? "tunnel for" : "", ret);
goto err_mr;
goto err_pd;
}
}
@@ -1876,10 +1869,6 @@ err_qp0:
ib_destroy_qp(ctx->qp[0].qp);
ctx->qp[0].qp = NULL;
err_mr:
ib_dereg_mr(ctx->mr);
ctx->mr = NULL;
err_pd:
ib_dealloc_pd(ctx->pd);
ctx->pd = NULL;
@@ -1916,8 +1905,6 @@ static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
ib_destroy_qp(ctx->qp[1].qp);
ctx->qp[1].qp = NULL;
mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
ib_dereg_mr(ctx->mr);
ctx->mr = NULL;
ib_dealloc_pd(ctx->pd);
ctx->pd = NULL;
ib_destroy_cq(ctx->cq);
@@ -2050,8 +2037,6 @@ static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
ib_destroy_qp(sqp_ctx->qp[1].qp);
sqp_ctx->qp[1].qp = NULL;
mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
ib_dereg_mr(sqp_ctx->mr);
sqp_ctx->mr = NULL;
ib_dealloc_pd(sqp_ctx->pd);
sqp_ctx->pd = NULL;
ib_destroy_cq(sqp_ctx->cq);

File diff suppressed because it is too large


@@ -51,6 +51,10 @@
pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
(group)->name, group->demux->port, ## arg)
#define mcg_debug_group(group, format, arg...) \
pr_debug("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
(group)->name, (group)->demux->port, ## arg)
#define mcg_error_group(group, format, arg...) \
pr_err(" %16s: " format, (group)->name, ## arg)
@@ -206,15 +210,16 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = ctx->dev;
struct ib_ah_attr ah_attr;
unsigned long flags;
spin_lock(&dev->sm_lock);
spin_lock_irqsave(&dev->sm_lock, flags);
if (!dev->sm_ah[ctx->port - 1]) {
/* port is not yet Active, sm_ah not ready */
spin_unlock(&dev->sm_lock);
spin_unlock_irqrestore(&dev->sm_lock, flags);
return -EAGAIN;
}
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
spin_unlock_irqrestore(&dev->sm_lock, flags);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
&ah_attr, NULL, mad);
@@ -961,8 +966,8 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
mutex_lock(&group->lock);
if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
mutex_unlock(&group->lock);
mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
port, slave, MAX_PEND_REQS_PER_FUNC);
mcg_debug_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
port, slave, MAX_PEND_REQS_PER_FUNC);
release_group(group, 0);
kfree(req);
return -ENOMEM;


@@ -70,11 +70,24 @@ extern int mlx4_ib_sm_guid_assign;
#define MLX4_IB_UC_STEER_QPN_ALIGN 1
#define MLX4_IB_UC_MAX_NUM_QPS 256
enum hw_bar_type {
HW_BAR_BF,
HW_BAR_DB,
HW_BAR_CLOCK,
HW_BAR_COUNT
};
struct mlx4_ib_vma_private_data {
struct vm_area_struct *vma;
};
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
};
struct mlx4_ib_pd {
@@ -415,7 +428,6 @@ struct mlx4_ib_demux_pv_ctx {
struct ib_device *ib_dev;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
struct work_struct work;
struct workqueue_struct *wq;
struct mlx4_ib_demux_pv_qp qp[2];
@@ -457,15 +469,26 @@ struct mlx4_ib_sriov {
struct idr pv_id_table;
};
struct gid_cache_context {
int real_index;
int refcount;
};
struct gid_entry {
union ib_gid gid;
struct gid_cache_context *ctx;
};
struct mlx4_port_gid_table {
struct gid_entry gids[MLX4_MAX_PORT_GIDS];
};
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
struct net_device *masters[MLX4_MAX_PORTS];
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
struct notifier_block nb_inet6;
union ib_gid gid_table[MLX4_MAX_PORTS][128];
struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
};
struct pkey_mgt {
@@ -680,8 +703,9 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len);
void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
@@ -838,5 +862,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
u64 start, u64 length, u64 virt_addr,
int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
u8 port_num, int index);
#endif /* MLX4_IB_H */


@@ -350,19 +350,24 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
return 0;
}
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
int err;
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
mr = kmalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
max_page_list_len, 0, &mr->mmr);
max_num_sg, 0, &mr->mmr);
if (err)
goto err_free;


@@ -1292,14 +1292,18 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
path->static_rate = 0;
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev,
port,
ah->grh.sgid_index);
if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
pr_err("sgid_index (%u) too large. max is %d\n",
ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
return -1;
}
path->grh_mylmc |= 1 << 7;
path->mgid_index = ah->grh.sgid_index;
path->mgid_index = real_sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
cpu_to_be32((ah->grh.traffic_class << 20) |


@@ -640,6 +640,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
struct mlx4_port *p;
int i;
int ret;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) ==
IB_LINK_LAYER_ETHERNET;
p = kzalloc(sizeof *p, GFP_KERNEL);
if (!p)
@@ -657,7 +659,8 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->pkey_group.name = "pkey_idx";
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
alloc_group_attrs(show_port_pkey,
is_eth ? NULL : store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
if (!p->pkey_group.attrs) {
ret = -ENOMEM;


@@ -33,6 +33,7 @@
#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -227,7 +228,14 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
wc->wc_flags |= g ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
if (unlikely(is_qp1(qp->ibqp.qp_type))) {
u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
&wc->pkey_index);
} else {
wc->pkey_index = 0;
}
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)


@@ -212,6 +212,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
int err = -ENOMEM;
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
@@ -264,7 +265,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->hw_ver = mdev->pdev->revision;
props->max_mr_size = ~0ull;
props->page_size_cap = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
@@ -273,6 +274,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
@@ -1121,7 +1123,6 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev)
mlx5_ib_destroy_qp(dev->umrc.qp);
ib_destroy_cq(dev->umrc.cq);
ib_dereg_mr(dev->umrc.mr);
ib_dealloc_pd(dev->umrc.pd);
}
@@ -1136,7 +1137,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_mr *mr;
struct ib_cq_init_attr cq_attr = {};
int ret;
@@ -1154,13 +1154,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
goto error_0;
}
mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(mr)) {
mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
ret = PTR_ERR(mr);
goto error_1;
}
cq_attr.cqe = 128;
cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
&cq_attr);
@@ -1218,7 +1211,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
dev->umrc.qp = qp;
dev->umrc.cq = cq;
dev->umrc.mr = mr;
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
@@ -1240,9 +1232,6 @@ error_3:
ib_destroy_cq(cq);
error_2:
ib_dereg_mr(mr);
error_1:
ib_dealloc_pd(pd);
error_0:
@@ -1256,10 +1245,18 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
u32 rsvd_lkey;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey);
if (ret) {
pr_err("Failed to query special context %d\n", ret);
return ret;
}
dev->ib_dev.local_dma_lkey = rsvd_lkey;
devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->p0)) {
ret = PTR_ERR(devr->p0);
@@ -1421,7 +1418,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
@@ -1490,12 +1486,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.create_mr = mlx5_ib_create_mr;
dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;


@@ -349,7 +349,6 @@ struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_mr *mr;
/* control access to UMR QP
*/
struct semaphore sem;
@@ -573,11 +572,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
int npages, int zap);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr);
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len);
void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
@@ -683,6 +680,11 @@ static inline u8 convert_access(int acc)
MLX5_PERM_LOCAL_READ;
}
static inline int is_qp1(enum ib_qp_type qp_type)
{
return qp_type == IB_QPT_GSI;
}
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)


@@ -441,9 +441,6 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
spin_unlock_irq(&ent->lock);
queue_work(cache->wq, &ent->work);
if (mr)
break;
}
if (!mr)
@@ -690,12 +687,11 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_mr *mr = dev->umrc.mr;
struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
sg->addr = dma;
sg->length = ALIGN(sizeof(u64) * n, 64);
sg->lkey = mr->lkey;
sg->lkey = dev->umrc.pd->local_dma_lkey;
wr->next = NULL;
wr->send_flags = 0;
@@ -926,7 +922,7 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
sg.addr = dma;
sg.length = ALIGN(npages * sizeof(u64),
MLX5_UMR_MTT_ALIGNMENT);
sg.lkey = dev->umrc.mr->lkey;
sg.lkey = dev->umrc.pd->local_dma_lkey;
wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
MLX5_IB_SEND_UMR_UPDATE_MTT;
@@ -1118,19 +1114,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mr->ibmr;
error:
/*
* Destroy the umem *before* destroying the MR, to ensure we
* will not have any in-flight notifiers when destroying the
* MR.
*
* As the MR is completely invalid to begin with, and this
* error path is only taken if we can't push the mr entry into
* the pagefault tree, this is safe.
*/
ib_umem_release(umem);
/* Kill the MR, and return an error code. */
clean_mr(mr);
return ERR_PTR(err);
}
@@ -1173,6 +1157,19 @@ static int clean_mr(struct mlx5_ib_mr *mr)
int umred = mr->umred;
int err;
if (mr->sig) {
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
kfree(mr->sig);
mr->sig = NULL;
}
if (!umred) {
err = destroy_mkey(dev, mr);
if (err) {
@@ -1234,14 +1231,15 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
struct ib_mr_init_attr *mr_init_attr)
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
int access_mode, err;
int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
int ndescs = roundup(max_num_sg, 4);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
@@ -1257,9 +1255,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
in->seg.xlt_oct_size = cpu_to_be32(ndescs);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
access_mode = MLX5_ACCESS_MODE_MTT;
if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
if (mr_type == IB_MR_TYPE_MEM_REG) {
access_mode = MLX5_ACCESS_MODE_MTT;
in->seg.log2_page_size = PAGE_SHIFT;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
@@ -1285,6 +1285,10 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
mr->sig->sig_err_exists = false;
/* Next UMR, Arm SIGERR */
++mr->sig->sigerr_count;
} else {
mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
err = -EINVAL;
goto err_free_in;
}
in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
@@ -1320,80 +1324,6 @@ err_free:
return ERR_PTR(err);
}
int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int err;
if (mr->sig) {
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
kfree(mr->sig);
}
err = destroy_mkey(dev, mr);
if (err) {
mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
mr->mmr.key, err);
return err;
}
kfree(mr);
return err;
}
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
int err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
in->seg.status = MLX5_MKEY_STATUS_FREE;
in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
/*
* TBD not needed - issue 197292 */
in->seg.log2_page_size = PAGE_SHIFT;
err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
NULL, NULL);
kfree(in);
if (err)
goto err_free;
mr->ibmr.lkey = mr->mmr.key;
mr->ibmr.rkey = mr->mmr.key;
mr->umem = NULL;
return &mr->ibmr;
err_free:
kfree(mr);
return ERR_PTR(err);
}
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len)
{

Some files were not shown because too many files have changed in this diff