Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "A smaller cycle this time. Notably we see another new driver, 'Soft
  iWarp', and the deletion of an ancient unused driver for nes.

   - Revise and simplify the signature offload RDMA MR APIs

   - More progress on hoisting object allocation boiler plate code out
     of the drivers

   - Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib,
     i40iw

   - Tree wide cleanups: struct_size, put_user_page, xarray, rst doc
     conversion

   - Removal of obsolete ib_ucm chardev and nes driver

   - netlink based discovery of chardevs and autoloading of the modules
     providing them

   - Move more of the rdmavt/hfi1 uapi to include/uapi/rdma

   - New driver 'siw' for software based iWarp running on top of netdev,
     much like rxe's software RoCE.

   - mlx5 feature to report events in their raw devx format to userspace

   - Expose per-object counters through rdma tool

   - Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core
     from netdev"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (194 commits)
  RMDA/siw: Require a 64 bit arch
  RDMA/siw: Mark expected switch fall-throughs
  RDMA/core: Fix -Wunused-const-variable warnings
  rdma/siw: Remove set but not used variable 's'
  rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS
  RDMA/siw: Add missing rtnl_lock around access to ifa
  rdma/siw: Use proper enumerated type in map_cqe_status
  RDMA/siw: Remove unnecessary kthread create/destroy printouts
  IB/rdmavt: Fix variable shadowing issue in rvt_create_cq
  RDMA/core: Fix race when resolving IP address
  RDMA/core: Make rdma_counter.h compile stand alone
  IB/core: Work on the caller socket net namespace in nldev_newlink()
  RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM
  RDMA/mlx5: Set RDMA DIM to be enabled by default
  RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink
  RDMA/core: Provide RDMA DIM support for ULPs
  linux/dim: Implement RDMA adaptive moderation (DIM)
  IB/mlx5: Report correctly tag matching rendezvous capability
  docs: infiniband: add it to the driver-api bookset
  IB/mlx5: Implement VHCA tunnel mechanism in DEVX
  ...
This commit is contained in:
Linus Torvalds
2019-07-15 20:38:15 -07:00
221 changed files with 18860 additions and 24846 deletions

View File

@@ -2,8 +2,6 @@
# DIM Dynamic Interrupt Moderation library
#
obj-$(CONFIG_DIMLIB) = net_dim.o
obj-$(CONFIG_DIMLIB) += dim.o
net_dim-y = \
dim.o \
net_dim.o
dim-y := dim.o net_dim.o rdma_dim.o

108
lib/dim/rdma_dim.c Normal file
View File

@@ -0,0 +1,108 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
/*
 * Move dim->profile_ix one position in the direction given by
 * dim->tune_state and bump the matching step counter.
 *
 * Returns DIM_ON_EDGE, leaving @dim untouched, when the index is already
 * at the last profile (going right) or at profile 0 (going left).
 * Otherwise returns DIM_STEPPED; for any tune_state other than the two
 * directions nothing is modified.
 */
static int rdma_dim_step(struct dim *dim)
{
	switch (dim->tune_state) {
	case DIM_GOING_RIGHT:
		/* Already on the highest profile: cannot go further right. */
		if (dim->profile_ix == (RDMA_DIM_PARAMS_NUM_PROFILES - 1))
			return DIM_ON_EDGE;
		dim->profile_ix++;
		dim->steps_right++;
		break;
	case DIM_GOING_LEFT:
		/* Already on the lowest profile: cannot go further left. */
		if (dim->profile_ix == 0)
			return DIM_ON_EDGE;
		dim->profile_ix--;
		dim->steps_left++;
		break;
	default:
		/* Not moving in either direction: nothing to adjust. */
		break;
	}

	return DIM_STEPPED;
}
/*
 * Classify the current statistics against the previous ones.
 *
 * cpms is examined first; cpe_ratio is only consulted when cpms shows no
 * significant difference. In both cases a larger current value counts as
 * DIM_STATS_BETTER and a smaller one as DIM_STATS_WORSE. When neither
 * field differs significantly, or when prev->cpms is zero (treated as
 * the first sample), the result is DIM_STATS_SAME.
 */
static int rdma_dim_stats_compare(struct dim_stats *curr,
				  struct dim_stats *prev)
{
	/* first stat: no previous cpms recorded yet */
	if (prev->cpms == 0)
		return DIM_STATS_SAME;

	if (IS_SIGNIFICANT_DIFF(curr->cpms, prev->cpms)) {
		if (curr->cpms > prev->cpms)
			return DIM_STATS_BETTER;
		return DIM_STATS_WORSE;
	}

	if (IS_SIGNIFICANT_DIFF(curr->cpe_ratio, prev->cpe_ratio)) {
		if (curr->cpe_ratio > prev->cpe_ratio)
			return DIM_STATS_BETTER;
		return DIM_STATS_WORSE;
	}

	return DIM_STATS_SAME;
}
/*
 * Decide whether the moderation profile should change, based on how
 * @curr_stats compares with dim->prev_stats.
 *
 * Nothing is evaluated while tune_state is one of the two parking
 * states. Otherwise:
 *  - same stats:   reset to profile 0 if cpe_ratio is at most 50 times
 *                  the current profile index;
 *  - worse stats:  reverse direction (dim_turn), then take a step;
 *  - better stats: take a step in the current direction.
 * A step that reports DIM_ON_EDGE triggers another dim_turn().
 *
 * @curr_stats is always stored as the new dim->prev_stats.
 *
 * Returns true when dim->profile_ix differs from its value on entry.
 */
static bool rdma_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
{
	int old_ix = dim->profile_ix;
	u8 tune_state = dim->tune_state;
	bool parked = tune_state == DIM_PARKING_ON_TOP ||
		      tune_state == DIM_PARKING_TIRED;
	int verdict;

	if (!parked) {
		verdict = rdma_dim_stats_compare(curr_stats, &dim->prev_stats);

		if (verdict == DIM_STATS_SAME) {
			if (curr_stats->cpe_ratio <= 50 * old_ix)
				dim->profile_ix = 0;
		} else if (verdict == DIM_STATS_WORSE ||
			   verdict == DIM_STATS_BETTER) {
			/* Got worse: reverse direction before stepping. */
			if (verdict == DIM_STATS_WORSE)
				dim_turn(dim);

			/* Ran into the end of the profile table: turn around. */
			if (rdma_dim_step(dim) == DIM_ON_EDGE)
				dim_turn(dim);
		}
	}

	dim->prev_stats = *curr_stats;

	return dim->profile_ix != old_ix;
}
/*
 * rdma_dim - feed one event with @completions completions into @dim.
 *
 * The measuring sample is refreshed first, passing event_ctr + 1 and
 * comp_ctr + @completions to dim_update_sample_with_comps(). What
 * happens next depends on dim->state:
 *
 *  DIM_START_MEASURE:       copy the current counters into start_sample
 *                           and switch to DIM_MEASURE_IN_PROGRESS.
 *  DIM_MEASURE_IN_PROGRESS: once at least DIM_NEVENTS events separate the
 *                           current sample from start_sample, compute the
 *                           stats and run rdma_dim_decision(). If it
 *                           reports a profile change, switch to
 *                           DIM_APPLY_NEW_PROFILE and schedule dim->work;
 *                           otherwise start a new measurement as above.
 *  DIM_APPLY_NEW_PROFILE:   nothing further is done.
 */
void rdma_dim(struct dim *dim, u64 completions)
{
	struct dim_sample *sample = &dim->measuring_sample;
	struct dim_stats stats;
	u32 elapsed_events;

	dim_update_sample_with_comps(sample->event_ctr + 1, 0, 0,
				     sample->comp_ctr + completions,
				     &dim->measuring_sample);

	if (dim->state == DIM_MEASURE_IN_PROGRESS) {
		/* The difference is held in a u32 before being compared. */
		elapsed_events = sample->event_ctr -
				 dim->start_sample.event_ctr;
		if (elapsed_events < DIM_NEVENTS)
			return;

		dim_calc_stats(&dim->start_sample, sample, &stats);
		if (rdma_dim_decision(&stats, dim)) {
			dim->state = DIM_APPLY_NEW_PROFILE;
			schedule_work(&dim->work);
			return;
		}
		/* Profile unchanged: begin a new measurement below. */
	} else if (dim->state != DIM_START_MEASURE) {
		/* DIM_APPLY_NEW_PROFILE (or anything else): leave state as is. */
		return;
	}

	dim->state = DIM_MEASURE_IN_PROGRESS;
	dim_update_sample_with_comps(sample->event_ctr, 0, 0,
				     sample->comp_ctr,
				     &dim->start_sample);
}
EXPORT_SYMBOL(rdma_dim);