  1. /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
  2. /*
  3. * Copyright(c) 2016 - 2019 Intel Corporation.
  4. */
  5. #ifndef DEF_RDMA_VT_H
  6. #define DEF_RDMA_VT_H
  7. /*
  8. * Structure that low level drivers will populate in order to register with the
  9. * rdmavt layer.
  10. */
  11. #include <linux/spinlock.h>
  12. #include <linux/list.h>
  13. #include <linux/hash.h>
  14. #include <rdma/ib_verbs.h>
  15. #include <rdma/ib_mad.h>
  16. #include <rdma/rdmavt_mr.h>
#define RVT_MAX_PKEY_VALUES 16

#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */
#define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/
#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */

/*
 * struct trap_list - a list of pending (not yet repressed) MAD traps
 * @list_len: number of traps currently queued on @list; users bound this
 *            with RVT_MAX_TRAP_LEN
 * @list: list head of queued traps
 */
struct trap_list {
	u32 list_len;
	struct list_head list;
};
struct rvt_qp;
struct rvt_qpn_table;

/*
 * struct rvt_ibport - per-port state shared between rdmavt and the driver.
 * The driver supplies one of these per port via rvt_init_port().
 */
struct rvt_ibport {
	struct rvt_qp __rcu *qp[2];	/* special QPs 0 and 1, RCU protected */
	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
	struct rb_root mcast_tree;	/* multicast groups, found via rvt_mcast_find() */
	spinlock_t lock;		/* protect changes in this struct */

	/* non-zero when timer is set */
	unsigned long mkey_lease_timeout;
	unsigned long trap_timeout;
	__be64 gid_prefix;	/* in network order */
	__be64 mkey;		/* in network order */
	u64 tid;
	u32 port_cap_flags;
	u16 port_cap3_flags;
	u32 pma_sample_start;
	u32 pma_sample_interval;
	__be16 pma_counter_select[5];
	u16 pma_tag;
	u16 mkey_lease_period;
	u32 sm_lid;
	u8 sm_sl;
	u8 mkeyprot;
	u8 subnet_timeout;
	u8 vl_high_limit;

	/*
	 * Driver is expected to keep these up to date. These
	 * counters are informational only and not required to be
	 * completely accurate.
	 */
	u64 n_rc_resends;
	u64 n_seq_naks;
	u64 n_rdma_seq;
	u64 n_rnr_naks;
	u64 n_other_naks;
	u64 n_loop_pkts;
	u64 n_pkt_drops;
	u64 n_vl15_dropped;
	u64 n_rc_timeouts;
	u64 n_dmawait;
	u64 n_unaligned;
	u64 n_rc_dupreq;
	u64 n_rc_seqnak;
	u64 n_rc_crwaits;
	u16 pkey_violations;
	u16 qkey_violations;
	u16 mkey_violations;

	/* Hot-path per CPU counters to avoid cacheline trading to update */
	/* NOTE(review): z_* appear to be baselines for the percpu counters
	 * below — confirm in the counter reporting code.
	 */
	u64 z_rc_acks;
	u64 z_rc_qacks;
	u64 z_rc_delayed_comp;
	u64 __percpu *rc_acks;
	u64 __percpu *rc_qacks;
	u64 __percpu *rc_delayed_comp;

	void *priv; /* driver private data */

	/*
	 * The pkey table is allocated and maintained by the driver. Drivers
	 * need to have access to this before registering with rdmavt. However
	 * rdmavt will need access to it so drivers need to provide this during
	 * the attach port API call.
	 */
	u16 *pkey_table;

	struct rvt_ah *sm_ah;

	/*
	 * Keep a list of traps that have not been repressed. They will be
	 * resent based on trap_timer.
	 */
	struct trap_list trap_lists[RVT_MAX_TRAP_LISTS];
	struct timer_list trap_timer;
};
#define RVT_CQN_MAX 16 /* maximum length of cq name */

/* Values for rvt_driver_params.sge_copy_mode */
#define RVT_SGE_COPY_MEMCPY 0
#define RVT_SGE_COPY_CACHELESS 1
#define RVT_SGE_COPY_ADAPTIVE 2

/*
 * Things that are driver specific, module parameters in hfi1 and qib
 */
struct rvt_driver_params {
	struct ib_device_attr props;	/* attributes reported to ULPs */

	/*
	 * Anything driver specific that is not covered by props
	 * For instance special module parameters. Goes here.
	 */
	unsigned int lkey_table_size;
	unsigned int qp_table_size;
	unsigned int sge_copy_mode;	/* one of RVT_SGE_COPY_* */
	unsigned int wss_threshold;
	unsigned int wss_clean_period;
	int qpn_start;
	int qpn_inc;
	int qpn_res_start;	/* reserved QPN range [res_start, res_end] */
	int qpn_res_end;
	int nports;
	int npkeys;		/* pkeys per port; see rvt_get_npkeys() */
	int node;
	int psn_mask;
	int psn_shift;
	int psn_modify_mask;
	u32 core_cap_flags;
	u32 max_mad_size;
	u8 qos_shift;
	u8 max_rdma_atomic;	/* see rvt_max_atomic()/rvt_size_atomic() */
	u8 extra_rdma_atomic;
	u8 reserved_operations;
};
/* User context - rdmavt wrapper around the core ib_ucontext */
struct rvt_ucontext {
	struct ib_ucontext ibucontext;
};
/* Protection domain; recovered from an ib_pd via ibpd_to_rvtpd() */
struct rvt_pd {
	struct ib_pd ibpd;
	bool user;	/* presumably set for userspace-created PDs - confirm */
};
/* Address handle; recovered from an ib_ah via ibah_to_rvtah() */
struct rvt_ah {
	struct ib_ah ibah;
	struct rdma_ah_attr attr;
	u8 vl;		/* virtual lane - per name; confirm in driver */
	u8 log_pmtu;	/* log2(path MTU) - per name; confirm in driver */
};
/*
 * This structure is used by rvt_mmap() to validate an offset
 * when an mmap() request is made. The vm_area_struct then uses
 * this as its vm_private_data.
 */
struct rvt_mmap_info {
	struct list_head pending_mmaps;	/* entry on rvt_dev_info.pending_mmaps */
	struct ib_ucontext *context;	/* owning user context */
	void *obj;			/* object being mapped */
	__u64 offset;			/* mmap offset to validate against */
	struct kref ref;		/* reference count */
	u32 size;			/* size of the mapping */
};
/* memory working set size */
struct rvt_wss {
	unsigned long *entries;
	atomic_t total_count;
	atomic_t clean_counter;
	atomic_t clean_entry;
	int threshold;		/* presumably from dparms.wss_threshold - confirm */
	int num_entries;
	long pages_mask;
	unsigned int clean_period;	/* presumably from dparms.wss_clean_period - confirm */
};
struct rvt_dev_info;
struct rvt_swqe;

/*
 * struct rvt_driver_provided - calldowns the low level driver fills in
 * when registering with rdmavt.
 *
 * Which functions are required depends on which verbs rdmavt is
 * providing and which verbs the driver is overriding. See
 * check_support() for details.
 */
struct rvt_driver_provided {
	/* hot path calldowns in a single cacheline */

	/*
	 * Give the driver a notice that there is send work to do. It is up to
	 * the driver to generally push the packets out, this just queues the
	 * work with the driver. There are two variants here. The no_lock
	 * version requires the s_lock not to be held. The other assumes the
	 * s_lock is held.
	 */
	bool (*schedule_send)(struct rvt_qp *qp);
	bool (*schedule_send_no_lock)(struct rvt_qp *qp);

	/*
	 * Driver specific work request setup and checking.
	 * This function is allowed to perform any setup, checks, or
	 * adjustments required to the SWQE in order to be usable by
	 * underlying protocols. This includes private data structure
	 * allocations.
	 */
	int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
			 bool *call_send);

	/*
	 * Sometimes rdmavt needs to kick the driver's send progress. That is
	 * done by this call back.
	 */
	void (*do_send)(struct rvt_qp *qp);

	/*
	 * Returns a pointer to the underlying hardware's PCI device. This is
	 * used to display information as to what hardware is being referenced
	 * in an output message
	 */
	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);

	/*
	 * Allocate a private queue pair data structure for driver specific
	 * information which is opaque to rdmavt. Errors are returned via
	 * ERR_PTR(err). The driver is free to return NULL or a valid
	 * pointer.
	 */
	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);

	/*
	 * Init a structure allocated with qp_priv_alloc(). This should be
	 * called after all qp fields have been initialized in rdmavt.
	 */
	int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
			    struct ib_qp_init_attr *init_attr);

	/*
	 * Free the driver's private qp structure.
	 */
	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);

	/*
	 * Inform the driver the particular qp in question has been reset so
	 * that it can clean up anything it needs to.
	 */
	void (*notify_qp_reset)(struct rvt_qp *qp);

	/*
	 * Get a path mtu from the driver based on qp attributes.
	 */
	int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
				  struct ib_qp_attr *attr);

	/*
	 * Notify driver that it needs to flush any outstanding IO requests
	 * that are waiting on a qp.
	 */
	void (*flush_qp_waiters)(struct rvt_qp *qp);

	/*
	 * Notify driver to stop its queue of sending packets. Nothing else
	 * should be posted to the queue pair after this has been called.
	 */
	void (*stop_send_queue)(struct rvt_qp *qp);

	/*
	 * Have the driver drain any in progress operations
	 */
	void (*quiesce_qp)(struct rvt_qp *qp);

	/*
	 * Inform the driver a qp has gone to error state.
	 */
	void (*notify_error_qp)(struct rvt_qp *qp);

	/*
	 * Get an MTU for a qp.
	 */
	u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
			   u32 pmtu);

	/*
	 * Convert an mtu to a path mtu
	 */
	int (*mtu_to_path_mtu)(u32 mtu);

	/*
	 * Get the guid of a port in big endian byte order
	 */
	int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			   int guid_index, __be64 *guid);

	/*
	 * Query driver for the state of the port.
	 */
	int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num,
				struct ib_port_attr *props);

	/*
	 * Tell driver to shutdown a port
	 */
	int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num);

	/* Tell driver to send a trap for changed port capabilities */
	void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num);

	/*
	 * The following functions can be safely ignored completely. Any use of
	 * these is checked for NULL before blindly calling. Rdmavt should also
	 * be functional if drivers omit these.
	 */

	/* Called to inform the driver that all qps should now be freed. */
	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);

	/* Driver specific AH validation */
	int (*check_ah)(struct ib_device *, struct rdma_ah_attr *);

	/* Inform the driver a new AH has been created */
	void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *,
			      struct rvt_ah *);

	/* Let the driver pick the next queue pair number */
	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
			 enum ib_qp_type type, u32 port_num);

	/* Determine if it is safe or allowed to modify the qp */
	int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
			       int attr_mask, struct ib_udata *udata);

	/* Driver specific QP modification/notification-of */
	void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask, struct ib_udata *udata);

	/* Notify driver a mad agent has been created */
	void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);

	/* Notify driver a mad agent has been removed */
	void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);

	/* Notify driver to restart rc */
	void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);

	/* Get and return CPU to pin CQ processing thread */
	int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
};
/*
 * struct rvt_dev_info - per-device rdmavt state. Allocated by the driver
 * via rvt_alloc_device() and registered with rvt_register_device().
 */
struct rvt_dev_info {
	struct ib_device ibdev; /* Keep this first. Nothing above here */

	/*
	 * Prior to calling for registration the driver will be responsible for
	 * allocating space for this structure.
	 *
	 * The driver will also be responsible for filling in certain members of
	 * dparms.props. The driver needs to fill in dparms exactly as it would
	 * want values reported to a ULP. This will be returned to the caller
	 * in rdmavt's device. The driver should also therefore refrain from
	 * modifying this directly after registration with rdmavt.
	 */

	/* Driver specific properties */
	struct rvt_driver_params dparms;

	/* post send table */
	const struct rvt_operation_params *post_parms;

	/* opcode translation table */
	const enum ib_wc_opcode *wc_opcode;

	/* Driver specific helper functions */
	struct rvt_driver_provided driver_f;

	struct rvt_mregion __rcu *dma_mr;
	struct rvt_lkey_table lkey_table;

	/* Internal use */
	int n_pds_allocated;
	spinlock_t n_pds_lock; /* Protect pd allocated count */
	int n_ahs_allocated;
	spinlock_t n_ahs_lock; /* Protect ah allocated count */
	u32 n_srqs_allocated;
	spinlock_t n_srqs_lock; /* Protect srqs allocated count */
	int flags;
	struct rvt_ibport **ports;	/* per-port state; see rvt_init_port() */

	/* QP */
	struct rvt_qp_ibdev *qp_dev;
	u32 n_qps_allocated;	/* number of QPs allocated for device */
	u32 n_rc_qps;		/* number of RC QPs allocated for device */
	u32 busy_jiffies;	/* timeout scaling based on RC QP count */
	spinlock_t n_qps_lock;	/* protect qps, rc qps and busy jiffy counts */

	/* memory maps */
	struct list_head pending_mmaps;
	spinlock_t mmap_offset_lock; /* protect mmap_offset */
	u32 mmap_offset;
	spinlock_t pending_lock; /* protect pending mmap list */

	/* CQ */
	u32 n_cqs_allocated;	/* number of CQs allocated for device */
	spinlock_t n_cqs_lock;	/* protect count of in use cqs */

	/* Multicast */
	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
	spinlock_t n_mcast_grps_lock;

	/* Memory Working Set Size */
	struct rvt_wss *wss;
};
/**
 * rvt_set_ibdev_name - Craft an IB device name from client info
 * @rdi: pointer to the client rvt_dev_info structure
 * @fmt: format string combining @name and @unit (e.g. "%s%d")
 * @name: client specific name
 * @unit: client specific unit number.
 */
static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
				      const char *fmt, const char *name,
				      const int unit)
{
	/*
	 * FIXME: rvt and its users want to touch the ibdev before
	 * registration and have things like the name work. We don't have the
	 * infrastructure in the core to support this directly today, hack it
	 * to work by setting the name manually here.
	 */
	dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
	strscpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
}
/**
 * rvt_get_ibdev_name - return the IB name
 * @rdi: rdmavt device
 *
 * Return the registered name of the device, i.e. the name previously
 * applied with rvt_set_ibdev_name().
 */
static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
{
	return dev_name(&rdi->ibdev.dev);
}
/* Convert a core ib_pd pointer to the containing rvt_pd */
static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct rvt_pd, ibpd);
}
/* Convert a core ib_ah pointer to the containing rvt_ah */
static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
{
	return container_of(ibah, struct rvt_ah, ibah);
}
/* Convert a core ib_device pointer to the containing rvt_dev_info */
static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
{
	return container_of(ibdev, struct rvt_dev_info, ibdev);
}
/* Return the per-port pkey table size (dparms.npkeys) */
static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
{
	/*
	 * All ports have same number of pkeys.
	 */
	return rdi->dparms.npkeys;
}
  407. /*
  408. * Return the max atomic suitable for determining
  409. * the size of the ack ring buffer in a QP.
  410. */
  411. static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
  412. {
  413. return rdi->dparms.max_rdma_atomic +
  414. rdi->dparms.extra_rdma_atomic + 1;
  415. }
/* Total RDMA atomic slots reserved for a QP (without the +1 of rvt_max_atomic()) */
static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
{
	return rdi->dparms.max_rdma_atomic +
	       rdi->dparms.extra_rdma_atomic;
}
  421. /*
  422. * Return the indexed PKEY from the port PKEY table.
  423. */
  424. static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
  425. int port_index,
  426. unsigned index)
  427. {
  428. if (index >= rvt_get_npkeys(rdi))
  429. return 0;
  430. else
  431. return rdi->ports[port_index]->pkey_table[index];
  432. }
/* Device life cycle */
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);

/* Address handle and port setup */
int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
		  int port_index, u16 *pkey_table);

/* Memory region key handling */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
		    int access);
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc);
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct rvt_sge *last_sge,
		struct ib_sge *sge, int acc);

/* Multicast lookup */
struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
				 u16 lid);

#endif          /* DEF_RDMA_VT_H */