main.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/sched/mm.h>
#include <linux/anon_inodes.h>

#include "cmd.h"

/* Arbitrary to prevent userspace from consuming endless memory */
#define MAX_MIGRATION_SIZE (512*1024*1024)

static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
{
        struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);

        return container_of(core_device, struct mlx5vf_pci_core_device,
                            core_device);
}
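
/*
 * Map a byte offset within the migration data to the page that backs it.
 * Accesses are expected to be sequential, so the last scatterlist position
 * is cached in the migration file and reused as the starting point of the
 * next lookup.
 */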
static struct page *
mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
                          unsigned long offset)
{
        unsigned long cur_offset = 0;
        struct scatterlist *sg;
        unsigned int i;

        /* All accesses are sequential */
        if (offset < migf->last_offset || !migf->last_offset_sg) {
                migf->last_offset = 0;
                migf->last_offset_sg = migf->table.sgt.sgl;
                migf->sg_last_entry = 0;
        }

        cur_offset = migf->last_offset;

        for_each_sg(migf->last_offset_sg, sg,
                    migf->table.sgt.orig_nents - migf->sg_last_entry, i) {
                if (offset < sg->length + cur_offset) {
                        migf->last_offset_sg = sg;
                        migf->sg_last_entry += i;
                        migf->last_offset = cur_offset;
                        return nth_page(sg_page(sg),
                                        (offset - cur_offset) / PAGE_SIZE);
                }
                cur_offset += sg->length;
        }
        return NULL;
}
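
/* Grow the migration data scatter table by @npages bulk-allocated pages. */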
static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
                                      unsigned int npages)
{
        unsigned int to_alloc = npages;
        struct page **page_list;
        unsigned long filled;
        unsigned int to_fill;
        int ret;

        to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
        page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL);
        if (!page_list)
                return -ENOMEM;

        do {
                filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list);
                if (!filled) {
                        ret = -ENOMEM;
                        goto err;
                }
                to_alloc -= filled;
                ret = sg_alloc_append_table_from_pages(
                        &migf->table, page_list, filled, 0,
                        filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
                        GFP_KERNEL);
                if (ret)
                        goto err;
                migf->allocated_length += filled * PAGE_SIZE;
                /* clean input for another bulk allocation */
                memset(page_list, 0, filled * sizeof(*page_list));
                to_fill = min_t(unsigned int, to_alloc,
                                PAGE_SIZE / sizeof(*page_list));
        } while (to_alloc > 0);

        kvfree(page_list);
        return 0;

err:
        kvfree(page_list);
        return ret;
}
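
/* Free the pages backing the migration file and mark it unusable. */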
static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
{
        struct sg_page_iter sg_iter;

        mutex_lock(&migf->lock);
        /* Undo alloc_pages_bulk_array() */
        for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0)
                __free_page(sg_page_iter_page(&sg_iter));
        sg_free_append_table(&migf->table);
        migf->disabled = true;
        migf->total_length = 0;
        migf->allocated_length = 0;
        migf->filp->f_pos = 0;
        mutex_unlock(&migf->lock);
}

static int mlx5vf_release_file(struct inode *inode, struct file *filp)
{
        struct mlx5_vf_migration_file *migf = filp->private_data;

        mlx5vf_disable_fd(migf);
        mutex_destroy(&migf->lock);
        kfree(migf);
        return 0;
}
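
/*
 * read() handler of the STOP_COPY (save) file. Unless O_NONBLOCK was
 * requested, block until the device has produced migration data, then copy
 * it to userspace page by page.
 */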
static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
                                loff_t *pos)
{
        struct mlx5_vf_migration_file *migf = filp->private_data;
        ssize_t done = 0;

        if (pos)
                return -ESPIPE;
        pos = &filp->f_pos;

        if (!(filp->f_flags & O_NONBLOCK)) {
                if (wait_event_interruptible(migf->poll_wait,
                                READ_ONCE(migf->total_length) || migf->is_err))
                        return -ERESTARTSYS;
        }

        mutex_lock(&migf->lock);
        if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) {
                done = -EAGAIN;
                goto out_unlock;
        }
        if (*pos > migf->total_length) {
                done = -EINVAL;
                goto out_unlock;
        }
        if (migf->disabled || migf->is_err) {
                done = -ENODEV;
                goto out_unlock;
        }

        len = min_t(size_t, migf->total_length - *pos, len);
        while (len) {
                size_t page_offset;
                struct page *page;
                size_t page_len;
                u8 *from_buff;
                int ret;

                page_offset = (*pos) % PAGE_SIZE;
                page = mlx5vf_get_migration_page(migf, *pos - page_offset);
                if (!page) {
                        if (done == 0)
                                done = -EINVAL;
                        goto out_unlock;
                }

                page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
                from_buff = kmap_local_page(page);
                ret = copy_to_user(buf, from_buff + page_offset, page_len);
                kunmap_local(from_buff);
                if (ret) {
                        done = -EFAULT;
                        goto out_unlock;
                }
                *pos += page_len;
                len -= page_len;
                done += page_len;
                buf += page_len;
        }

out_unlock:
        mutex_unlock(&migf->lock);
        return done;
}

static __poll_t mlx5vf_save_poll(struct file *filp,
                                 struct poll_table_struct *wait)
{
        struct mlx5_vf_migration_file *migf = filp->private_data;
        __poll_t pollflags = 0;

        poll_wait(filp, &migf->poll_wait, wait);

        mutex_lock(&migf->lock);
        if (migf->disabled || migf->is_err)
                pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
        else if (READ_ONCE(migf->total_length))
                pollflags = EPOLLIN | EPOLLRDNORM;
        mutex_unlock(&migf->lock);

        return pollflags;
}

static const struct file_operations mlx5vf_save_fops = {
        .owner = THIS_MODULE,
        .read = mlx5vf_save_read,
        .poll = mlx5vf_save_poll,
        .release = mlx5vf_release_file,
        .llseek = no_llseek,
};
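
/*
 * Create the file that exposes the saved device state: query the required
 * state size, pre-allocate the backing pages and issue the asynchronous
 * SAVE_VHCA_STATE command that fills them.
 */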
static struct mlx5_vf_migration_file *
mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
{
        struct mlx5_vf_migration_file *migf;
        int ret;

        migf = kzalloc(sizeof(*migf), GFP_KERNEL);
        if (!migf)
                return ERR_PTR(-ENOMEM);

        migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
                                        O_RDONLY);
        if (IS_ERR(migf->filp)) {
                int err = PTR_ERR(migf->filp);

                kfree(migf);
                return ERR_PTR(err);
        }

        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
        init_waitqueue_head(&migf->poll_wait);
        mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
        INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
        ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
                                                    &migf->total_length);
        if (ret)
                goto out_free;

        ret = mlx5vf_add_migration_pages(
                migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
        if (ret)
                goto out_free;

        migf->mvdev = mvdev;
        ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
        if (ret)
                goto out_free;
        return migf;

out_free:
        fput(migf->filp);
        return ERR_PTR(ret);
}
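
/*
 * write() handler of the RESUMING file. Stage the incoming device state in
 * the migration pages, growing the allocation as needed up to
 * MAX_MIGRATION_SIZE.
 */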
static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
                                   size_t len, loff_t *pos)
{
        struct mlx5_vf_migration_file *migf = filp->private_data;
        loff_t requested_length;
        ssize_t done = 0;

        if (pos)
                return -ESPIPE;
        pos = &filp->f_pos;

        if (*pos < 0 ||
            check_add_overflow((loff_t)len, *pos, &requested_length))
                return -EINVAL;

        if (requested_length > MAX_MIGRATION_SIZE)
                return -ENOMEM;

        mutex_lock(&migf->lock);
        if (migf->disabled) {
                done = -ENODEV;
                goto out_unlock;
        }

        if (migf->allocated_length < requested_length) {
                done = mlx5vf_add_migration_pages(
                        migf,
                        DIV_ROUND_UP(requested_length - migf->allocated_length,
                                     PAGE_SIZE));
                if (done)
                        goto out_unlock;
        }

        while (len) {
                size_t page_offset;
                struct page *page;
                size_t page_len;
                u8 *to_buff;
                int ret;

                page_offset = (*pos) % PAGE_SIZE;
                page = mlx5vf_get_migration_page(migf, *pos - page_offset);
                if (!page) {
                        if (done == 0)
                                done = -EINVAL;
                        goto out_unlock;
                }

                page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
                to_buff = kmap_local_page(page);
                ret = copy_from_user(to_buff + page_offset, buf, page_len);
                kunmap_local(to_buff);
                if (ret) {
                        done = -EFAULT;
                        goto out_unlock;
                }
                *pos += page_len;
                len -= page_len;
                done += page_len;
                buf += page_len;
                migf->total_length += page_len;
        }

out_unlock:
        mutex_unlock(&migf->lock);
        return done;
}

static const struct file_operations mlx5vf_resume_fops = {
        .owner = THIS_MODULE,
        .write = mlx5vf_resume_write,
        .release = mlx5vf_release_file,
        .llseek = no_llseek,
};
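
/* Create the write-only file that userspace feeds the device state into. */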
static struct mlx5_vf_migration_file *
mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
{
        struct mlx5_vf_migration_file *migf;

        migf = kzalloc(sizeof(*migf), GFP_KERNEL);
        if (!migf)
                return ERR_PTR(-ENOMEM);

        migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
                                        O_WRONLY);
        if (IS_ERR(migf->filp)) {
                int err = PTR_ERR(migf->filp);

                kfree(migf);
                return ERR_PTR(err);
        }

        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
        return migf;
}
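
/* Tear down any open save/resume migration files for this device. */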
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
{
        if (mvdev->resuming_migf) {
                mlx5vf_disable_fd(mvdev->resuming_migf);
                fput(mvdev->resuming_migf->filp);
                mvdev->resuming_migf = NULL;
        }
        if (mvdev->saving_migf) {
                mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
                cancel_work_sync(&mvdev->saving_migf->async_data.work);
                mlx5vf_disable_fd(mvdev->saving_migf);
                fput(mvdev->saving_migf->filp);
                mvdev->saving_migf = NULL;
        }
}
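
/*
 * Execute a single arc of the VFIO migration state machine. Arcs that open
 * a migration file return its struct file; all other arcs return NULL on
 * success.
 */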
static struct file *
mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
                                    u32 new)
{
        u32 cur = mvdev->mig_state;
        int ret;

        if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) {
                ret = mlx5vf_cmd_suspend_vhca(mvdev,
                        MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }

        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
                ret = mlx5vf_cmd_resume_vhca(mvdev,
                        MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }

        if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
                ret = mlx5vf_cmd_suspend_vhca(mvdev,
                        MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }

        if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
                ret = mlx5vf_cmd_resume_vhca(mvdev,
                        MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }

        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
                struct mlx5_vf_migration_file *migf;

                migf = mlx5vf_pci_save_device_data(mvdev);
                if (IS_ERR(migf))
                        return ERR_CAST(migf);
                get_file(migf->filp);
                mvdev->saving_migf = migf;
                return migf->filp;
        }

        if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
                mlx5vf_disable_fds(mvdev);
                return NULL;
        }

        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
                struct mlx5_vf_migration_file *migf;

                migf = mlx5vf_pci_resume_device_data(mvdev);
                if (IS_ERR(migf))
                        return ERR_CAST(migf);
                get_file(migf->filp);
                mvdev->resuming_migf = migf;
                return migf->filp;
        }

        if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
                ret = mlx5vf_cmd_load_vhca_state(mvdev,
                                                 mvdev->resuming_migf);
                if (ret)
                        return ERR_PTR(ret);
                mlx5vf_disable_fds(mvdev);
                return NULL;
        }

        /*
         * vfio_mig_get_next_state() does not use arcs other than the above
         */
        WARN_ON(true);
        return ERR_PTR(-EINVAL);
}

/*
 * This function is called in all state_mutex unlock cases to
 * handle a 'deferred_reset' if one exists.
 */
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
{
again:
        spin_lock(&mvdev->reset_lock);
        if (mvdev->deferred_reset) {
                mvdev->deferred_reset = false;
                spin_unlock(&mvdev->reset_lock);
                mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
                mlx5vf_disable_fds(mvdev);
                goto again;
        }
        mutex_unlock(&mvdev->state_mutex);
        spin_unlock(&mvdev->reset_lock);
}
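
/*
 * Walk the device from its current migration state to @new_state one arc at
 * a time, as computed by vfio_mig_get_next_state().
 */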
static struct file *
mlx5vf_pci_set_device_state(struct vfio_device *vdev,
                            enum vfio_device_mig_state new_state)
{
        struct mlx5vf_pci_core_device *mvdev = container_of(
                vdev, struct mlx5vf_pci_core_device, core_device.vdev);
        enum vfio_device_mig_state next_state;
        struct file *res = NULL;
        int ret;

        mutex_lock(&mvdev->state_mutex);
        while (new_state != mvdev->mig_state) {
                ret = vfio_mig_get_next_state(vdev, mvdev->mig_state,
                                              new_state, &next_state);
                if (ret) {
                        res = ERR_PTR(ret);
                        break;
                }
                res = mlx5vf_pci_step_device_state_locked(mvdev, next_state);
                if (IS_ERR(res))
                        break;
                mvdev->mig_state = next_state;
                if (WARN_ON(res && new_state != mvdev->mig_state)) {
                        fput(res);
                        res = ERR_PTR(-EINVAL);
                        break;
                }
        }
        mlx5vf_state_mutex_unlock(mvdev);
        return res;
}

static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
                                       enum vfio_device_mig_state *curr_state)
{
        struct mlx5vf_pci_core_device *mvdev = container_of(
                vdev, struct mlx5vf_pci_core_device, core_device.vdev);

        mutex_lock(&mvdev->state_mutex);
        *curr_state = mvdev->mig_state;
        mlx5vf_state_mutex_unlock(mvdev);
        return 0;
}

static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
{
        struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);

        if (!mvdev->migrate_cap)
                return;

        /*
         * As the higher VFIO layers are holding locks across reset and using
         * those same locks with the mm_lock we need to prevent ABBA deadlock
         * with the state_mutex and mm_lock.
         * In case the state_mutex was taken already we defer the cleanup work
         * to the unlock flow of the other running context.
         */
        spin_lock(&mvdev->reset_lock);
        mvdev->deferred_reset = true;
        if (!mutex_trylock(&mvdev->state_mutex)) {
                spin_unlock(&mvdev->reset_lock);
                return;
        }
        spin_unlock(&mvdev->reset_lock);
        mlx5vf_state_mutex_unlock(mvdev);
}

static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
{
        struct mlx5vf_pci_core_device *mvdev = container_of(
                core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
        struct vfio_pci_core_device *vdev = &mvdev->core_device;
        int ret;

        ret = vfio_pci_core_enable(vdev);
        if (ret)
                return ret;

        if (mvdev->migrate_cap)
                mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
        vfio_pci_core_finish_enable(vdev);
        return 0;
}

static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
{
        struct mlx5vf_pci_core_device *mvdev = container_of(
                core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);

        mlx5vf_cmd_close_migratable(mvdev);
        vfio_pci_core_close_device(core_vdev);
}

static const struct vfio_migration_ops mlx5vf_pci_mig_ops = {
        .migration_set_state = mlx5vf_pci_set_device_state,
        .migration_get_state = mlx5vf_pci_get_device_state,
};

static const struct vfio_log_ops mlx5vf_pci_log_ops = {
        .log_start = mlx5vf_start_page_tracker,
        .log_stop = mlx5vf_stop_page_tracker,
        .log_read_and_clear = mlx5vf_tracker_read_and_clear,
};
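
/* One-time vfio_device init: enable migration support when the VF is capable. */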
static int mlx5vf_pci_init_dev(struct vfio_device *core_vdev)
{
        struct mlx5vf_pci_core_device *mvdev = container_of(core_vdev,
                        struct mlx5vf_pci_core_device, core_device.vdev);
        int ret;

        ret = vfio_pci_core_init_dev(core_vdev);
        if (ret)
                return ret;

        mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops,
                                  &mlx5vf_pci_log_ops);

        return 0;
}

static void mlx5vf_pci_release_dev(struct vfio_device *core_vdev)
{
        struct mlx5vf_pci_core_device *mvdev = container_of(core_vdev,
                        struct mlx5vf_pci_core_device, core_device.vdev);

        mlx5vf_cmd_remove_migratable(mvdev);
        vfio_pci_core_release_dev(core_vdev);
}

static const struct vfio_device_ops mlx5vf_pci_ops = {
        .name = "mlx5-vfio-pci",
        .init = mlx5vf_pci_init_dev,
        .release = mlx5vf_pci_release_dev,
        .open_device = mlx5vf_pci_open_device,
        .close_device = mlx5vf_pci_close_device,
        .ioctl = vfio_pci_core_ioctl,
        .device_feature = vfio_pci_core_ioctl_feature,
        .read = vfio_pci_core_read,
        .write = vfio_pci_core_write,
        .mmap = vfio_pci_core_mmap,
        .request = vfio_pci_core_request,
        .match = vfio_pci_core_match,
};
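
/* Standard PCI probe/remove: bind the VF and register it with the vfio-pci core. */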
static int mlx5vf_pci_probe(struct pci_dev *pdev,
                            const struct pci_device_id *id)
{
        struct mlx5vf_pci_core_device *mvdev;
        int ret;

        mvdev = vfio_alloc_device(mlx5vf_pci_core_device, core_device.vdev,
                                  &pdev->dev, &mlx5vf_pci_ops);
        if (IS_ERR(mvdev))
                return PTR_ERR(mvdev);

        dev_set_drvdata(&pdev->dev, &mvdev->core_device);
        ret = vfio_pci_core_register_device(&mvdev->core_device);
        if (ret)
                goto out_put_vdev;
        return 0;

out_put_vdev:
        vfio_put_device(&mvdev->core_device.vdev);
        return ret;
}

static void mlx5vf_pci_remove(struct pci_dev *pdev)
{
        struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);

        vfio_pci_core_unregister_device(&mvdev->core_device);
        vfio_put_device(&mvdev->core_device.vdev);
}

static const struct pci_device_id mlx5vf_pci_table[] = {
        { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_MELLANOX, 0x101e) }, /* ConnectX Family mlx5Gen Virtual Function */
        {}
};

MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);

static const struct pci_error_handlers mlx5vf_err_handlers = {
        .reset_done = mlx5vf_pci_aer_reset_done,
        .error_detected = vfio_pci_core_aer_err_detected,
};

static struct pci_driver mlx5vf_pci_driver = {
        .name = KBUILD_MODNAME,
        .id_table = mlx5vf_pci_table,
        .probe = mlx5vf_pci_probe,
        .remove = mlx5vf_pci_remove,
        .err_handler = &mlx5vf_err_handlers,
        .driver_managed_dma = true,
};

static void __exit mlx5vf_pci_cleanup(void)
{
        pci_unregister_driver(&mlx5vf_pci_driver);
}

static int __init mlx5vf_pci_init(void)
{
        return pci_register_driver(&mlx5vf_pci_driver);
}

module_init(mlx5vf_pci_init);
module_exit(mlx5vf_pci_cleanup);

MODULE_LICENSE("GPL");
  591. MODULE_AUTHOR("Max Gurtovoy <[email protected]>");
  592. MODULE_AUTHOR("Yishai Hadas <[email protected]>");
MODULE_DESCRIPTION(
        "MLX5 VFIO PCI - User Level meta-driver for MLX5 device family");