vduse_dev.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <[email protected]>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <[email protected]>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_umem {
	unsigned long iova;
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
	struct vduse_umem *umem;
	struct mutex mem_lock;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};
static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};
static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}
static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}
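
/*
 * Synchronous control-plane request to the userspace daemon: the message is
 * queued on send_list, picked up by the daemon via read(), and completed once
 * the daemon writes back a response with a matching request_id. A timeout
 * (or a killed waiter) marks the device broken so later requests fail fast.
 */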
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}
static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
		msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
		msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
		msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
		msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}

	return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}
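
/*
 * Quiesce the device on reset: clear status/features, bump the config
 * generation, drop the config and per-vq callbacks, release kick eventfds
 * and flush any pending injection work. Runs under the rwsem write lock so
 * no new interrupt work can be queued while the state is torn down.
 */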
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);

		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}
static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
			state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
			state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				     struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	/* Initialize the buffer in case of partial copy. */
	memset(buf, 0, len);

	if (offset > dev->config_size)
		return;

	if (len > dev->config_size - offset)
		len = dev->config_size - offset;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
			      unsigned int asid,
			      struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address = vduse_vdpa_set_vq_address,
	.kick_vq = vduse_vdpa_kick_vq,
	.set_vq_cb = vduse_vdpa_set_vq_cb,
	.set_vq_num = vduse_vdpa_set_vq_num,
	.set_vq_ready = vduse_vdpa_set_vq_ready,
	.get_vq_ready = vduse_vdpa_get_vq_ready,
	.set_vq_state = vduse_vdpa_set_vq_state,
	.get_vq_state = vduse_vdpa_get_vq_state,
	.get_vq_align = vduse_vdpa_get_vq_align,
	.get_device_features = vduse_vdpa_get_device_features,
	.set_driver_features = vduse_vdpa_set_driver_features,
	.get_driver_features = vduse_vdpa_get_driver_features,
	.set_config_cb = vduse_vdpa_set_config_cb,
	.get_vq_num_max = vduse_vdpa_get_vq_num_max,
	.get_device_id = vduse_vdpa_get_device_id,
	.get_vendor_id = vduse_vdpa_get_vendor_id,
	.get_status = vduse_vdpa_get_status,
	.set_status = vduse_vdpa_set_status,
	.get_config_size = vduse_vdpa_get_config_size,
	.get_config = vduse_vdpa_get_config,
	.set_config = vduse_vdpa_set_config,
	.get_generation = vduse_vdpa_get_generation,
	.reset = vduse_vdpa_reset,
	.set_map = vduse_vdpa_set_map,
	.free = vduse_vdpa_free,
};
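
/*
 * These dma_map_ops are installed on the vDPA device so that every mapping
 * made on behalf of the virtio driver is routed through the per-device IOVA
 * domain, where it is bounce-buffered or tracked so the userspace daemon can
 * reach it via the fds exported by VDUSE_IOTLB_GET_FD.
 */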
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				       dma_addr_t *dma_addr, gfp_t flag,
				       unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
					   (dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};
static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalidate vhost IOTLB permission\n");
		break;
	}

	return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
			      struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}
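
/*
 * Interrupt injection is deferred to vduse_irq_wq so that the virtio
 * callbacks run in process context with bottom halves disabled, and only
 * while the driver has set DRIVER_OK (checked under the rwsem read lock in
 * vduse_dev_queue_irq_work()).
 */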
static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_bh(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_bh(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_bh(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_bh(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}
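
/*
 * Userspace can donate its own memory to back the bounce buffer: the pages
 * are long-term pinned, accounted against RLIMIT_MEMLOCK, and handed to the
 * IOVA domain. Deregistration only succeeds for the exact registered range.
 */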
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);
	ret = -ENOENT;
	if (!dev->umem)
		goto unlock;

	ret = -EINVAL;
	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
		goto unlock;

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}

static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list, NULL);
	if (pinned != npages) {
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}
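
/*
 * Per-device ioctl interface used by the userspace daemon after it has
 * opened /dev/vduse/$DEVICE: export IOTLB regions as fds, publish the device
 * config space, set up per-vq kick eventfds, query vq state and inject
 * config/vq interrupts.
 */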
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what driver wrote here.
		 * The driver is expected to check FEATURE_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.offset > dev->config_size ||
		    config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	case VDUSE_IOTLB_REG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_reg_umem(dev, umem.iova,
					 umem.uaddr, umem.size);
		break;
	}
	case VDUSE_IOTLB_DEREG_UMEM: {
		struct vduse_iova_umem umem;

		ret = -EFAULT;
		if (copy_from_user(&umem, argp, sizeof(umem)))
			break;

		ret = -EINVAL;
		if (!is_mem_zero((const char *)umem.reserved,
				 sizeof(umem.reserved)))
			break;

		ret = vduse_dev_dereg_umem(dev, umem.iova,
					   umem.size);
		break;
	}
	case VDUSE_IOTLB_GET_INFO: {
		struct vduse_iova_info info;
		struct vhost_iotlb_map *map;
		struct vduse_iova_domain *domain = dev->domain;

		ret = -EFAULT;
		if (copy_from_user(&info, argp, sizeof(info)))
			break;

		ret = -EINVAL;
		if (info.start > info.last)
			break;

		if (!is_mem_zero((const char *)info.reserved,
				 sizeof(info.reserved)))
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      info.start, info.last);
		if (map) {
			info.start = map->start;
			info.last = map->last;
			info.capability = 0;
			if (domain->bounce_map && map->start == 0 &&
			    map->last == domain->bounce_size - 1)
				info.capability |= VDUSE_IOVA_CAP_UMEM;
		}
		spin_unlock(&domain->iotlb_lock);
		if (!map)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &info, sizeof(info)))
			break;

		ret = 0;
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner = THIS_MODULE,
	.open = vduse_dev_open,
	.release = vduse_dev_release,
	.read_iter = vduse_dev_read_iter,
	.write_iter = vduse_dev_write_iter,
	.poll = vduse_dev_poll,
	.unlocked_ioctl = vduse_dev_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};
static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	mutex_init(&dev->mem_lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (config->vq_num > 0xffff)
		return false;

	if (!config->name[0])
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);
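
/*
 * Device creation (driven by VDUSE_CREATE_DEV on the control node): allocate
 * the vduse_dev and its IOVA domain, set up one vduse_virtqueue per queue,
 * claim an idr minor and publish /dev/vduse/$DEVICE with the msg_timeout
 * sysfs attribute. A module reference is held until vduse_destroy_dev().
 */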
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	return ret;
}
static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (vduse_validate_config(&config) == false)
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		if (ret)
			kvfree(buf);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner = THIS_MODULE,
	.open = vduse_open,
	.release = vduse_release,
	.unlocked_ioctl = vduse_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}
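
/*
 * Module init: one char device region with VDUSE_DEV_MAX minors, where minor
 * 0 backs /dev/vduse/control (vduse_ctrl_fops) and the remaining minors back
 * the per-device /dev/vduse/$DEVICE nodes (vduse_dev_fops), plus the shared
 * interrupt workqueue, the IOVA domain bookkeeping and the "vduse" vdpa
 * management device.
 */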
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);