// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <[email protected]>
 *         Jason Wang <[email protected]>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
 * their support.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"

enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
};

#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

#define VHOST_VDPA_IOTLB_BUCKETS 16

struct vhost_vdpa_as {
	struct hlist_node hash_link;
	struct vhost_iotlb iotlb;
	u32 id;
};

struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	u32 nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
	u32 batch_asid;
};

static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid);
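
/*
 * The helpers below map a vhost IOTLB back to its address space ID (ASID)
 * and look up, allocate or remove the per-ASID address space, which is
 * kept in a small hash table keyed by asid % VHOST_VDPA_IOTLB_BUCKETS.
 */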
static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
{
	struct vhost_vdpa_as *as = container_of(iotlb, struct vhost_vdpa_as,
						iotlb);
	return as->id;
}

static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	hlist_for_each_entry(as, head, hash_link)
		if (as->id == asid)
			return as;

	return NULL;
}

static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return NULL;

	return &as->iotlb;
}

static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
	struct vhost_vdpa_as *as;

	if (asid_to_as(v, asid))
		return NULL;

	if (asid >= v->vdpa->nas)
		return NULL;

	as = kmalloc(sizeof(*as), GFP_KERNEL);
	if (!as)
		return NULL;

	vhost_iotlb_init(&as->iotlb, 0, 0);
	as->id = asid;
	hlist_add_head(&as->hash_link, head);

	return as;
}

static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
						      u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (as)
		return as;

	return vhost_vdpa_alloc_as(v, asid);
}

static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *as = asid_to_as(v, asid);

	if (!as)
		return -EINVAL;

	hlist_del(&as->hash_link);
	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
	kfree(as);

	return 0;
}
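
/*
 * A guest kick is queued as vhost work and lands here; it is forwarded to
 * the parent vDPA device by calling its kick_vq() op with the vq index.
 */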
static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}

static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}

static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}
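
/*
 * If the parent exposes a virtqueue interrupt, register the call eventfd
 * as an irq bypass producer so the interrupt can be delivered to the
 * guest without bouncing through the host eventfd path.
 */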
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0)
		return;

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}

static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	v->in_batch = 0;

	return vdpa_reset(vdpa);
}

static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	u32 nvqs = v->nvqs;
	int ret;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (status_old & ~status) != 0)
		return -EINVAL;

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	if (status == 0) {
		ret = vdpa_reset(vdpa);
		if (ret)
			return ret;
	} else
		vdpa_set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	return 0;
}
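
/*
 * Config space accesses are bounds-checked against the size reported by
 * the parent device before any data is copied to or from userspace.
 */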
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	size_t size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0 || c->off > size)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}

static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}

static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	vdpa_set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}

static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	return ops->suspend;
}

static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_device_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *d = &v->vdev;
	u64 actual_features;
	u64 features;
	int i;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	/* let the vqs know what has been configured */
	actual_features = ops->get_driver_features(vdpa);
	for (i = 0; i < d->nvqs; ++i) {
		struct vhost_virtqueue *vq = d->vqs[i];

		mutex_lock(&vq->mutex);
		vq->acked_features = actual_features;
		mutex_unlock(&vq->mutex);
	}

	return 0;
}

static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}
static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}

static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}

static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 size;

	size = ops->get_config_size(vdpa);

	if (copy_to_user(argp, &size, sizeof(size)))
		return -EFAULT;

	return 0;
}

static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
		return -EFAULT;

	return 0;
}

/* After a successful return of this ioctl the device must not process more
 * virtqueue descriptors. The device can answer reads or writes of config
 * fields as if it were not suspended. In particular, writing to "queue_enable"
 * with a value of 1 will not make the device start processing buffers.
 */
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (!ops->suspend)
		return -EOPNOTSUPP;

	return ops->suspend(vdpa);
}
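
/*
 * Virtqueue ioctls are handled in two steps: vhost-vdpa specific commands
 * (enable, group, ASID binding, reading the ring base back from the
 * device) are handled here first, then the generic vhost_vring_ioctl()
 * runs, and finally the resulting vq configuration is pushed down to the
 * parent device through the vdpa config ops.
 */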
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_VDPA_GET_VRING_GROUP:
		if (!ops->get_vq_group)
			return -EOPNOTSUPP;
		s.index = idx;
		s.num = ops->get_vq_group(vdpa, idx);
		if (s.num >= vdpa->ngroups)
			return -EIO;
		else if (copy_to_user(argp, &s, sizeof(s)))
			return -EFAULT;
		return 0;
	case VHOST_VDPA_SET_GROUP_ASID:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		if (s.num >= vdpa->nas)
			return -EINVAL;
		if (!ops->set_group_asid)
			return -EOPNOTSUPP;
		return ops->set_group_asid(vdpa, idx, s.num);
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq->last_avail_idx = vq_state.packed.last_avail_idx |
					     (vq_state.packed.last_avail_counter << 15);
			vq->last_used_idx = vq_state.packed.last_used_idx |
					    (vq_state.packed.last_used_counter << 15);
		} else {
			vq->last_avail_idx = vq_state.split.avail_index;
		}
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
		} else {
			vq_state.split.avail_index = vq->last_avail_idx;
		}
		r = ops->set_vq_state(vdpa, idx, &vq_state);
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}
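
/*
 * Main ioctl dispatcher for the vhost-vdpa character device: backend
 * feature negotiation is handled without the device mutex, everything
 * else is serialized on the vhost_dev mutex and falls through to the
 * generic vhost handlers or the per-vring handler above when the command
 * is not vdpa specific.
 */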
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
				 BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
			return -EOPNOTSUPP;
		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
		     !vhost_vdpa_can_suspend(v))
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_VDPA_GET_GROUP_NUM:
		if (copy_to_user(argp, &v->vdpa->ngroups,
				 sizeof(v->vdpa->ngroups)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_AS_NUM:
		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
			r = -EFAULT;
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (vhost_vdpa_can_suspend(v))
			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG_SIZE:
		r = vhost_vdpa_get_config_size(v, argp);
		break;
	case VHOST_VDPA_GET_VQS_COUNT:
		r = vhost_vdpa_get_vqs_count(v, argp);
		break;
	case VHOST_VDPA_SUSPEND:
		r = vhost_vdpa_suspend(v);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}
static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, asid, map->start, map->size);
	} else if (ops->set_map == NULL) {
		iommu_unmap(v->domain, map->start, map->size);
	}
}

static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = PFN_DOWN(map->size);
		for (pfn = PFN_DOWN(map->addr);
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}

static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (vdpa->use_va)
		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);

	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}

static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}
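
/*
 * Install a single IOTLB entry and propagate it to the parent: either via
 * its incremental dma_map() op, via a full set_map() replay (deferred
 * while a batch is in flight), or directly into the platform IOMMU domain
 * when the parent does no translation of its own.
 */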
static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	int r = 0;

	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
				      pa, perm, opaque);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, asid, iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}
	if (r) {
		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
		return r;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}

static void vhost_vdpa_unmap(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);

	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);

	if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, asid, iotlb);
	}
}

static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}
	if (ret)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}
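
/*
 * Map guest memory by physical address: user pages are pinned with
 * pin_user_pages() under the MEMLOCK rlimit, merged into physically
 * contiguous chunks, and each chunk is handed to vhost_vdpa_map();
 * anything left pinned is rolled back on failure.
 */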
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list, NULL);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Pin the rest chunk */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}

static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb *iotlb,
					   struct vhost_iotlb_msg *msg)
{
	struct vdpa_device *vdpa = v->vdpa;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	if (vdpa->use_va)
		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
					 msg->uaddr, msg->perm);

	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
				 msg->perm);
}
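
/*
 * Entry point for IOTLB messages from the vhost character device: updates
 * and invalidations are applied to the address space selected by @asid,
 * and BATCH_BEGIN/END bracket a series of updates so that set_map()
 * capable parents only see one flush at the end of the batch.
 */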
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	struct vhost_vdpa_as *as = NULL;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		as = vhost_vdpa_find_alloc_as(v, asid);
		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			r = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else
		iotlb = asid_to_iotlb(v, asid);

	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
		if (v->in_batch && v->batch_asid != asid) {
			dev_info(&v->dev, "batch id %d asid %d\n",
				 v->batch_asid, asid);
		}
		if (!iotlb)
			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
		r = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}
static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}

static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* Device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	v->domain = NULL;
	return ret;
}

static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
{
	struct vhost_vdpa_as *as;
	u32 asid;

	for (asid = 0; asid < v->vdpa->nas; asid++) {
		as = asid_to_as(v, asid);
		if (as)
			vhost_vdpa_remove_as(v, asid);
	}

	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
}
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int r, opened;
	u32 i, nvqs;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	r = vhost_vdpa_reset(v);
	if (r)
		goto err;

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_alloc_domain;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_alloc_domain:
	vhost_vdpa_cleanup(v);
err:
	atomic_dec(&v->opened);
	return r;
}

static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	u32 i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}

static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_clean_irq(v);
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_config_put(v);
	vhost_vdpa_cleanup(v);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}
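
/*
 * Doorbell mapping: when the parent exposes a per-virtqueue notification
 * area that is page aligned and page sized, userspace can mmap() it
 * (write-only, one page, vm_pgoff selecting the virtqueue) and the fault
 * handler below remaps the doorbell PFN directly into the VMA.
 */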
#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    PFN_DOWN(notify.addr), PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};

static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support a doorbell that sits on a page boundary and
	 * does not share the page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &vhost_vdpa_vm_ops;
	return 0;
}
#endif /* CONFIG_MMU */

static const struct file_operations vhost_vdpa_fops = {
	.owner = THIS_MODULE,
	.open = vhost_vdpa_open,
	.release = vhost_vdpa_release,
	.write_iter = vhost_vdpa_chr_write_iter,
	.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap = vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl = compat_ptr_ioctl,
};

static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}
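
/*
 * vDPA bus probe: each parent device gets a vhost-vdpa-<minor> character
 * device; remove() waits for any open file descriptor to be released
 * before tearing the device down.
 */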
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int i, r;

	/* We can't support a platform IOMMU device with more than one
	 * group or address space.
	 */
	if (!ops->set_map && !ops->dma_map &&
	    (vdpa->ngroups > 1 || vdpa->nas > 1))
		return -EOPNOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
		INIT_HLIST_HEAD(&v->as[i]);

	return 0;

err:
	put_device(&v->dev);
	return r;
}

static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}

static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name = "vhost_vdpa",
	},
	.probe = vhost_vdpa_probe,
	.remove = vhost_vdpa_remove,
};

static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);

static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);

MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");