vringh.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Helpers for the host side of a virtio ring.
  4. *
  5. * Since these may be in userspace, we use (inline) accessors.
  6. */
  7. #include <linux/compiler.h>
  8. #include <linux/module.h>
  9. #include <linux/vringh.h>
  10. #include <linux/virtio_ring.h>
  11. #include <linux/kernel.h>
  12. #include <linux/ratelimit.h>
  13. #include <linux/uaccess.h>
  14. #include <linux/slab.h>
  15. #include <linux/export.h>
  16. #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
  17. #include <linux/bvec.h>
  18. #include <linux/highmem.h>
  19. #include <linux/vhost_iotlb.h>
  20. #endif
  21. #include <uapi/linux/virtio_config.h>
  22. static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
  23. {
  24. static DEFINE_RATELIMIT_STATE(vringh_rs,
  25. DEFAULT_RATELIMIT_INTERVAL,
  26. DEFAULT_RATELIMIT_BURST);
  27. if (__ratelimit(&vringh_rs)) {
  28. va_list ap;
  29. va_start(ap, fmt);
  30. printk(KERN_NOTICE "vringh:");
  31. vprintk(fmt, ap);
  32. va_end(ap);
  33. }
  34. }
  35. /* Returns vring->num if empty, -ve on error. */
  36. static inline int __vringh_get_head(const struct vringh *vrh,
  37. int (*getu16)(const struct vringh *vrh,
  38. u16 *val, const __virtio16 *p),
  39. u16 *last_avail_idx)
  40. {
  41. u16 avail_idx, i, head;
  42. int err;
  43. err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
  44. if (err) {
  45. vringh_bad("Failed to access avail idx at %p",
  46. &vrh->vring.avail->idx);
  47. return err;
  48. }
  49. if (*last_avail_idx == avail_idx)
  50. return vrh->vring.num;
  51. /* Only get avail ring entries after they have been exposed by guest. */
  52. virtio_rmb(vrh->weak_barriers);
  53. i = *last_avail_idx & (vrh->vring.num - 1);
  54. err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
  55. if (err) {
  56. vringh_bad("Failed to read head: idx %d address %p",
  57. *last_avail_idx, &vrh->vring.avail->ring[i]);
  58. return err;
  59. }
  60. if (head >= vrh->vring.num) {
  61. vringh_bad("Guest says index %u > %u is available",
  62. head, vrh->vring.num);
  63. return -EINVAL;
  64. }
  65. (*last_avail_idx)++;
  66. return head;
  67. }
  68. /**
  69. * vringh_kiov_advance - skip bytes from vring_kiov
  70. * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
  71. * @len: the maximum length to advance
  72. */
  73. void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
  74. {
  75. while (len && iov->i < iov->used) {
  76. size_t partlen = min(iov->iov[iov->i].iov_len, len);
  77. iov->consumed += partlen;
  78. iov->iov[iov->i].iov_len -= partlen;
  79. iov->iov[iov->i].iov_base += partlen;
  80. if (!iov->iov[iov->i].iov_len) {
  81. /* Fix up old iov element then increment. */
  82. iov->iov[iov->i].iov_len = iov->consumed;
  83. iov->iov[iov->i].iov_base -= iov->consumed;
  84. iov->consumed = 0;
  85. iov->i++;
  86. }
  87. len -= partlen;
  88. }
  89. }
  90. EXPORT_SYMBOL(vringh_kiov_advance);
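/*
 * Example (a minimal sketch, not part of the original file): using
 * vringh_kiov_advance() to skip a fixed-size header in the readable kiov
 * before pulling the payload with vringh_iov_pull_kern().  struct my_hdr
 * and my_handle_payload() are hypothetical placeholders.
 */
static int my_consume_payload(struct vringh_kiov *riov)
{
	u8 payload[64];
	ssize_t got;

	/* The first sizeof(struct my_hdr) bytes are a header we ignore. */
	vringh_kiov_advance(riov, sizeof(struct my_hdr));

	/* Pull up to 64 bytes of payload; riov is advanced as we consume. */
	got = vringh_iov_pull_kern(riov, payload, sizeof(payload));
	if (got < 0)
		return got;

	return my_handle_payload(payload, got);
}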
  91. /* Copy some bytes to/from the iovec. Returns num copied. */
  92. static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
  93. struct vringh_kiov *iov,
  94. void *ptr, size_t len,
  95. int (*xfer)(const struct vringh *vrh,
  96. void *addr, void *ptr,
  97. size_t len))
  98. {
  99. int err, done = 0;
  100. while (len && iov->i < iov->used) {
  101. size_t partlen;
  102. partlen = min(iov->iov[iov->i].iov_len, len);
  103. err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
  104. if (err)
  105. return err;
  106. done += partlen;
  107. len -= partlen;
  108. ptr += partlen;
  109. iov->consumed += partlen;
  110. iov->iov[iov->i].iov_len -= partlen;
  111. iov->iov[iov->i].iov_base += partlen;
  112. if (!iov->iov[iov->i].iov_len) {
  113. /* Fix up old iov element then increment. */
  114. iov->iov[iov->i].iov_len = iov->consumed;
  115. iov->iov[iov->i].iov_base -= iov->consumed;
  116. iov->consumed = 0;
  117. iov->i++;
  118. }
  119. }
  120. return done;
  121. }
  122. /* May reduce *len if range is shorter. */
  123. static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
  124. struct vringh_range *range,
  125. bool (*getrange)(struct vringh *,
  126. u64, struct vringh_range *))
  127. {
  128. if (addr < range->start || addr > range->end_incl) {
  129. if (!getrange(vrh, addr, range))
  130. return false;
  131. }
  132. BUG_ON(addr < range->start || addr > range->end_incl);
  133. /* To end of memory? */
  134. if (unlikely(addr + *len == 0)) {
  135. if (range->end_incl == -1ULL)
  136. return true;
  137. goto truncate;
  138. }
  139. /* Otherwise, don't wrap. */
  140. if (addr + *len < addr) {
  141. vringh_bad("Wrapping descriptor %zu@0x%llx",
  142. *len, (unsigned long long)addr);
  143. return false;
  144. }
  145. if (unlikely(addr + *len - 1 > range->end_incl))
  146. goto truncate;
  147. return true;
  148. truncate:
  149. *len = range->end_incl + 1 - addr;
  150. return true;
  151. }
  152. static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
  153. struct vringh_range *range,
  154. bool (*getrange)(struct vringh *,
  155. u64, struct vringh_range *))
  156. {
  157. return true;
  158. }
  159. /* No reason for this code to be inline. */
  160. static int move_to_indirect(const struct vringh *vrh,
  161. int *up_next, u16 *i, void *addr,
  162. const struct vring_desc *desc,
  163. struct vring_desc **descs, int *desc_max)
  164. {
  165. u32 len;
  166. /* Indirect tables can't have indirect. */
  167. if (*up_next != -1) {
  168. vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
  169. return -EINVAL;
  170. }
  171. len = vringh32_to_cpu(vrh, desc->len);
  172. if (unlikely(len % sizeof(struct vring_desc))) {
  173. vringh_bad("Strange indirect len %u", desc->len);
  174. return -EINVAL;
  175. }
  176. /* We will check this when we follow it! */
  177. if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
  178. *up_next = vringh16_to_cpu(vrh, desc->next);
  179. else
  180. *up_next = -2;
  181. *descs = addr;
  182. *desc_max = len / sizeof(struct vring_desc);
  183. /* Now, start at the first indirect. */
  184. *i = 0;
  185. return 0;
  186. }
  187. static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
  188. {
  189. struct kvec *new;
  190. unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
  191. if (new_num < 8)
  192. new_num = 8;
  193. flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
  194. if (flag)
  195. new = krealloc_array(iov->iov, new_num,
  196. sizeof(struct iovec), gfp);
  197. else {
  198. new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
  199. if (new) {
  200. memcpy(new, iov->iov,
  201. iov->max_num * sizeof(struct iovec));
  202. flag = VRINGH_IOV_ALLOCATED;
  203. }
  204. }
  205. if (!new)
  206. return -ENOMEM;
  207. iov->iov = new;
  208. iov->max_num = (new_num | flag);
  209. return 0;
  210. }
  211. static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
  212. struct vring_desc **descs, int *desc_max)
  213. {
  214. u16 i = *up_next;
  215. *up_next = -1;
  216. *descs = vrh->vring.desc;
  217. *desc_max = vrh->vring.num;
  218. return i;
  219. }
  220. static int slow_copy(struct vringh *vrh, void *dst, const void *src,
  221. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  222. struct vringh_range *range,
  223. bool (*getrange)(struct vringh *vrh,
  224. u64,
  225. struct vringh_range *)),
  226. bool (*getrange)(struct vringh *vrh,
  227. u64 addr,
  228. struct vringh_range *r),
  229. struct vringh_range *range,
  230. int (*copy)(const struct vringh *vrh,
  231. void *dst, const void *src, size_t len))
  232. {
  233. size_t part, len = sizeof(struct vring_desc);
  234. do {
  235. u64 addr;
  236. int err;
  237. part = len;
  238. addr = (u64)(unsigned long)src - range->offset;
  239. if (!rcheck(vrh, addr, &part, range, getrange))
  240. return -EINVAL;
  241. err = copy(vrh, dst, src, part);
  242. if (err)
  243. return err;
  244. dst += part;
  245. src += part;
  246. len -= part;
  247. } while (len);
  248. return 0;
  249. }
  250. static inline int
  251. __vringh_iov(struct vringh *vrh, u16 i,
  252. struct vringh_kiov *riov,
  253. struct vringh_kiov *wiov,
  254. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  255. struct vringh_range *range,
  256. bool (*getrange)(struct vringh *, u64,
  257. struct vringh_range *)),
  258. bool (*getrange)(struct vringh *, u64, struct vringh_range *),
  259. gfp_t gfp,
  260. int (*copy)(const struct vringh *vrh,
  261. void *dst, const void *src, size_t len))
  262. {
  263. int err, count = 0, indirect_count = 0, up_next, desc_max;
  264. struct vring_desc desc, *descs;
  265. struct vringh_range range = { -1ULL, 0 }, slowrange;
  266. bool slow = false;
  267. /* We start traversing vring's descriptor table. */
  268. descs = vrh->vring.desc;
  269. desc_max = vrh->vring.num;
  270. up_next = -1;
  271. /* You must want something! */
  272. if (WARN_ON(!riov && !wiov))
  273. return -EINVAL;
  274. if (riov)
  275. riov->i = riov->used = riov->consumed = 0;
  276. if (wiov)
  277. wiov->i = wiov->used = wiov->consumed = 0;
  278. for (;;) {
  279. void *addr;
  280. struct vringh_kiov *iov;
  281. size_t len;
  282. if (unlikely(slow))
  283. err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
  284. &slowrange, copy);
  285. else
  286. err = copy(vrh, &desc, &descs[i], sizeof(desc));
  287. if (unlikely(err))
  288. goto fail;
  289. if (unlikely(desc.flags &
  290. cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
  291. u64 a = vringh64_to_cpu(vrh, desc.addr);
  292. /* Make sure it's OK, and get offset. */
  293. len = vringh32_to_cpu(vrh, desc.len);
  294. if (!rcheck(vrh, a, &len, &range, getrange)) {
  295. err = -EINVAL;
  296. goto fail;
  297. }
  298. if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
  299. slow = true;
  300. /* We need to save this range to use offset */
  301. slowrange = range;
  302. }
  303. addr = (void *)(long)(a + range.offset);
  304. err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
  305. &descs, &desc_max);
  306. if (err)
  307. goto fail;
  308. continue;
  309. }
  310. if (up_next == -1)
  311. count++;
  312. else
  313. indirect_count++;
  314. if (count > vrh->vring.num || indirect_count > desc_max) {
  315. vringh_bad("Descriptor loop in %p", descs);
  316. err = -ELOOP;
  317. goto fail;
  318. }
  319. if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
  320. iov = wiov;
  321. else {
  322. iov = riov;
  323. if (unlikely(wiov && wiov->used)) {
  324. vringh_bad("Readable desc %p after writable",
  325. &descs[i]);
  326. err = -EINVAL;
  327. goto fail;
  328. }
  329. }
  330. if (!iov) {
  331. vringh_bad("Unexpected %s desc",
  332. !wiov ? "writable" : "readable");
  333. err = -EPROTO;
  334. goto fail;
  335. }
  336. again:
  337. /* Make sure it's OK, and get offset. */
  338. len = vringh32_to_cpu(vrh, desc.len);
  339. if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
  340. getrange)) {
  341. err = -EINVAL;
  342. goto fail;
  343. }
  344. addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
  345. range.offset);
  346. if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
  347. err = resize_iovec(iov, gfp);
  348. if (err)
  349. goto fail;
  350. }
  351. iov->iov[iov->used].iov_base = addr;
  352. iov->iov[iov->used].iov_len = len;
  353. iov->used++;
  354. if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
  355. desc.len = cpu_to_vringh32(vrh,
  356. vringh32_to_cpu(vrh, desc.len) - len);
  357. desc.addr = cpu_to_vringh64(vrh,
  358. vringh64_to_cpu(vrh, desc.addr) + len);
  359. goto again;
  360. }
  361. if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
  362. i = vringh16_to_cpu(vrh, desc.next);
  363. } else {
  364. /* Just in case we need to finish traversing above. */
  365. if (unlikely(up_next > 0)) {
  366. i = return_from_indirect(vrh, &up_next,
  367. &descs, &desc_max);
  368. slow = false;
  369. indirect_count = 0;
  370. } else
  371. break;
  372. }
  373. if (i >= desc_max) {
  374. vringh_bad("Chained index %u > %u", i, desc_max);
  375. err = -EINVAL;
  376. goto fail;
  377. }
  378. }
  379. return 0;
  380. fail:
  381. return err;
  382. }
  383. static inline int __vringh_complete(struct vringh *vrh,
  384. const struct vring_used_elem *used,
  385. unsigned int num_used,
  386. int (*putu16)(const struct vringh *vrh,
  387. __virtio16 *p, u16 val),
  388. int (*putused)(const struct vringh *vrh,
  389. struct vring_used_elem *dst,
  390. const struct vring_used_elem
  391. *src, unsigned num))
  392. {
  393. struct vring_used *used_ring;
  394. int err;
  395. u16 used_idx, off;
  396. used_ring = vrh->vring.used;
  397. used_idx = vrh->last_used_idx + vrh->completed;
  398. off = used_idx % vrh->vring.num;
  399. /* Compiler knows num_used == 1 sometimes, hence extra check */
  400. if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
  401. u16 part = vrh->vring.num - off;
  402. err = putused(vrh, &used_ring->ring[off], used, part);
  403. if (!err)
  404. err = putused(vrh, &used_ring->ring[0], used + part,
  405. num_used - part);
  406. } else
  407. err = putused(vrh, &used_ring->ring[off], used, num_used);
  408. if (err) {
  409. vringh_bad("Failed to write %u used entries %u at %p",
  410. num_used, off, &used_ring->ring[off]);
  411. return err;
  412. }
  413. /* Make sure buffer is written before we update index. */
  414. virtio_wmb(vrh->weak_barriers);
  415. err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
  416. if (err) {
  417. vringh_bad("Failed to update used index at %p",
  418. &vrh->vring.used->idx);
  419. return err;
  420. }
  421. vrh->completed += num_used;
  422. return 0;
  423. }
  424. static inline int __vringh_need_notify(struct vringh *vrh,
  425. int (*getu16)(const struct vringh *vrh,
  426. u16 *val,
  427. const __virtio16 *p))
  428. {
  429. bool notify;
  430. u16 used_event;
  431. int err;
  432. /* Flush out used index update. This is paired with the
  433. * barrier that the Guest executes when enabling
  434. * interrupts. */
  435. virtio_mb(vrh->weak_barriers);
  436. /* Old-style, without event indices. */
  437. if (!vrh->event_indices) {
  438. u16 flags;
  439. err = getu16(vrh, &flags, &vrh->vring.avail->flags);
  440. if (err) {
  441. vringh_bad("Failed to get flags at %p",
  442. &vrh->vring.avail->flags);
  443. return err;
  444. }
  445. return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
  446. }
  447. /* Modern: we know when other side wants to know. */
  448. err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
  449. if (err) {
  450. vringh_bad("Failed to get used event idx at %p",
  451. &vring_used_event(&vrh->vring));
  452. return err;
  453. }
  454. /* Just in case we added so many that we wrap. */
  455. if (unlikely(vrh->completed > 0xffff))
  456. notify = true;
  457. else
  458. notify = vring_need_event(used_event,
  459. vrh->last_used_idx + vrh->completed,
  460. vrh->last_used_idx);
  461. vrh->last_used_idx += vrh->completed;
  462. vrh->completed = 0;
  463. return notify;
  464. }
  465. static inline bool __vringh_notify_enable(struct vringh *vrh,
  466. int (*getu16)(const struct vringh *vrh,
  467. u16 *val, const __virtio16 *p),
  468. int (*putu16)(const struct vringh *vrh,
  469. __virtio16 *p, u16 val))
  470. {
  471. u16 avail;
  472. if (!vrh->event_indices) {
  473. /* Old-school; update flags. */
  474. if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
  475. vringh_bad("Clearing used flags %p",
  476. &vrh->vring.used->flags);
  477. return true;
  478. }
  479. } else {
  480. if (putu16(vrh, &vring_avail_event(&vrh->vring),
  481. vrh->last_avail_idx) != 0) {
  482. vringh_bad("Updating avail event index %p",
  483. &vring_avail_event(&vrh->vring));
  484. return true;
  485. }
  486. }
  487. /* They could have slipped one in as we were doing that: make
  488. * sure it's written, then check again. */
  489. virtio_mb(vrh->weak_barriers);
  490. if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
  491. vringh_bad("Failed to check avail idx at %p",
  492. &vrh->vring.avail->idx);
  493. return true;
  494. }
  495. /* This is unlikely, so we just leave notifications enabled
  496. * (if we're using event_indices, we'll only get one
  497. * notification anyway). */
  498. return avail == vrh->last_avail_idx;
  499. }
  500. static inline void __vringh_notify_disable(struct vringh *vrh,
  501. int (*putu16)(const struct vringh *vrh,
  502. __virtio16 *p, u16 val))
  503. {
  504. if (!vrh->event_indices) {
  505. /* Old-school; update flags. */
  506. if (putu16(vrh, &vrh->vring.used->flags,
  507. VRING_USED_F_NO_NOTIFY)) {
  508. vringh_bad("Setting used flags %p",
  509. &vrh->vring.used->flags);
  510. }
  511. }
  512. }
  513. /* Userspace access helpers: in this case, addresses are really userspace. */
  514. static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
  515. {
  516. __virtio16 v = 0;
  517. int rc = get_user(v, (__force __virtio16 __user *)p);
  518. *val = vringh16_to_cpu(vrh, v);
  519. return rc;
  520. }
  521. static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
  522. {
  523. __virtio16 v = cpu_to_vringh16(vrh, val);
  524. return put_user(v, (__force __virtio16 __user *)p);
  525. }
  526. static inline int copydesc_user(const struct vringh *vrh,
  527. void *dst, const void *src, size_t len)
  528. {
  529. return copy_from_user(dst, (__force void __user *)src, len) ?
  530. -EFAULT : 0;
  531. }
  532. static inline int putused_user(const struct vringh *vrh,
  533. struct vring_used_elem *dst,
  534. const struct vring_used_elem *src,
  535. unsigned int num)
  536. {
  537. return copy_to_user((__force void __user *)dst, src,
  538. sizeof(*dst) * num) ? -EFAULT : 0;
  539. }
  540. static inline int xfer_from_user(const struct vringh *vrh, void *src,
  541. void *dst, size_t len)
  542. {
  543. return copy_from_user(dst, (__force void __user *)src, len) ?
  544. -EFAULT : 0;
  545. }
  546. static inline int xfer_to_user(const struct vringh *vrh,
  547. void *dst, void *src, size_t len)
  548. {
  549. return copy_to_user((__force void __user *)dst, src, len) ?
  550. -EFAULT : 0;
  551. }
  552. /**
  553. * vringh_init_user - initialize a vringh for a userspace vring.
  554. * @vrh: the vringh to initialize.
  555. * @features: the feature bits for this ring.
  556. * @num: the number of elements.
  557. * @weak_barriers: true if we only need memory barriers, not I/O.
  558. * @desc: the userspace descriptor pointer.
  559. * @avail: the userspace avail pointer.
  560. * @used: the userspace used pointer.
  561. *
  562. * Returns an error if num is invalid: you should check pointers
  563. * yourself!
  564. */
  565. int vringh_init_user(struct vringh *vrh, u64 features,
  566. unsigned int num, bool weak_barriers,
  567. vring_desc_t __user *desc,
  568. vring_avail_t __user *avail,
  569. vring_used_t __user *used)
  570. {
  571. /* Sane power of 2 please! */
  572. if (!num || num > 0xffff || (num & (num - 1))) {
  573. vringh_bad("Bad ring size %u", num);
  574. return -EINVAL;
  575. }
  576. vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
  577. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  578. vrh->weak_barriers = weak_barriers;
  579. vrh->completed = 0;
  580. vrh->last_avail_idx = 0;
  581. vrh->last_used_idx = 0;
  582. vrh->vring.num = num;
  583. /* vring expects kernel addresses, but only used via accessors. */
  584. vrh->vring.desc = (__force struct vring_desc *)desc;
  585. vrh->vring.avail = (__force struct vring_avail *)avail;
  586. vrh->vring.used = (__force struct vring_used *)used;
  587. return 0;
  588. }
  589. EXPORT_SYMBOL(vringh_init_user);
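/*
 * Example (sketch, assumptions labelled): initializing a vringh over a vring
 * whose desc/avail/used pointers live in userspace.  Where those pointers
 * come from (an ioctl, mmap, ...) is up to the caller; struct my_vq and its
 * vrh member are hypothetical.
 */
static int my_setup_user_vring(struct my_vq *vq, u64 features,
			       unsigned int num,
			       vring_desc_t __user *desc,
			       vring_avail_t __user *avail,
			       vring_used_t __user *used)
{
	/* weak_barriers = true: both sides are ordinary CPU threads. */
	int err = vringh_init_user(&vq->vrh, features, num, true,
				   desc, avail, used);
	if (err)
		/* num was zero, too big, or not a power of 2 */
		return err;
	return 0;
}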
  590. /**
  591. * vringh_getdesc_user - get next available descriptor from userspace ring.
  592. * @vrh: the userspace vring.
  593. * @riov: where to put the readable descriptors (or NULL)
  594. * @wiov: where to put the writable descriptors (or NULL)
  595. * @getrange: function to call to check ranges.
  596. * @head: head index we received, for passing to vringh_complete_user().
  597. *
  598. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  599. *
  600. * Note that on error return, you can tell the difference between an
  601. * invalid ring and a single invalid descriptor: in the former case,
  602. * *head will be vrh->vring.num. You may be able to ignore an invalid
  603. * descriptor, but there's not much you can do with an invalid ring.
  604. *
  605. * Note that you can reuse riov and wiov with subsequent calls. Content is
  606. * overwritten and memory reallocated if more space is needed.
  607. * When you no longer need riov and wiov, clean them up by calling
  608. * vringh_iov_cleanup() to release the memory, even on error!
  609. */
  610. int vringh_getdesc_user(struct vringh *vrh,
  611. struct vringh_iov *riov,
  612. struct vringh_iov *wiov,
  613. bool (*getrange)(struct vringh *vrh,
  614. u64 addr, struct vringh_range *r),
  615. u16 *head)
  616. {
  617. int err;
  618. *head = vrh->vring.num;
  619. err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
  620. if (err < 0)
  621. return err;
  622. /* Empty... */
  623. if (err == vrh->vring.num)
  624. return 0;
  625. /* We need the layouts to be identical for this to work */
  626. BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
  627. BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
  628. offsetof(struct vringh_iov, iov));
  629. BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
  630. offsetof(struct vringh_iov, i));
  631. BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
  632. offsetof(struct vringh_iov, used));
  633. BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
  634. offsetof(struct vringh_iov, max_num));
  635. BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
  636. BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
  637. offsetof(struct kvec, iov_base));
  638. BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
  639. offsetof(struct kvec, iov_len));
  640. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
  641. != sizeof(((struct kvec *)NULL)->iov_base));
  642. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
  643. != sizeof(((struct kvec *)NULL)->iov_len));
  644. *head = err;
  645. err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
  646. (struct vringh_kiov *)wiov,
  647. range_check, getrange, GFP_KERNEL, copydesc_user);
  648. if (err)
  649. return err;
  650. return 1;
  651. }
  652. EXPORT_SYMBOL(vringh_getdesc_user);
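/*
 * Example (sketch): one way to drain a userspace ring.  The identity
 * my_getrange() below assumes guest addresses are already valid user virtual
 * addresses (offset 0); a real user would validate against its registered
 * memory regions.  my_process() and my_notify_guest() are hypothetical.
 */
static bool my_getrange(struct vringh *vrh, u64 addr, struct vringh_range *r)
{
	r->start = 0;
	r->end_incl = -1ULL;
	r->offset = 0;			/* guest address == user address */
	return true;
}

static int my_service_user_ring(struct vringh *vrh)
{
	struct vringh_iov riov, wiov;
	ssize_t len;
	u16 head;
	int err;

	vringh_iov_init(&riov, NULL, 0);
	vringh_iov_init(&wiov, NULL, 0);

	for (;;) {
		err = vringh_getdesc_user(vrh, &riov, &wiov, my_getrange, &head);
		if (err <= 0)		/* 0: ring empty, < 0: error */
			break;

		len = my_process(&riov, &wiov);	/* pull/push as needed */
		if (len < 0) {
			err = len;
			break;
		}

		err = vringh_complete_user(vrh, head, len);
		if (err)
			break;
	}

	/* Required even on error: riov/wiov may hold allocated iov arrays. */
	vringh_iov_cleanup(&riov);
	vringh_iov_cleanup(&wiov);

	if (vringh_need_notify_user(vrh) > 0)
		my_notify_guest();
	return err;
}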
  653. /**
  654. * vringh_iov_pull_user - copy bytes from vring_iov.
  655. * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
  656. * @dst: the place to copy.
  657. * @len: the maximum length to copy.
  658. *
  659. * Returns the bytes copied <= len or a negative errno.
  660. */
  661. ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
  662. {
  663. return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
  664. dst, len, xfer_from_user);
  665. }
  666. EXPORT_SYMBOL(vringh_iov_pull_user);
  667. /**
  668. * vringh_iov_push_user - copy bytes into vring_iov.
  669. * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
  670. * @src: the place to copy from.
  671. * @len: the maximum length to copy.
  672. *
  673. * Returns the bytes copied <= len or a negative errno.
  674. */
  675. ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
  676. const void *src, size_t len)
  677. {
  678. return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
  679. (void *)src, len, xfer_to_user);
  680. }
  681. EXPORT_SYMBOL(vringh_iov_push_user);
  682. /**
  683. * vringh_abandon_user - we've decided not to handle the descriptor(s).
  684. * @vrh: the vring.
  685. * @num: the number of descriptors to put back (i.e. how many
  686. * vringh_getdesc_user() calls to undo).
  687. *
  688. * The next vringh_getdesc_user() will return the old descriptor(s) again.
  689. */
  690. void vringh_abandon_user(struct vringh *vrh, unsigned int num)
  691. {
  692. /* We only update vring_avail_event(vr) when we want to be notified,
  693. * so we haven't changed that yet. */
  694. vrh->last_avail_idx -= num;
  695. }
  696. EXPORT_SYMBOL(vringh_abandon_user);
  697. /**
  698. * vringh_complete_user - we've finished with descriptor, publish it.
  699. * @vrh: the vring.
  700. * @head: the head as filled in by vringh_getdesc_user.
  701. * @len: the length of data we have written.
  702. *
  703. * You should check vringh_need_notify_user() after one or more calls
  704. * to this function.
  705. */
  706. int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
  707. {
  708. struct vring_used_elem used;
  709. used.id = cpu_to_vringh32(vrh, head);
  710. used.len = cpu_to_vringh32(vrh, len);
  711. return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
  712. }
  713. EXPORT_SYMBOL(vringh_complete_user);
  714. /**
  715. * vringh_complete_multi_user - we've finished with many descriptors.
  716. * @vrh: the vring.
  717. * @used: the head, length pairs.
  718. * @num_used: the number of used elements.
  719. *
  720. * You should check vringh_need_notify_user() after one or more calls
  721. * to this function.
  722. */
  723. int vringh_complete_multi_user(struct vringh *vrh,
  724. const struct vring_used_elem used[],
  725. unsigned num_used)
  726. {
  727. return __vringh_complete(vrh, used, num_used,
  728. putu16_user, putused_user);
  729. }
  730. EXPORT_SYMBOL(vringh_complete_multi_user);
  731. /**
  732. * vringh_notify_enable_user - we want to know if something changes.
  733. * @vrh: the vring.
  734. *
  735. * This always enables notifications, but returns false if there are
  736. * now more buffers available in the vring.
  737. */
  738. bool vringh_notify_enable_user(struct vringh *vrh)
  739. {
  740. return __vringh_notify_enable(vrh, getu16_user, putu16_user);
  741. }
  742. EXPORT_SYMBOL(vringh_notify_enable_user);
  743. /**
  744. * vringh_notify_disable_user - don't tell us if something changes.
  745. * @vrh: the vring.
  746. *
  747. * This is our normal running state: we disable and then only enable when
  748. * we're going to sleep.
  749. */
  750. void vringh_notify_disable_user(struct vringh *vrh)
  751. {
  752. __vringh_notify_disable(vrh, putu16_user);
  753. }
  754. EXPORT_SYMBOL(vringh_notify_disable_user);
  755. /**
  756. * vringh_need_notify_user - must we tell the other side about used buffers?
  757. * @vrh: the vring we've called vringh_complete_user() on.
  758. *
  759. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  760. */
  761. int vringh_need_notify_user(struct vringh *vrh)
  762. {
  763. return __vringh_need_notify(vrh, getu16_user);
  764. }
  765. EXPORT_SYMBOL(vringh_need_notify_user);
  766. /* Kernelspace access helpers. */
  767. static inline int getu16_kern(const struct vringh *vrh,
  768. u16 *val, const __virtio16 *p)
  769. {
  770. *val = vringh16_to_cpu(vrh, READ_ONCE(*p));
  771. return 0;
  772. }
  773. static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
  774. {
  775. WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
  776. return 0;
  777. }
  778. static inline int copydesc_kern(const struct vringh *vrh,
  779. void *dst, const void *src, size_t len)
  780. {
  781. memcpy(dst, src, len);
  782. return 0;
  783. }
  784. static inline int putused_kern(const struct vringh *vrh,
  785. struct vring_used_elem *dst,
  786. const struct vring_used_elem *src,
  787. unsigned int num)
  788. {
  789. memcpy(dst, src, num * sizeof(*dst));
  790. return 0;
  791. }
  792. static inline int xfer_kern(const struct vringh *vrh, void *src,
  793. void *dst, size_t len)
  794. {
  795. memcpy(dst, src, len);
  796. return 0;
  797. }
  798. static inline int kern_xfer(const struct vringh *vrh, void *dst,
  799. void *src, size_t len)
  800. {
  801. memcpy(dst, src, len);
  802. return 0;
  803. }
  804. /**
  805. * vringh_init_kern - initialize a vringh for a kernelspace vring.
  806. * @vrh: the vringh to initialize.
  807. * @features: the feature bits for this ring.
  808. * @num: the number of elements.
  809. * @weak_barriers: true if we only need memory barriers, not I/O.
  810. * @desc: the kernelspace descriptor pointer.
  811. * @avail: the kernelspace avail pointer.
  812. * @used: the kernelspace used pointer.
  813. *
  814. * Returns an error if num is invalid.
  815. */
  816. int vringh_init_kern(struct vringh *vrh, u64 features,
  817. unsigned int num, bool weak_barriers,
  818. struct vring_desc *desc,
  819. struct vring_avail *avail,
  820. struct vring_used *used)
  821. {
  822. /* Sane power of 2 please! */
  823. if (!num || num > 0xffff || (num & (num - 1))) {
  824. vringh_bad("Bad ring size %u", num);
  825. return -EINVAL;
  826. }
  827. vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
  828. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  829. vrh->weak_barriers = weak_barriers;
  830. vrh->completed = 0;
  831. vrh->last_avail_idx = 0;
  832. vrh->last_used_idx = 0;
  833. vrh->vring.num = num;
  834. vrh->vring.desc = desc;
  835. vrh->vring.avail = avail;
  836. vrh->vring.used = used;
  837. return 0;
  838. }
  839. EXPORT_SYMBOL(vringh_init_kern);
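/*
 * Example (sketch): laying out a split vring in kernel memory with
 * vring_size()/vring_init() and handing its pieces to vringh_init_kern().
 * In real users the ring memory is shared with the driver side; the private
 * allocation, the 64-entry size and the alignment are example choices only.
 */
static int my_setup_kern_vring(struct vringh *vrh, u64 features, void **ring_mem)
{
	const unsigned int num = 64;
	struct vring vring;
	void *mem;
	int err;

	mem = kzalloc(vring_size(num, SMP_CACHE_BYTES), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	vring_init(&vring, num, mem, SMP_CACHE_BYTES);

	err = vringh_init_kern(vrh, features, num, true,
			       vring.desc, vring.avail, vring.used);
	if (err) {
		kfree(mem);
		return err;
	}
	*ring_mem = mem;	/* caller keeps this until the ring is torn down */
	return 0;
}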
  840. /**
  841. * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
  842. * @vrh: the kernelspace vring.
  843. * @riov: where to put the readable descriptors (or NULL)
  844. * @wiov: where to put the writable descriptors (or NULL)
  845. * @head: head index we received, for passing to vringh_complete_kern().
  846. * @gfp: flags for allocating larger riov/wiov.
  847. *
  848. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  849. *
  850. * Note that on error return, you can tell the difference between an
  851. * invalid ring and a single invalid descriptor: in the former case,
  852. * *head will be vrh->vring.num. You may be able to ignore an invalid
  853. * descriptor, but there's not much you can do with an invalid ring.
  854. *
  855. * Note that you can reuse riov and wiov with subsequent calls. Content is
  856. * overwritten and memory reallocated if more space is needed.
  857. * When you no longer need riov and wiov, clean them up by calling
  858. * vringh_kiov_cleanup() to release the memory, even on error!
  859. */
  860. int vringh_getdesc_kern(struct vringh *vrh,
  861. struct vringh_kiov *riov,
  862. struct vringh_kiov *wiov,
  863. u16 *head,
  864. gfp_t gfp)
  865. {
  866. int err;
  867. err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
  868. if (err < 0)
  869. return err;
  870. /* Empty... */
  871. if (err == vrh->vring.num)
  872. return 0;
  873. *head = err;
  874. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  875. gfp, copydesc_kern);
  876. if (err)
  877. return err;
  878. return 1;
  879. }
  880. EXPORT_SYMBOL(vringh_getdesc_kern);
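/*
 * Example (sketch): a kernel-side loop that echoes each readable buffer back
 * through the writable part of the same descriptor chain.  The 256-byte
 * bounce buffer and my_kick_other_side() are hypothetical.
 */
static int my_echo_kern_ring(struct vringh *vrh)
{
	struct vringh_kiov riov, wiov;
	u8 buf[256];
	ssize_t in, out;
	u16 head;
	int err;

	vringh_kiov_init(&riov, NULL, 0);
	vringh_kiov_init(&wiov, NULL, 0);

	while ((err = vringh_getdesc_kern(vrh, &riov, &wiov, &head,
					  GFP_KERNEL)) > 0) {
		in = vringh_iov_pull_kern(&riov, buf, sizeof(buf));
		if (in < 0) {
			err = in;
			break;
		}
		out = vringh_iov_push_kern(&wiov, buf, in);
		if (out < 0) {
			err = out;
			break;
		}
		err = vringh_complete_kern(vrh, head, out);
		if (err)
			break;
	}

	vringh_kiov_cleanup(&riov);
	vringh_kiov_cleanup(&wiov);

	if (vringh_need_notify_kern(vrh) > 0)
		my_kick_other_side();
	return err;
}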
  881. /**
  882. * vringh_iov_pull_kern - copy bytes from vring_iov.
  883. * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
  884. * @dst: the place to copy.
  885. * @len: the maximum length to copy.
  886. *
  887. * Returns the bytes copied <= len or a negative errno.
  888. */
  889. ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
  890. {
  891. return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
  892. }
  893. EXPORT_SYMBOL(vringh_iov_pull_kern);
  894. /**
  895. * vringh_iov_push_kern - copy bytes into vring_iov.
  896. * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
  897. * @src: the place to copy from.
  898. * @len: the maximum length to copy.
  899. *
  900. * Returns the bytes copied <= len or a negative errno.
  901. */
  902. ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
  903. const void *src, size_t len)
  904. {
  905. return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
  906. }
  907. EXPORT_SYMBOL(vringh_iov_push_kern);
  908. /**
  909. * vringh_abandon_kern - we've decided not to handle the descriptor(s).
  910. * @vrh: the vring.
  911. * @num: the number of descriptors to put back (i.e. how many
  912. * vringh_getdesc_kern() calls to undo).
  913. *
  914. * The next vringh_getdesc_kern() will return the old descriptor(s) again.
  915. */
  916. void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
  917. {
  918. /* We only update vring_avail_event(vr) when we want to be notified,
  919. * so we haven't changed that yet. */
  920. vrh->last_avail_idx -= num;
  921. }
  922. EXPORT_SYMBOL(vringh_abandon_kern);
  923. /**
  924. * vringh_complete_kern - we've finished with descriptor, publish it.
  925. * @vrh: the vring.
  926. * @head: the head as filled in by vringh_getdesc_kern.
  927. * @len: the length of data we have written.
  928. *
  929. * You should check vringh_need_notify_kern() after one or more calls
  930. * to this function.
  931. */
  932. int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
  933. {
  934. struct vring_used_elem used;
  935. used.id = cpu_to_vringh32(vrh, head);
  936. used.len = cpu_to_vringh32(vrh, len);
  937. return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
  938. }
  939. EXPORT_SYMBOL(vringh_complete_kern);
  940. /**
  941. * vringh_notify_enable_kern - we want to know if something changes.
  942. * @vrh: the vring.
  943. *
  944. * This always enables notifications, but returns false if there are
  945. * now more buffers available in the vring.
  946. */
  947. bool vringh_notify_enable_kern(struct vringh *vrh)
  948. {
  949. return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
  950. }
  951. EXPORT_SYMBOL(vringh_notify_enable_kern);
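/*
 * Example (sketch) of the usual notification dance: keep notifications
 * disabled while there is work, re-enable just before going idle, and
 * recheck because a buffer may have slipped in meanwhile.  my_service_ring()
 * (returns > 0 if it processed something) is hypothetical.
 */
static void my_run_until_idle(struct vringh *vrh)
{
	vringh_notify_disable_kern(vrh);

	for (;;) {
		if (my_service_ring(vrh) > 0)
			continue;

		/* Looked empty: ask to be told about new buffers... */
		if (!vringh_notify_enable_kern(vrh)) {
			/* ...but one arrived while we were enabling. */
			vringh_notify_disable_kern(vrh);
			continue;
		}
		break;	/* really idle; wait for the next notification */
	}
}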
  952. /**
  953. * vringh_notify_disable_kern - don't tell us if something changes.
  954. * @vrh: the vring.
  955. *
  956. * This is our normal running state: we disable and then only enable when
  957. * we're going to sleep.
  958. */
  959. void vringh_notify_disable_kern(struct vringh *vrh)
  960. {
  961. __vringh_notify_disable(vrh, putu16_kern);
  962. }
  963. EXPORT_SYMBOL(vringh_notify_disable_kern);
  964. /**
  965. * vringh_need_notify_kern - must we tell the other side about used buffers?
  966. * @vrh: the vring we've called vringh_complete_kern() on.
  967. *
  968. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  969. */
  970. int vringh_need_notify_kern(struct vringh *vrh)
  971. {
  972. return __vringh_need_notify(vrh, getu16_kern);
  973. }
  974. EXPORT_SYMBOL(vringh_need_notify_kern);
  975. #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
  976. static int iotlb_translate(const struct vringh *vrh,
  977. u64 addr, u64 len, u64 *translated,
  978. struct bio_vec iov[],
  979. int iov_size, u32 perm)
  980. {
  981. struct vhost_iotlb_map *map;
  982. struct vhost_iotlb *iotlb = vrh->iotlb;
  983. int ret = 0;
  984. u64 s = 0, last = addr + len - 1;
  985. spin_lock(vrh->iotlb_lock);
  986. while (len > s) {
  987. u64 size, pa, pfn;
  988. if (unlikely(ret >= iov_size)) {
  989. ret = -ENOBUFS;
  990. break;
  991. }
  992. map = vhost_iotlb_itree_first(iotlb, addr, last);
  993. if (!map || map->start > addr) {
  994. ret = -EINVAL;
  995. break;
  996. } else if (!(map->perm & perm)) {
  997. ret = -EPERM;
  998. break;
  999. }
  1000. size = map->size - addr + map->start;
  1001. pa = map->addr + addr - map->start;
  1002. pfn = pa >> PAGE_SHIFT;
  1003. iov[ret].bv_page = pfn_to_page(pfn);
  1004. iov[ret].bv_len = min(len - s, size);
  1005. iov[ret].bv_offset = pa & (PAGE_SIZE - 1);
  1006. s += size;
  1007. addr += size;
  1008. ++ret;
  1009. }
  1010. spin_unlock(vrh->iotlb_lock);
  1011. if (translated)
  1012. *translated = min(len, s);
  1013. return ret;
  1014. }
  1015. static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
  1016. void *src, size_t len)
  1017. {
  1018. u64 total_translated = 0;
  1019. while (total_translated < len) {
  1020. struct bio_vec iov[16];
  1021. struct iov_iter iter;
  1022. u64 translated;
  1023. int ret;
  1024. ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
  1025. len - total_translated, &translated,
  1026. iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
  1027. if (ret == -ENOBUFS)
  1028. ret = ARRAY_SIZE(iov);
  1029. else if (ret < 0)
  1030. return ret;
  1031. iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
  1032. ret = copy_from_iter(dst, translated, &iter);
  1033. if (ret < 0)
  1034. return ret;
  1035. src += translated;
  1036. dst += translated;
  1037. total_translated += translated;
  1038. }
  1039. return total_translated;
  1040. }
  1041. static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
  1042. void *src, size_t len)
  1043. {
  1044. u64 total_translated = 0;
  1045. while (total_translated < len) {
  1046. struct bio_vec iov[16];
  1047. struct iov_iter iter;
  1048. u64 translated;
  1049. int ret;
  1050. ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
  1051. len - total_translated, &translated,
  1052. iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
  1053. if (ret == -ENOBUFS)
  1054. ret = ARRAY_SIZE(iov);
  1055. else if (ret < 0)
  1056. return ret;
  1057. iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
  1058. ret = copy_to_iter(src, translated, &iter);
  1059. if (ret < 0)
  1060. return ret;
  1061. src += translated;
  1062. dst += translated;
  1063. total_translated += translated;
  1064. }
  1065. return total_translated;
  1066. }
  1067. static inline int getu16_iotlb(const struct vringh *vrh,
  1068. u16 *val, const __virtio16 *p)
  1069. {
  1070. struct bio_vec iov;
  1071. void *kaddr, *from;
  1072. int ret;
  1073. /* Atomic read is needed for getu16 */
  1074. ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
  1075. &iov, 1, VHOST_MAP_RO);
  1076. if (ret < 0)
  1077. return ret;
  1078. kaddr = kmap_atomic(iov.bv_page);
  1079. from = kaddr + iov.bv_offset;
  1080. *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
  1081. kunmap_atomic(kaddr);
  1082. return 0;
  1083. }
  1084. static inline int putu16_iotlb(const struct vringh *vrh,
  1085. __virtio16 *p, u16 val)
  1086. {
  1087. struct bio_vec iov;
  1088. void *kaddr, *to;
  1089. int ret;
  1090. /* Atomic write is needed for putu16 */
  1091. ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
  1092. &iov, 1, VHOST_MAP_WO);
  1093. if (ret < 0)
  1094. return ret;
  1095. kaddr = kmap_atomic(iov.bv_page);
  1096. to = kaddr + iov.bv_offset;
  1097. WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
  1098. kunmap_atomic(kaddr);
  1099. return 0;
  1100. }
  1101. static inline int copydesc_iotlb(const struct vringh *vrh,
  1102. void *dst, const void *src, size_t len)
  1103. {
  1104. int ret;
  1105. ret = copy_from_iotlb(vrh, dst, (void *)src, len);
  1106. if (ret != len)
  1107. return -EFAULT;
  1108. return 0;
  1109. }
  1110. static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
  1111. void *dst, size_t len)
  1112. {
  1113. int ret;
  1114. ret = copy_from_iotlb(vrh, dst, src, len);
  1115. if (ret != len)
  1116. return -EFAULT;
  1117. return 0;
  1118. }
  1119. static inline int xfer_to_iotlb(const struct vringh *vrh,
  1120. void *dst, void *src, size_t len)
  1121. {
  1122. int ret;
  1123. ret = copy_to_iotlb(vrh, dst, src, len);
  1124. if (ret != len)
  1125. return -EFAULT;
  1126. return 0;
  1127. }
  1128. static inline int putused_iotlb(const struct vringh *vrh,
  1129. struct vring_used_elem *dst,
  1130. const struct vring_used_elem *src,
  1131. unsigned int num)
  1132. {
  1133. int size = num * sizeof(*dst);
  1134. int ret;
  1135. ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
  1136. if (ret != size)
  1137. return -EFAULT;
  1138. return 0;
  1139. }
  1140. /**
  1141. * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
  1142. * @vrh: the vringh to initialize.
  1143. * @features: the feature bits for this ring.
  1144. * @num: the number of elements.
  1145. * @weak_barriers: true if we only need memory barriers, not I/O.
  1146. * @desc: the descriptor pointer (accessed through the IOTLB).
  1147. * @avail: the avail pointer (accessed through the IOTLB).
  1148. * @used: the used pointer (accessed through the IOTLB).
  1149. *
  1150. * Returns an error if num is invalid.
  1151. */
  1152. int vringh_init_iotlb(struct vringh *vrh, u64 features,
  1153. unsigned int num, bool weak_barriers,
  1154. struct vring_desc *desc,
  1155. struct vring_avail *avail,
  1156. struct vring_used *used)
  1157. {
  1158. return vringh_init_kern(vrh, features, num, weak_barriers,
  1159. desc, avail, used);
  1160. }
  1161. EXPORT_SYMBOL(vringh_init_iotlb);
  1162. /**
  1163. * vringh_set_iotlb - attach an IOTLB (and its lock) to a vringh.
  1164. * @vrh: the vring
  1165. * @iotlb: iotlb associated with this vring
  1166. * @iotlb_lock: spinlock to synchronize the iotlb accesses
  1167. */
  1168. void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
  1169. spinlock_t *iotlb_lock)
  1170. {
  1171. vrh->iotlb = iotlb;
  1172. vrh->iotlb_lock = iotlb_lock;
  1173. }
  1174. EXPORT_SYMBOL(vringh_set_iotlb);
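/*
 * Example (sketch): an IOTLB-backed ring is set up in two steps, init plus
 * attaching the IOTLB that will translate every access.  The vdpa-style
 * struct my_dev with vrh/iotlb/iotlb_lock members is hypothetical; desc,
 * avail and used are guest addresses (IOVAs) cast to pointers, which the
 * iotlb accessors above translate on every access.
 */
static int my_setup_iotlb_vring(struct my_dev *d, u64 features,
				unsigned int num, u64 desc_iova,
				u64 avail_iova, u64 used_iova)
{
	int err;

	err = vringh_init_iotlb(&d->vrh, features, num, false,
				(struct vring_desc *)(uintptr_t)desc_iova,
				(struct vring_avail *)(uintptr_t)avail_iova,
				(struct vring_used *)(uintptr_t)used_iova);
	if (err)
		return err;

	vringh_set_iotlb(&d->vrh, d->iotlb, &d->iotlb_lock);
	return 0;
}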
  1175. /**
  1176. * vringh_getdesc_iotlb - get next available descriptor from ring with
  1177. * IOTLB.
  1178. * @vrh: the kernelspace vring.
  1179. * @riov: where to put the readable descriptors (or NULL)
  1180. * @wiov: where to put the writable descriptors (or NULL)
  1181. * @head: head index we received, for passing to vringh_complete_iotlb().
  1182. * @gfp: flags for allocating larger riov/wiov.
  1183. *
  1184. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  1185. *
  1186. * Note that on error return, you can tell the difference between an
  1187. * invalid ring and a single invalid descriptor: in the former case,
  1188. * *head will be vrh->vring.num. You may be able to ignore an invalid
  1189. * descriptor, but there's not much you can do with an invalid ring.
  1190. *
  1191. * Note that you can reuse riov and wiov with subsequent calls. Content is
  1192. * overwritten and memory reallocated if more space is needed.
  1193. * When you no longer need riov and wiov, clean them up by calling
  1194. * vringh_kiov_cleanup() to release the memory, even on error!
  1195. */
  1196. int vringh_getdesc_iotlb(struct vringh *vrh,
  1197. struct vringh_kiov *riov,
  1198. struct vringh_kiov *wiov,
  1199. u16 *head,
  1200. gfp_t gfp)
  1201. {
  1202. int err;
  1203. err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
  1204. if (err < 0)
  1205. return err;
  1206. /* Empty... */
  1207. if (err == vrh->vring.num)
  1208. return 0;
  1209. *head = err;
  1210. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  1211. gfp, copydesc_iotlb);
  1212. if (err)
  1213. return err;
  1214. return 1;
  1215. }
  1216. EXPORT_SYMBOL(vringh_getdesc_iotlb);
  1217. /**
  1218. * vringh_iov_pull_iotlb - copy bytes from vring_iov.
  1219. * @vrh: the vring.
  1220. * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
  1221. * @dst: the place to copy.
  1222. * @len: the maximum length to copy.
  1223. *
  1224. * Returns the bytes copied <= len or a negative errno.
  1225. */
  1226. ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
  1227. struct vringh_kiov *riov,
  1228. void *dst, size_t len)
  1229. {
  1230. return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
  1231. }
  1232. EXPORT_SYMBOL(vringh_iov_pull_iotlb);
  1233. /**
  1234. * vringh_iov_push_iotlb - copy bytes into vring_iov.
  1235. * @vrh: the vring.
  1236. * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
  1237. * @src: the place to copy from.
  1238. * @len: the maximum length to copy.
  1239. *
  1240. * Returns the bytes copied <= len or a negative errno.
  1241. */
  1242. ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
  1243. struct vringh_kiov *wiov,
  1244. const void *src, size_t len)
  1245. {
  1246. return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
  1247. }
  1248. EXPORT_SYMBOL(vringh_iov_push_iotlb);
  1249. /**
  1250. * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
  1251. * @vrh: the vring.
  1252. * @num: the number of descriptors to put back (i.e. how many
  1253. * vringh_getdesc_iotlb() calls to undo).
  1254. *
  1255. * The next vringh_getdesc_iotlb() will return the old descriptor(s) again.
  1256. */
  1257. void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
  1258. {
  1259. /* We only update vring_avail_event(vr) when we want to be notified,
  1260. * so we haven't changed that yet.
  1261. */
  1262. vrh->last_avail_idx -= num;
  1263. }
  1264. EXPORT_SYMBOL(vringh_abandon_iotlb);
  1265. /**
  1266. * vringh_complete_iotlb - we've finished with descriptor, publish it.
  1267. * @vrh: the vring.
  1268. * @head: the head as filled in by vringh_getdesc_iotlb.
  1269. * @len: the length of data we have written.
  1270. *
  1271. * You should check vringh_need_notify_iotlb() after one or more calls
  1272. * to this function.
  1273. */
  1274. int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
  1275. {
  1276. struct vring_used_elem used;
  1277. used.id = cpu_to_vringh32(vrh, head);
  1278. used.len = cpu_to_vringh32(vrh, len);
  1279. return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
  1280. }
  1281. EXPORT_SYMBOL(vringh_complete_iotlb);
  1282. /**
  1283. * vringh_notify_enable_iotlb - we want to know if something changes.
  1284. * @vrh: the vring.
  1285. *
  1286. * This always enables notifications, but returns false if there are
  1287. * now more buffers available in the vring.
  1288. */
  1289. bool vringh_notify_enable_iotlb(struct vringh *vrh)
  1290. {
  1291. return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
  1292. }
  1293. EXPORT_SYMBOL(vringh_notify_enable_iotlb);
  1294. /**
  1295. * vringh_notify_disable_iotlb - don't tell us if something changes.
  1296. * @vrh: the vring.
  1297. *
  1298. * This is our normal running state: we disable and then only enable when
  1299. * we're going to sleep.
  1300. */
  1301. void vringh_notify_disable_iotlb(struct vringh *vrh)
  1302. {
  1303. __vringh_notify_disable(vrh, putu16_iotlb);
  1304. }
  1305. EXPORT_SYMBOL(vringh_notify_disable_iotlb);
  1306. /**
  1307. * vringh_need_notify_iotlb - must we tell the other side about used buffers?
  1308. * @vrh: the vring we've called vringh_complete_iotlb() on.
  1309. *
  1310. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  1311. */
  1312. int vringh_need_notify_iotlb(struct vringh *vrh)
  1313. {
  1314. return __vringh_need_notify(vrh, getu16_iotlb);
  1315. }
  1316. EXPORT_SYMBOL(vringh_need_notify_iotlb);
  1317. #endif
  1318. MODULE_LICENSE("GPL");