iova_domain.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <[email protected]>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

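/*
 * Add a file-backed mapping for [start, last] to the domain's IOTLB.
 * A reference is taken on @file and stored in the per-entry opaque data
 * so the backing memory stays alive for the lifetime of the entry.
 */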
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *map_file;
	int ret;

	map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
	if (!map_file)
		return -ENOMEM;

	map_file->file = get_file(file);
	map_file->offset = offset;

	ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, map_file);
	if (ret) {
		fput(map_file->file);
		kfree(map_file);
		return ret;
	}
	return 0;
}

static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;

	while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
		map_file = (struct vdpa_map_file *)map->opaque;
		fput(map_file->file);
		kfree(map_file);
		vhost_iotlb_map_free(domain->iotlb, map);
	}
}

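/*
 * Replace the domain's IOTLB contents with the entries of @iotlb.
 * On failure, every entry added so far is removed again so the domain
 * is left empty rather than partially populated.
 */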
int vduse_domain_set_map(struct vduse_iova_domain *domain,
			 struct vhost_iotlb *iotlb)
{
	struct vdpa_map_file *map_file;
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;
	int ret;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, start, last);

	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		map_file = (struct vdpa_map_file *)map->opaque;
		ret = vduse_iotlb_add_range(domain, map->start, map->last,
					    map->addr, map->perm,
					    map_file->file,
					    map_file->offset);
		if (ret)
			goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	return 0;
err:
	vduse_iotlb_del_range(domain, start, last);
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

void vduse_domain_clear_map(struct vduse_iova_domain *domain,
			    struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	u64 start = 0ULL, last = ULLONG_MAX;

	spin_lock(&domain->iotlb_lock);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		vduse_iotlb_del_range(domain, map->start, map->last);
	}
	spin_unlock(&domain->iotlb_lock);
}

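/*
 * Associate each page-sized slot in the IOVA range [iova, iova + size)
 * with the original physical address it bounces for, allocating a kernel
 * bounce page on first use.
 */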
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				return -ENOMEM;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					   u64 iova, u64 size)
{
	struct vduse_bounce_map *map;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		iova += PAGE_SIZE;
	}
}

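/*
 * Copy @size bytes between the original pages starting at physical
 * address @orig and the bounce buffer at @addr, page by page.
 * DMA_TO_DEVICE reads from the original pages into the bounce buffer;
 * other directions write back to the original pages.
 */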
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int offset = offset_in_page(orig);
	struct page *page;
	unsigned int sz = 0;

	while (size) {
		sz = min_t(size_t, PAGE_SIZE - offset, size);
		page = pfn_to_page(pfn);
		if (dir == DMA_TO_DEVICE)
			memcpy_from_page(addr, page, offset, sz);
		else
			memcpy_to_page(page, offset, addr, sz);
		size -= sz;
		pfn++;
		addr += sz;
		offset = 0;
	}
}

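/*
 * Bounce the IOVA range [iova, iova + size) between the original pages
 * recorded in the bounce map and the corresponding bounce pages, in the
 * direction given by @dir. Ranges beyond the bounce area are ignored.
 */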
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	struct vduse_bounce_map *map;
	unsigned int offset;
	void *addr;
	size_t sz;

	if (iova >= domain->bounce_size)
		return;

	while (size) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		offset = offset_in_page(iova);
		sz = min_t(size_t, PAGE_SIZE - offset, size);

		if (WARN_ON(!map->bounce_page ||
			    map->orig_phys == INVALID_PHYS_ADDR))
			return;

		addr = kmap_local_page(map->bounce_page);
		do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
		kunmap_local(addr);
		size -= sz;
		iova += sz;
	}
}

static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
	u64 start = iova & PAGE_MASK;
	u64 last = start + PAGE_SIZE - 1;
	struct vhost_iotlb_map *map;
	struct page *page = NULL;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, start, last);
	if (!map)
		goto out;

	page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
	get_page(page);
out:
	spin_unlock(&domain->iotlb_lock);

	return page;
}

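/*
 * Look up the kernel bounce page backing @iova for the mmap fault path.
 * Returns NULL once userspace has registered its own bounce pages, since
 * the kernel pages are then no longer the ones in use.
 */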
static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
	struct vduse_bounce_map *map;
	struct page *page = NULL;

	read_lock(&domain->bounce_lock);
	map = &domain->bounce_maps[iova >> PAGE_SHIFT];
	if (domain->user_bounce_pages || !map->bounce_page)
		goto out;

	page = map->bounce_page;
	get_page(page);
out:
	read_unlock(&domain->bounce_lock);

	return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;

	bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
			continue;

		if (!map->bounce_page)
			continue;

		__free_page(map->bounce_page);
		map->bounce_page = NULL;
	}
}

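/*
 * Switch the domain from kernel-allocated bounce pages to pages supplied
 * by userspace. The contents of any kernel bounce page still in use are
 * copied into the corresponding user page before the kernel page is
 * freed. Partial registration is not supported: @count must cover the
 * whole bounce area.
 */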
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
				       struct page **pages, int count)
{
	struct vduse_bounce_map *map;
	int i, ret;

	/* Now we don't support partial mapping */
	if (count != (domain->bounce_size >> PAGE_SHIFT))
		return -EINVAL;

	write_lock(&domain->bounce_lock);
	ret = -EEXIST;
	if (domain->user_bounce_pages)
		goto out;

	for (i = 0; i < count; i++) {
		map = &domain->bounce_maps[i];
		if (map->bounce_page) {
			/* Copy kernel page to user page if it's in use */
			if (map->orig_phys != INVALID_PHYS_ADDR)
				memcpy_to_page(pages[i], 0,
					       page_address(map->bounce_page),
					       PAGE_SIZE);
			__free_page(map->bounce_page);
		}
		map->bounce_page = pages[i];
		get_page(pages[i]);
	}
	domain->user_bounce_pages = true;
	ret = 0;
out:
	write_unlock(&domain->bounce_lock);

	return ret;
}

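/*
 * Revert the domain to kernel-allocated bounce pages: any user page still
 * in use has its contents copied back into a freshly allocated kernel
 * page before the user page reference is dropped.
 */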
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
	struct vduse_bounce_map *map;
	unsigned long i, count;

	write_lock(&domain->bounce_lock);
	if (!domain->user_bounce_pages)
		goto out;

	count = domain->bounce_size >> PAGE_SHIFT;
	for (i = 0; i < count; i++) {
		struct page *page = NULL;

		map = &domain->bounce_maps[i];
		if (WARN_ON(!map->bounce_page))
			continue;

		/* Copy user page to kernel page if it's in use */
		if (map->orig_phys != INVALID_PHYS_ADDR) {
			page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
			memcpy_from_page(page_address(page),
					 map->bounce_page, 0, PAGE_SIZE);
		}
		put_page(map->bounce_page);
		map->bounce_page = page;
	}
	domain->user_bounce_pages = false;
out:
	write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
	if (!domain->bounce_map)
		return;

	spin_lock(&domain->iotlb_lock);
	if (!domain->bounce_map)
		goto unlock;

	vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
	domain->bounce_map = 0;
unlock:
	spin_unlock(&domain->iotlb_lock);
}

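/*
 * Lazily publish a single IOTLB entry covering the whole bounce area,
 * backed by the domain's own file at offset 0. The bounce_map flag is
 * double-checked under the lock so the common already-initialized path
 * stays cheap.
 */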
static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
	int ret = 0;

	if (domain->bounce_map)
		return 0;

	spin_lock(&domain->iotlb_lock);
	if (domain->bounce_map)
		goto unlock;

	ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
				    0, VHOST_MAP_RW, domain->file, 0);
	if (ret)
		goto unlock;

	domain->bounce_map = 1;
unlock:
	spin_unlock(&domain->iotlb_lock);
	return ret;
}

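/*
 * Thin wrappers around the IOVA allocator: sizes are rounded up to the
 * IOVA granule and converted to PFN units. A return value of 0 from
 * vduse_domain_alloc_iova() means allocation failure.
 */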
static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
			unsigned long size, unsigned long limit)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;
	unsigned long iova_pfn;

	iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

	return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long iova_len = iova_align(iovad, size) >> shift;

	free_iova_fast(iovad, iova >> shift, iova_len);
}

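/*
 * Streaming DMA map path: allocate an IOVA from the bounce region, bind
 * it to the page's physical address in the bounce map, and copy data out
 * to the bounce pages for transfers toward the device.
 */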
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
				 struct page *page, unsigned long offset,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;
	unsigned long limit = domain->bounce_size - 1;
	phys_addr_t pa = page_to_phys(page) + offset;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

	if (!iova)
		return DMA_MAPPING_ERROR;

	if (vduse_domain_init_bounce_map(domain))
		goto err;

	read_lock(&domain->bounce_lock);
	if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
		goto err_unlock;

	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

	read_unlock(&domain->bounce_lock);

	return iova;
err_unlock:
	read_unlock(&domain->bounce_lock);
err:
	vduse_domain_free_iova(iovad, iova, size);
	return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->stream_iovad;

	read_lock(&domain->bounce_lock);
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);
	vduse_domain_free_iova(iovad, dma_addr, size);
}

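/*
 * Coherent allocations use the consistent IOVA space above the bounce
 * area. The backing pages are allocated in the kernel and published
 * through an IOTLB entry that records the domain file, with the IOVA
 * itself used as the file offset.
 */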
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	unsigned long limit = domain->iova_limit;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
	void *orig = alloc_pages_exact(size, flag);

	if (!iova || !orig)
		goto err;

	spin_lock(&domain->iotlb_lock);
	if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				  virt_to_phys(orig), VHOST_MAP_RW,
				  domain->file, (u64)iova)) {
		spin_unlock(&domain->iotlb_lock);
		goto err;
	}
	spin_unlock(&domain->iotlb_lock);

	*dma_addr = iova;

	return orig;
err:
	*dma_addr = DMA_MAPPING_ERROR;
	if (orig)
		free_pages_exact(orig, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	struct vhost_iotlb_map *map;
	struct vdpa_map_file *map_file;
	phys_addr_t pa;

	spin_lock(&domain->iotlb_lock);
	map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
				      (u64)dma_addr + size - 1);
	if (WARN_ON(!map)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}
	map_file = (struct vdpa_map_file *)map->opaque;
	fput(map_file->file);
	kfree(map_file);
	pa = map->addr;
	vhost_iotlb_map_free(domain->iotlb, map);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(pa), size);
}

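/*
 * Fault handler for userspace mappings of the domain file: offsets below
 * bounce_size resolve to bounce pages, everything above resolves to the
 * pages behind coherent allocations.
 */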
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
	struct page *page;

	if (!domain)
		return VM_FAULT_SIGBUS;

	if (iova < domain->bounce_size)
		page = vduse_domain_get_bounce_page(domain, iova);
	else
		page = vduse_domain_get_coherent_page(domain, iova);

	if (!page)
		return VM_FAULT_SIGBUS;

	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vduse_iova_domain *domain = file->private_data;

	vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
	vma->vm_private_data = domain;
	vma->vm_ops = &vduse_domain_mmap_ops;

	return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	vduse_domain_remove_user_bounce_pages(domain);
	vduse_domain_free_kernel_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	kfree(domain);

	return 0;
}

static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
	fput(domain->file);
}

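/*
 * Create an IOVA domain: the low [0, bounce_size) range is served by the
 * bounce buffer via stream_iovad, the rest up to iova_limit by coherent
 * allocations via consistent_iovad. The domain's lifetime is tied to an
 * anonymous file so that mmap and the final teardown go through
 * vduse_domain_fops.
 */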
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
	struct vduse_iova_domain *domain;
	struct file *file;
	struct vduse_bounce_map *map;
	unsigned long pfn, bounce_pfns;
	int ret;

	bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
	if (iova_limit <= bounce_size)
		return NULL;

	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
	if (!domain)
		return NULL;

	domain->iotlb = vhost_iotlb_alloc(0, 0);
	if (!domain->iotlb)
		goto err_iotlb;

	domain->iova_limit = iova_limit;
	domain->bounce_size = PAGE_ALIGN(bounce_size);
	domain->bounce_maps = vzalloc(bounce_pfns *
				sizeof(struct vduse_bounce_map));
	if (!domain->bounce_maps)
		goto err_map;

	for (pfn = 0; pfn < bounce_pfns; pfn++) {
		map = &domain->bounce_maps[pfn];
		map->orig_phys = INVALID_PHYS_ADDR;
	}
	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
				domain, O_RDWR);
	if (IS_ERR(file))
		goto err_file;

	domain->file = file;
	rwlock_init(&domain->bounce_lock);
	spin_lock_init(&domain->iotlb_lock);
	init_iova_domain(&domain->stream_iovad,
			PAGE_SIZE, IOVA_START_PFN);
	ret = iova_domain_init_rcaches(&domain->stream_iovad);
	if (ret)
		goto err_iovad_stream;
	init_iova_domain(&domain->consistent_iovad,
			PAGE_SIZE, bounce_pfns);
	ret = iova_domain_init_rcaches(&domain->consistent_iovad);
	if (ret)
		goto err_iovad_consistent;

	return domain;
err_iovad_consistent:
	put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
	fput(file);
err_file:
	vfree(domain->bounce_maps);
err_map:
	vhost_iotlb_free(domain->iotlb);
err_iotlb:
	kfree(domain);
	return NULL;
}

int vduse_domain_init(void)
{
	return iova_cache_get();
}

void vduse_domain_exit(void)
{
	iova_cache_put();
}