iova_bitmap.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2022, Oracle and/or its affiliates.
  4. * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
  5. */
  6. #include <linux/iova_bitmap.h>
  7. #include <linux/mm.h>
  8. #include <linux/highmem.h>
  /* Number of bits contained in one base page (PAGE_SIZE bytes) of bitmap data. */
  9. #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
  10. /*
  11. * struct iova_bitmap_map - A bitmap representing an IOVA range
  12. *
  13. * Main data structure for tracking mapped user pages of bitmap data.
  14. *
  15. * For example, for something recording dirty IOVAs, it will be provided a
  16. * struct iova_bitmap structure, as a general structure for iterating the
  17. * total IOVA range. The struct iova_bitmap_map, though, represents the
  18. * subset of said IOVA space that is pinned by its parent structure (struct
  19. * iova_bitmap).
  20. *
  21. * The user does not need to know the exact location of the bits in the bitmap.
  22. * From user perspective the only API available is iova_bitmap_set() which
  23. * records the IOVA *range* in the bitmap by setting the corresponding
  24. * bits.
  25. *
  26. * The bitmap is an array of u64 where each bit represents an IOVA
  27. * range of (1 << pgshift). Thus the formula for the bitmap data to be set is:
  28. *
  29. * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
  30. */
  31. struct iova_bitmap_map {
  32. /* base IOVA that bit 0 of the first pinned page stands for */
  33. unsigned long iova;
  34. /* shift (set from __ffs(page_size)) giving the IOVA page size covered by one bit */
  35. unsigned long pgshift;
  36. /* byte offset, within the first pinned user page, where the mapped bitmap data starts */
  37. unsigned long pgoff;
  38. /* number of pages currently pinned; reset to 0 once they are unpinned */
  39. unsigned long npages;
  40. /* array (one free page in size) of the pinned pages backing the bitmap data */
  41. struct page **pages;
  42. };
  43. /*
  44. * struct iova_bitmap - The IOVA bitmap object
  45. *
  46. * Main data structure for iterating over the bitmap data.
  47. *
  48. * Abstracts the pinning work and iterates in IOVA ranges.
  49. * It uses a windowing scheme and pins the bitmap in relatively
  50. * big ranges, as described below.
  51. *
  52. * The bitmap object uses one base page to store all the pinned pages
  53. * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
  54. * 512 struct page pointers which, if the base page size is 4K, it means
  55. * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
  56. * also 4K then the range window to iterate is 64G.
  57. *
  58. * For example iterating on a total IOVA range of 4G..128G, it will walk
  59. * through this set of ranges:
  60. *
  61. * 4G - 68G-1 (64G)
  62. * 68G - 128G-1 (60G)
  63. *
  64. * An example of the APIs on how to use/iterate over the IOVA bitmap:
  65. *
  66. * bitmap = iova_bitmap_alloc(iova, length, page_size, data);
  67. * if (IS_ERR(bitmap))
  68. * return PTR_ERR(bitmap);
  69. *
  70. * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
  71. *
  72. * iova_bitmap_free(bitmap);
  73. *
  74. * Each iteration of the @dirty_reporter_fn is called with a unique @iova
  75. * and @length argument, indicating the current range available through the
  76. * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
  77. * areas (@iova_length) within that provided range, as following:
  78. *
  79. * iova_bitmap_set(bitmap, iova, iova_length);
  80. *
  81. * The internals of the object uses an index @mapped_base_index that indexes
  82. * which u64 word of the bitmap is mapped, up to @mapped_total_index.
  83. * Those keep being incremented until @mapped_total_index is reached while
  84. * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages.
  85. *
  86. * The IOVA bitmap usually lives in whatever tracks DMA mapped ranges, or
  87. * in some other form of IOVA range tracking that correlates to the user
  88. * passed bitmap.
  89. */
  90. struct iova_bitmap {
  91. /* the currently pinned window of bitmap data and the IOVA it starts at */
  92. struct iova_bitmap_map mapped;
  93. /* userspace address of the start of the whole bitmap */
  94. u64 __user *bitmap;
  95. /* index of the first u64 word that @mapped covers */
  96. unsigned long mapped_base_index;
  97. /* total number of u64 words to walk; iteration is done once @mapped_base_index reaches it */
  98. unsigned long mapped_total_index;
  99. /* base IOVA of the whole bitmap */
  100. unsigned long iova;
  101. /* length of the IOVA range covered by the whole bitmap */
  102. size_t length;
  103. };
  104. /*
  105. * Converts a relative IOVA to a bitmap index.
  106. * This function provides the index into the u64 array (bitmap::bitmap)
  107. * for a given IOVA offset.
  108. * Relative IOVA means relative to the bitmap::mapped base IOVA
  109. * (stored in mapped::iova). All computations in this file are done using
  110. * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
  111. * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
  112. */
  113. static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
  114. unsigned long iova)
  115. {
  116. unsigned long pgsize = 1 << bitmap->mapped.pgshift;
  117. return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
  118. }
  119. /*
  120. * Converts a bitmap index to a *relative* IOVA.
  121. */
  122. static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
  123. unsigned long index)
  124. {
  125. unsigned long pgshift = bitmap->mapped.pgshift;
  126. return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
  127. }
  128. /*
  129. * Returns the base IOVA of the mapped range.
  130. */
  131. static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
  132. {
  133. unsigned long skip = bitmap->mapped_base_index;
  134. return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
  135. }
  136. /*
  137. * Pins the bitmap user pages for the current range window.
  138. * This is internal to IOVA bitmap and called when advancing the
  139. * index (@mapped_base_index) or allocating the bitmap.
  140. */
  141. static int iova_bitmap_get(struct iova_bitmap *bitmap)
  142. {
  143. struct iova_bitmap_map *mapped = &bitmap->mapped;
  144. unsigned long npages;
  145. u64 __user *addr;
  146. long ret;
  147. /*
  148. * @mapped_base_index is the index of the currently mapped u64 words
  149. * that we have access. Anything before @mapped_base_index is not
  150. * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
  151. * mapped but capped at a maximum number of pages.
  152. */
  153. npages = DIV_ROUND_UP((bitmap->mapped_total_index -
  154. bitmap->mapped_base_index) *
  155. sizeof(*bitmap->bitmap), PAGE_SIZE);
  156. /*
  157. * We always cap at max number of 'struct page' a base page can fit.
  158. * This is, for example, on x86 means 2M of bitmap data max.
  159. */
  160. npages = min(npages, PAGE_SIZE / sizeof(struct page *));
  161. /*
  162. * Bitmap address to be pinned is calculated via pointer arithmetic
  163. * with bitmap u64 word index.
  164. */
  165. addr = bitmap->bitmap + bitmap->mapped_base_index;
  166. ret = pin_user_pages_fast((unsigned long)addr, npages,
  167. FOLL_WRITE, mapped->pages);
  168. if (ret <= 0)
  169. return -EFAULT;
  170. mapped->npages = (unsigned long)ret;
  171. /* Base IOVA where @pages point to i.e. bit 0 of the first page */
  172. mapped->iova = iova_bitmap_mapped_iova(bitmap);
  173. /*
  174. * offset of the page where pinned pages bit 0 is located.
  175. * This handles the case where the bitmap is not PAGE_SIZE
  176. * aligned.
  177. */
  178. mapped->pgoff = offset_in_page(addr);
  179. return 0;
  180. }
  181. /*
  182. * Unpins the bitmap user pages and clears @npages
  183. * (un)pinning is abstracted from API user and it's done when advancing
  184. * the index or freeing the bitmap.
  185. */
  186. static void iova_bitmap_put(struct iova_bitmap *bitmap)
  187. {
  188. struct iova_bitmap_map *mapped = &bitmap->mapped;
  189. if (mapped->npages) {
  190. unpin_user_pages(mapped->pages, mapped->npages);
  191. mapped->npages = 0;
  192. }
  193. }
  194. /**
  195. * iova_bitmap_alloc() - Allocates an IOVA bitmap object
  196. * @iova: Start address of the IOVA range
  197. * @length: Length of the IOVA range
  198. * @page_size: Page size of the IOVA bitmap. It defines what each bit
  199. * granularity represents
  200. * @data: Userspace address of the bitmap
  201. *
  202. * Allocates an IOVA object and initializes all its fields including the
  203. * first user pages of @data.
  204. *
  205. * Return: A pointer to a newly allocated struct iova_bitmap
  206. * or ERR_PTR() on error.
  207. */
  208. struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
  209. unsigned long page_size, u64 __user *data)
  210. {
  211. struct iova_bitmap_map *mapped;
  212. struct iova_bitmap *bitmap;
  213. int rc;
  214. bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
  215. if (!bitmap)
  216. return ERR_PTR(-ENOMEM);
  217. mapped = &bitmap->mapped;
  218. mapped->pgshift = __ffs(page_size);
  219. bitmap->bitmap = data;
  220. bitmap->mapped_total_index =
  221. iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
  222. bitmap->iova = iova;
  223. bitmap->length = length;
  224. mapped->iova = iova;
  225. mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
  226. if (!mapped->pages) {
  227. rc = -ENOMEM;
  228. goto err;
  229. }
  230. rc = iova_bitmap_get(bitmap);
  231. if (rc)
  232. goto err;
  233. return bitmap;
  234. err:
  235. iova_bitmap_free(bitmap);
  236. return ERR_PTR(rc);
  237. }
  238. /**
  239. * iova_bitmap_free() - Frees an IOVA bitmap object
  240. * @bitmap: IOVA bitmap to free
  241. *
  242. * It unpins and releases pages array memory and clears any leftover
  243. * state.
  244. */
  245. void iova_bitmap_free(struct iova_bitmap *bitmap)
  246. {
  247. struct iova_bitmap_map *mapped = &bitmap->mapped;
  248. iova_bitmap_put(bitmap);
  249. if (mapped->pages) {
  250. free_page((unsigned long)mapped->pages);
  251. mapped->pages = NULL;
  252. }
  253. kfree(bitmap);
  254. }
  255. /*
  256. * Returns the remaining bitmap indexes from mapped_total_index to process for
  257. * the currently pinned bitmap pages.
  258. */
  259. static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
  260. {
  261. unsigned long remaining, bytes;
  262. bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff;
  263. remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
  264. remaining = min_t(unsigned long, remaining,
  265. bytes / sizeof(*bitmap->bitmap));
  266. return remaining;
  267. }
  268. /*
  269. * Returns the length of the mapped IOVA range.
  270. */
  271. static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
  272. {
  273. unsigned long max_iova = bitmap->iova + bitmap->length - 1;
  274. unsigned long iova = iova_bitmap_mapped_iova(bitmap);
  275. unsigned long remaining;
  276. /*
  277. * iova_bitmap_mapped_remaining() returns a number of indexes which
  278. * when converted to IOVA gives us a max length that the bitmap
  279. * pinned data can cover. Afterwards, that is capped to
  280. * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
  281. */
  282. remaining = iova_bitmap_index_to_offset(bitmap,
  283. iova_bitmap_mapped_remaining(bitmap));
  284. if (iova + remaining - 1 > max_iova)
  285. remaining -= ((iova + remaining - 1) - max_iova);
  286. return remaining;
  287. }
  288. /*
  289. * Returns true if there's not more data to iterate.
  290. */
  291. static bool iova_bitmap_done(struct iova_bitmap *bitmap)
  292. {
  293. return bitmap->mapped_base_index >= bitmap->mapped_total_index;
  294. }
  295. /*
  296. * Advances to the next range, releases the current pinned
  297. * pages and pins the next set of bitmap pages.
  298. * Returns 0 on success or otherwise errno.
  299. */
  300. static int iova_bitmap_advance(struct iova_bitmap *bitmap)
  301. {
  302. unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
  303. unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
  304. bitmap->mapped_base_index += count;
  305. iova_bitmap_put(bitmap);
  306. if (iova_bitmap_done(bitmap))
  307. return 0;
  308. /* When advancing the index we pin the next set of bitmap pages */
  309. return iova_bitmap_get(bitmap);
  310. }
  311. /**
  312. * iova_bitmap_for_each() - Iterates over the bitmap
  313. * @bitmap: IOVA bitmap to iterate
  314. * @opaque: Additional argument to pass to the callback
  315. * @fn: Function that gets called for each IOVA range
  316. *
  317. * Helper function to iterate over bitmap data representing a portion of IOVA
  318. * space. It hides the complexity of iterating bitmaps and translating the
  319. * mapped bitmap user pages into IOVA ranges to process.
  320. *
  321. * Return: 0 on success, and an error on failure either upon
  322. * iteration or when the callback returns an error.
  323. */
  324. int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
  325. iova_bitmap_fn_t fn)
  326. {
  327. int ret = 0;
  328. for (; !iova_bitmap_done(bitmap) && !ret;
  329. ret = iova_bitmap_advance(bitmap)) {
  330. ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
  331. iova_bitmap_mapped_length(bitmap), opaque);
  332. if (ret)
  333. break;
  334. }
  335. return ret;
  336. }
  337. /**
  338. * iova_bitmap_set() - Records an IOVA range in bitmap
  339. * @bitmap: IOVA bitmap
  340. * @iova: IOVA to start
  341. * @length: IOVA range length
  342. *
  343. * Set the bits corresponding to the range [iova .. iova+length-1] in
  344. * the user bitmap.
  345. *
  346. */
  347. void iova_bitmap_set(struct iova_bitmap *bitmap,
  348. unsigned long iova, size_t length)
  349. {
  350. struct iova_bitmap_map *mapped = &bitmap->mapped;
  351. unsigned long cur_bit = ((iova - mapped->iova) >>
  352. mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
  353. unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
  354. mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
  355. do {
  356. unsigned int page_idx = cur_bit / BITS_PER_PAGE;
  357. unsigned int offset = cur_bit % BITS_PER_PAGE;
  358. unsigned int nbits = min(BITS_PER_PAGE - offset,
  359. last_bit - cur_bit + 1);
  360. void *kaddr;
  361. kaddr = kmap_local_page(mapped->pages[page_idx]);
  362. bitmap_set(kaddr, offset, nbits);
  363. kunmap_local(kaddr);
  364. cur_bit += nbits;
  365. } while (cur_bit <= last_bit);
  366. }
  367. EXPORT_SYMBOL_GPL(iova_bitmap_set);