memory.c 78 KB


  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright 2016-2022 HabanaLabs, Ltd.
  4. * All Rights Reserved.
  5. */
  6. #include <uapi/misc/habanalabs.h>
  7. #include "habanalabs.h"
  8. #include "../include/hw_ip/mmu/mmu_general.h"
  9. #include <linux/uaccess.h>
  10. #include <linux/slab.h>
  11. #include <linux/vmalloc.h>
  12. #include <linux/pci-p2pdma.h>
  13. MODULE_IMPORT_NS(DMA_BUF);
  14. #define HL_MMU_DEBUG 0
  15. /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
  16. #define DRAM_POOL_PAGE_SIZE SZ_8M
  17. static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
  18. struct hl_mem_in *args, u64 *handle);
  19. static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
  20. {
  21. struct asic_fixed_properties *prop = &hdev->asic_prop;
  22. u64 psize;
  23. /*
  24. * for ASIC that supports setting the allocation page size by user we will address
  25. * user's choice only if it is not 0 (as 0 means taking the default page size)
  26. */
  27. if (prop->supports_user_set_page_size && args->alloc.page_size) {
  28. psize = args->alloc.page_size;
  29. if (!is_power_of_2(psize)) {
  30. dev_err(hdev->dev, "user page size (%#llx) is not power of 2\n", psize);
  31. return -EINVAL;
  32. }
  33. } else {
  34. psize = prop->device_mem_alloc_default_page_size;
  35. }
  36. *page_size = psize;
  37. return 0;
  38. }
  39. /*
  40. * The va ranges in context object contain a list with the available chunks of
  41. * device virtual memory.
  42. * There is one range for host allocations and one for DRAM allocations.
  43. *
  44. * On initialization each range contains one chunk of all of its available
  45. * virtual range which is a half of the total device virtual range.
  46. *
  47. * On each mapping of physical pages, a suitable virtual range chunk (with a
  48. * minimum size) is selected from the list. If the chunk size equals the
  49. * requested size, the chunk is returned. Otherwise, the chunk is split into
  50. * two chunks - one to return as result and a remainder to stay in the list.
  51. *
  52. * On each unmapping of a virtual address, the relevant virtual chunk is
  53. * returned to the list. The chunk is added to the list and, if its edges match
  54. * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
  55. * the chunks are merged.
  56. *
  57. * On finish, the list is checked to contain only one chunk covering all of the
  58. * relevant virtual range (which is a half of the device total virtual range).
  59. * If not (meaning not all mappings were unmapped), a warning is printed.
  60. */
  61. /*
  62. * alloc_device_memory() - allocate device memory.
  63. * @ctx: pointer to the context structure.
  64. * @args: host parameters containing the requested size.
  65. * @ret_handle: result handle.
  66. *
  67. * This function does the following:
  68. * - Allocate the requested size rounded up to 'dram_page_size' pages.
  69. * - Return unique handle for later map/unmap/free.
  70. */
  71. static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
  72. u32 *ret_handle)
  73. {
  74. struct hl_device *hdev = ctx->hdev;
  75. struct hl_vm *vm = &hdev->vm;
  76. struct hl_vm_phys_pg_pack *phys_pg_pack;
  77. u64 paddr = 0, total_size, num_pgs, i;
  78. u32 num_curr_pgs, page_size;
  79. bool contiguous;
  80. int handle, rc;
  81. num_curr_pgs = 0;
  82. rc = set_alloc_page_size(hdev, args, &page_size);
  83. if (rc)
  84. return rc;
  85. num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
  86. total_size = num_pgs * page_size;
  87. if (!total_size) {
  88. dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
  89. return -EINVAL;
  90. }
  91. contiguous = args->flags & HL_MEM_CONTIGUOUS;
  92. if (contiguous) {
  93. if (is_power_of_2(page_size))
  94. paddr = (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
  95. total_size, NULL, page_size);
  96. else
  97. paddr = gen_pool_alloc(vm->dram_pg_pool, total_size);
  98. if (!paddr) {
  99. dev_err(hdev->dev,
  100. "Cannot allocate %llu contiguous pages with total size of %llu\n",
  101. num_pgs, total_size);
  102. return -ENOMEM;
  103. }
  104. }
  105. phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
  106. if (!phys_pg_pack) {
  107. rc = -ENOMEM;
  108. goto pages_pack_err;
  109. }
  110. phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
  111. phys_pg_pack->asid = ctx->asid;
  112. phys_pg_pack->npages = num_pgs;
  113. phys_pg_pack->page_size = page_size;
  114. phys_pg_pack->total_size = total_size;
  115. phys_pg_pack->flags = args->flags;
  116. phys_pg_pack->contiguous = contiguous;
  117. phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
  118. if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
  119. rc = -ENOMEM;
  120. goto pages_arr_err;
  121. }
  122. if (phys_pg_pack->contiguous) {
  123. for (i = 0 ; i < num_pgs ; i++)
  124. phys_pg_pack->pages[i] = paddr + i * page_size;
  125. } else {
  126. for (i = 0 ; i < num_pgs ; i++) {
  127. if (is_power_of_2(page_size))
  128. phys_pg_pack->pages[i] =
  129. (uintptr_t)gen_pool_dma_alloc_align(vm->dram_pg_pool,
  130. page_size, NULL,
  131. page_size);
  132. else
  133. phys_pg_pack->pages[i] = gen_pool_alloc(vm->dram_pg_pool,
  134. page_size);
  135. if (!phys_pg_pack->pages[i]) {
  136. dev_err(hdev->dev,
  137. "Cannot allocate device memory (out of memory)\n");
  138. rc = -ENOMEM;
  139. goto page_err;
  140. }
  141. num_curr_pgs++;
  142. }
  143. }
  144. spin_lock(&vm->idr_lock);
  145. handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
  146. GFP_ATOMIC);
  147. spin_unlock(&vm->idr_lock);
  148. if (handle < 0) {
  149. dev_err(hdev->dev, "Failed to get handle for page\n");
  150. rc = -EFAULT;
  151. goto idr_err;
  152. }
  153. for (i = 0 ; i < num_pgs ; i++)
  154. kref_get(&vm->dram_pg_pool_refcount);
  155. phys_pg_pack->handle = handle;
  156. atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
  157. atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
  158. *ret_handle = handle;
  159. return 0;
  160. idr_err:
  161. page_err:
  162. if (!phys_pg_pack->contiguous)
  163. for (i = 0 ; i < num_curr_pgs ; i++)
  164. gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
  165. page_size);
  166. kvfree(phys_pg_pack->pages);
  167. pages_arr_err:
  168. kfree(phys_pg_pack);
  169. pages_pack_err:
  170. if (contiguous)
  171. gen_pool_free(vm->dram_pg_pool, paddr, total_size);
  172. return rc;
  173. }
  174. /**
  175. * dma_map_host_va() - DMA mapping of the given host virtual address.
  176. * @hdev: habanalabs device structure.
  177. * @addr: the host virtual address of the memory area.
  178. * @size: the size of the memory area.
  179. * @p_userptr: pointer to result userptr structure.
  180. *
  181. * This function does the following:
  182. * - Allocate userptr structure.
  183. * - Pin the given host memory using the userptr structure.
  184. * - Perform DMA mapping to have the DMA addresses of the pages.
  185. */
  186. static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
  187. struct hl_userptr **p_userptr)
  188. {
  189. struct hl_userptr *userptr;
  190. int rc;
  191. userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
  192. if (!userptr) {
  193. rc = -ENOMEM;
  194. goto userptr_err;
  195. }
  196. rc = hl_pin_host_memory(hdev, addr, size, userptr);
  197. if (rc) {
  198. dev_err(hdev->dev, "Failed to pin host memory\n");
  199. goto pin_err;
  200. }
  201. userptr->dma_mapped = true;
  202. userptr->dir = DMA_BIDIRECTIONAL;
  203. userptr->vm_type = VM_TYPE_USERPTR;
  204. *p_userptr = userptr;
  205. rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
  206. if (rc) {
  207. dev_err(hdev->dev, "failed to map sgt with DMA region\n");
  208. goto dma_map_err;
  209. }
  210. return 0;
  211. dma_map_err:
  212. hl_unpin_host_memory(hdev, userptr);
  213. pin_err:
  214. kfree(userptr);
  215. userptr_err:
  216. return rc;
  217. }
  /**
   * dma_unmap_host_va() - release a userptr created by dma_map_host_va().
   * @hdev: habanalabs device structure.
   * @userptr: userptr to release.
   *
   * Hands the userptr to hl_unpin_host_memory() and then frees the userptr
   * structure itself. The pointer must not be used afterwards.
   */
  static void dma_unmap_host_va(struct hl_device *hdev,
  				struct hl_userptr *userptr)
  {
  	hl_unpin_host_memory(hdev, userptr);

  	kfree(userptr);
  }
  233. /**
  234. * dram_pg_pool_do_release() - free DRAM pages pool
  235. * @ref: pointer to reference object.
  236. *
  237. * This function does the following:
  238. * - Frees the idr structure of physical pages handles.
  239. * - Frees the generic pool of DRAM physical pages.
  240. */
  241. static void dram_pg_pool_do_release(struct kref *ref)
  242. {
  243. struct hl_vm *vm = container_of(ref, struct hl_vm,
  244. dram_pg_pool_refcount);
  245. /*
  246. * free the idr here as only here we know for sure that there are no
  247. * allocated physical pages and hence there are no handles in use
  248. */
  249. idr_destroy(&vm->phys_pg_pack_handles);
  250. gen_pool_destroy(vm->dram_pg_pool);
  251. }
  252. /**
  253. * free_phys_pg_pack() - free physical page pack.
  254. * @hdev: habanalabs device structure.
  255. * @phys_pg_pack: physical page pack to free.
  256. *
  257. * This function does the following:
  258. * - For DRAM memory only
  259. * - iterate over the pack, free each physical block structure by
  260. * returning it to the general pool.
  261. * - Free the hl_vm_phys_pg_pack structure.
  262. */
  263. static void free_phys_pg_pack(struct hl_device *hdev,
  264. struct hl_vm_phys_pg_pack *phys_pg_pack)
  265. {
  266. struct hl_vm *vm = &hdev->vm;
  267. u64 i;
  268. if (phys_pg_pack->created_from_userptr)
  269. goto end;
  270. if (phys_pg_pack->contiguous) {
  271. gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
  272. phys_pg_pack->total_size);
  273. for (i = 0; i < phys_pg_pack->npages ; i++)
  274. kref_put(&vm->dram_pg_pool_refcount,
  275. dram_pg_pool_do_release);
  276. } else {
  277. for (i = 0 ; i < phys_pg_pack->npages ; i++) {
  278. gen_pool_free(vm->dram_pg_pool,
  279. phys_pg_pack->pages[i],
  280. phys_pg_pack->page_size);
  281. kref_put(&vm->dram_pg_pool_refcount,
  282. dram_pg_pool_do_release);
  283. }
  284. }
  285. end:
  286. kvfree(phys_pg_pack->pages);
  287. kfree(phys_pg_pack);
  288. return;
  289. }
  290. /**
  291. * free_device_memory() - free device memory.
  292. * @ctx: pointer to the context structure.
  293. * @args: host parameters containing the requested size.
  294. *
  295. * This function does the following:
  296. * - Free the device memory related to the given handle.
  297. */
  298. static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
  299. {
  300. struct hl_device *hdev = ctx->hdev;
  301. struct hl_vm *vm = &hdev->vm;
  302. struct hl_vm_phys_pg_pack *phys_pg_pack;
  303. u32 handle = args->free.handle;
  304. spin_lock(&vm->idr_lock);
  305. phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
  306. if (!phys_pg_pack) {
  307. spin_unlock(&vm->idr_lock);
  308. dev_err(hdev->dev, "free device memory failed, no match for handle %u\n", handle);
  309. return -EINVAL;
  310. }
  311. if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
  312. spin_unlock(&vm->idr_lock);
  313. dev_err(hdev->dev, "handle %u is mapped, cannot free\n", handle);
  314. return -EINVAL;
  315. }
  316. if (phys_pg_pack->exporting_cnt) {
  317. spin_unlock(&vm->idr_lock);
  318. dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle);
  319. return -EINVAL;
  320. }
  321. /* must remove from idr before the freeing of the physical pages as the refcount of the pool
  322. * is also the trigger of the idr destroy
  323. */
  324. idr_remove(&vm->phys_pg_pack_handles, handle);
  325. spin_unlock(&vm->idr_lock);
  326. atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
  327. atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
  328. free_phys_pg_pack(hdev, phys_pg_pack);
  329. return 0;
  330. }
  331. /**
  332. * clear_va_list_locked() - free virtual addresses list.
  333. * @hdev: habanalabs device structure.
  334. * @va_list: list of virtual addresses to free.
  335. *
  336. * This function does the following:
  337. * - Iterate over the list and free each virtual addresses block.
  338. *
  339. * This function should be called only when va_list lock is taken.
  340. */
  341. static void clear_va_list_locked(struct hl_device *hdev,
  342. struct list_head *va_list)
  343. {
  344. struct hl_vm_va_block *va_block, *tmp;
  345. list_for_each_entry_safe(va_block, tmp, va_list, node) {
  346. list_del(&va_block->node);
  347. kfree(va_block);
  348. }
  349. }
  /**
   * print_va_list_locked() - print virtual addresses list.
   * @hdev: habanalabs device structure.
   * @va_list: list of virtual addresses to print.
   *
   * Dumps every virtual addresses block of the list to the debug log. The body
   * is compiled in only when HL_MMU_DEBUG is non-zero; otherwise the function
   * does nothing.
   *
   * This function should be called only when va_list lock is taken.
   */
  static void print_va_list_locked(struct hl_device *hdev,
  				struct list_head *va_list)
  {
  #if HL_MMU_DEBUG
  	struct hl_vm_va_block *blk;

  	dev_dbg(hdev->dev, "print va list:\n");

  	list_for_each_entry(blk, va_list, node) {
  		dev_dbg(hdev->dev,
  			"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
  			blk->start, blk->end, blk->size);
  	}
  #endif
  }
  372. /**
  373. * merge_va_blocks_locked() - merge a virtual block if possible.
  374. * @hdev: pointer to the habanalabs device structure.
  375. * @va_list: pointer to the virtual addresses block list.
  376. * @va_block: virtual block to merge with adjacent blocks.
  377. *
  378. * This function does the following:
  379. * - Merge the given blocks with the adjacent blocks if their virtual ranges
  380. * create a contiguous virtual range.
  381. *
  382. * This Function should be called only when va_list lock is taken.
  383. */
  384. static void merge_va_blocks_locked(struct hl_device *hdev,
  385. struct list_head *va_list, struct hl_vm_va_block *va_block)
  386. {
  387. struct hl_vm_va_block *prev, *next;
  388. prev = list_prev_entry(va_block, node);
  389. if (&prev->node != va_list && prev->end + 1 == va_block->start) {
  390. prev->end = va_block->end;
  391. prev->size = prev->end - prev->start + 1;
  392. list_del(&va_block->node);
  393. kfree(va_block);
  394. va_block = prev;
  395. }
  396. next = list_next_entry(va_block, node);
  397. if (&next->node != va_list && va_block->end + 1 == next->start) {
  398. next->start = va_block->start;
  399. next->size = next->end - next->start + 1;
  400. list_del(&va_block->node);
  401. kfree(va_block);
  402. }
  403. }
  404. /**
  405. * add_va_block_locked() - add a virtual block to the virtual addresses list.
  406. * @hdev: pointer to the habanalabs device structure.
  407. * @va_list: pointer to the virtual addresses block list.
  408. * @start: start virtual address.
  409. * @end: end virtual address.
  410. *
  411. * This function does the following:
  412. * - Add the given block to the virtual blocks list and merge with other blocks
  413. * if a contiguous virtual block can be created.
  414. *
  415. * This Function should be called only when va_list lock is taken.
  416. */
  417. static int add_va_block_locked(struct hl_device *hdev,
  418. struct list_head *va_list, u64 start, u64 end)
  419. {
  420. struct hl_vm_va_block *va_block, *res = NULL;
  421. u64 size = end - start + 1;
  422. print_va_list_locked(hdev, va_list);
  423. list_for_each_entry(va_block, va_list, node) {
  424. /* TODO: remove upon matureness */
  425. if (hl_mem_area_crosses_range(start, size, va_block->start,
  426. va_block->end)) {
  427. dev_err(hdev->dev,
  428. "block crossing ranges at start 0x%llx, end 0x%llx\n",
  429. va_block->start, va_block->end);
  430. return -EINVAL;
  431. }
  432. if (va_block->end < start)
  433. res = va_block;
  434. }
  435. va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
  436. if (!va_block)
  437. return -ENOMEM;
  438. va_block->start = start;
  439. va_block->end = end;
  440. va_block->size = size;
  441. if (!res)
  442. list_add(&va_block->node, va_list);
  443. else
  444. list_add(&va_block->node, &res->node);
  445. merge_va_blocks_locked(hdev, va_list, va_block);
  446. print_va_list_locked(hdev, va_list);
  447. return 0;
  448. }
  449. /**
  450. * add_va_block() - wrapper for add_va_block_locked.
  451. * @hdev: pointer to the habanalabs device structure.
  452. * @va_range: pointer to the virtual addresses range object.
  453. * @start: start virtual address.
  454. * @end: end virtual address.
  455. *
  456. * This function does the following:
  457. * - Takes the list lock and calls add_va_block_locked.
  458. */
  459. static inline int add_va_block(struct hl_device *hdev,
  460. struct hl_va_range *va_range, u64 start, u64 end)
  461. {
  462. int rc;
  463. mutex_lock(&va_range->lock);
  464. rc = add_va_block_locked(hdev, &va_range->list, start, end);
  465. mutex_unlock(&va_range->lock);
  466. return rc;
  467. }
  468. /**
  469. * is_hint_crossing_range() - check if hint address crossing specified reserved.
  470. * @range_type: virtual space range type.
  471. * @start_addr: start virtual address.
  472. * @size: block size.
  473. * @prop: asic properties structure to retrieve reserved ranges from.
  474. */
  475. static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
  476. u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
  477. bool range_cross;
  478. if (range_type == HL_VA_RANGE_TYPE_DRAM)
  479. range_cross =
  480. hl_mem_area_crosses_range(start_addr, size,
  481. prop->hints_dram_reserved_va_range.start_addr,
  482. prop->hints_dram_reserved_va_range.end_addr);
  483. else if (range_type == HL_VA_RANGE_TYPE_HOST)
  484. range_cross =
  485. hl_mem_area_crosses_range(start_addr, size,
  486. prop->hints_host_reserved_va_range.start_addr,
  487. prop->hints_host_reserved_va_range.end_addr);
  488. else
  489. range_cross =
  490. hl_mem_area_crosses_range(start_addr, size,
  491. prop->hints_host_hpage_reserved_va_range.start_addr,
  492. prop->hints_host_hpage_reserved_va_range.end_addr);
  493. return range_cross;
  494. }
  495. /**
  496. * get_va_block() - get a virtual block for the given size and alignment.
  497. *
  498. * @hdev: pointer to the habanalabs device structure.
  499. * @va_range: pointer to the virtual addresses range.
  500. * @size: requested block size.
  501. * @hint_addr: hint for requested address by the user.
  502. * @va_block_align: required alignment of the virtual block start address.
  503. * @range_type: va range type (host, dram)
  504. * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT
  505. *
  506. * This function does the following:
  507. * - Iterate on the virtual block list to find a suitable virtual block for the
  508. * given size, hint address and alignment.
  509. * - Reserve the requested block and update the list.
  510. * - Return the start address of the virtual block.
  511. */
  512. static u64 get_va_block(struct hl_device *hdev,
  513. struct hl_va_range *va_range,
  514. u64 size, u64 hint_addr, u32 va_block_align,
  515. enum hl_va_range_type range_type,
  516. u32 flags)
  517. {
  518. struct hl_vm_va_block *va_block, *new_va_block = NULL;
  519. struct asic_fixed_properties *prop = &hdev->asic_prop;
  520. u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
  521. align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
  522. dram_hint_mask = prop->dram_hints_align_mask;
  523. bool add_prev = false;
  524. bool is_align_pow_2 = is_power_of_2(va_range->page_size);
  525. bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
  526. bool force_hint = flags & HL_MEM_FORCE_HINT;
  527. if (is_align_pow_2)
  528. align_mask = ~((u64)va_block_align - 1);
  529. else
  530. /*
  531. * with non-power-of-2 range we work only with page granularity
  532. * and the start address is page aligned,
  533. * so no need for alignment checking.
  534. */
  535. size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
  536. va_range->page_size;
  537. tmp_hint_addr = hint_addr & ~dram_hint_mask;
  538. /* Check if we need to ignore hint address */
  539. if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
  540. (!is_align_pow_2 && is_hint_dram_addr &&
  541. do_div(tmp_hint_addr, va_range->page_size))) {
  542. if (force_hint) {
  543. /* Hint must be respected, so here we just fail */
  544. dev_err(hdev->dev,
  545. "Hint address 0x%llx is not page aligned - cannot be respected\n",
  546. hint_addr);
  547. return 0;
  548. }
  549. dev_dbg(hdev->dev,
  550. "Hint address 0x%llx will be ignored because it is not aligned\n",
  551. hint_addr);
  552. hint_addr = 0;
  553. }
  554. mutex_lock(&va_range->lock);
  555. print_va_list_locked(hdev, &va_range->list);
  556. list_for_each_entry(va_block, &va_range->list, node) {
  557. /* Calc the first possible aligned addr */
  558. valid_start = va_block->start;
  559. if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
  560. valid_start &= align_mask;
  561. valid_start += va_block_align;
  562. if (valid_start > va_block->end)
  563. continue;
  564. }
  565. valid_size = va_block->end - valid_start + 1;
  566. if (valid_size < size)
  567. continue;
  568. /*
  569. * In case hint address is 0, and hints_range_reservation
  570. * property enabled, then avoid allocating va blocks from the
  571. * range reserved for hint addresses
  572. */
  573. if (prop->hints_range_reservation && !hint_addr)
  574. if (is_hint_crossing_range(range_type, valid_start,
  575. size, prop))
  576. continue;
  577. /* Pick the minimal length block which has the required size */
  578. if (!new_va_block || (valid_size < reserved_valid_size)) {
  579. new_va_block = va_block;
  580. reserved_valid_start = valid_start;
  581. reserved_valid_size = valid_size;
  582. }
  583. if (hint_addr && hint_addr >= valid_start &&
  584. (hint_addr + size) <= va_block->end) {
  585. new_va_block = va_block;
  586. reserved_valid_start = hint_addr;
  587. reserved_valid_size = valid_size;
  588. break;
  589. }
  590. }
  591. if (!new_va_block) {
  592. dev_err(hdev->dev, "no available va block for size %llu\n",
  593. size);
  594. goto out;
  595. }
  596. if (force_hint && reserved_valid_start != hint_addr) {
  597. /* Hint address must be respected. If we are here - this means
  598. * we could not respect it.
  599. */
  600. dev_err(hdev->dev,
  601. "Hint address 0x%llx could not be respected\n",
  602. hint_addr);
  603. reserved_valid_start = 0;
  604. goto out;
  605. }
  606. /*
  607. * Check if there is some leftover range due to reserving the new
  608. * va block, then return it to the main virtual addresses list.
  609. */
  610. if (reserved_valid_start > new_va_block->start) {
  611. prev_start = new_va_block->start;
  612. prev_end = reserved_valid_start - 1;
  613. new_va_block->start = reserved_valid_start;
  614. new_va_block->size = reserved_valid_size;
  615. add_prev = true;
  616. }
  617. if (new_va_block->size > size) {
  618. new_va_block->start += size;
  619. new_va_block->size = new_va_block->end - new_va_block->start + 1;
  620. } else {
  621. list_del(&new_va_block->node);
  622. kfree(new_va_block);
  623. }
  624. if (add_prev)
  625. add_va_block_locked(hdev, &va_range->list, prev_start,
  626. prev_end);
  627. print_va_list_locked(hdev, &va_range->list);
  628. out:
  629. mutex_unlock(&va_range->lock);
  630. return reserved_valid_start;
  631. }
  632. /*
  633. * hl_reserve_va_block() - reserve a virtual block of a given size.
  634. * @hdev: pointer to the habanalabs device structure.
  635. * @ctx: current context
  636. * @type: virtual addresses range type.
  637. * @size: requested block size.
  638. * @alignment: required alignment in bytes of the virtual block start address,
  639. * 0 means no alignment.
  640. *
  641. * This function does the following:
  642. * - Iterate on the virtual block list to find a suitable virtual block for the
  643. * given size and alignment.
  644. * - Reserve the requested block and update the list.
  645. * - Return the start address of the virtual block.
  646. */
  647. u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
  648. enum hl_va_range_type type, u64 size, u32 alignment)
  649. {
  650. return get_va_block(hdev, ctx->va_range[type], size, 0,
  651. max(alignment, ctx->va_range[type]->page_size),
  652. type, 0);
  653. }
  654. /**
  655. * hl_get_va_range_type() - get va_range type for the given address and size.
  656. * @ctx: context to fetch va_range from.
  657. * @address: the start address of the area we want to validate.
  658. * @size: the size in bytes of the area we want to validate.
  659. * @type: returned va_range type.
  660. *
  661. * Return: true if the area is inside a valid range, false otherwise.
  662. */
  663. static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
  664. enum hl_va_range_type *type)
  665. {
  666. int i;
  667. for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX; i++) {
  668. if (hl_mem_area_inside_range(address, size,
  669. ctx->va_range[i]->start_addr,
  670. ctx->va_range[i]->end_addr)) {
  671. *type = i;
  672. return 0;
  673. }
  674. }
  675. return -EINVAL;
  676. }
  677. /**
  678. * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
  679. * @hdev: pointer to the habanalabs device structure
  680. * @ctx: pointer to the context structure.
  681. * @start_addr: start virtual address.
  682. * @size: number of bytes to unreserve.
  683. *
  684. * This function does the following:
  685. * - Takes the list lock and calls add_va_block_locked.
  686. */
  687. int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
  688. u64 start_addr, u64 size)
  689. {
  690. enum hl_va_range_type type;
  691. int rc;
  692. rc = hl_get_va_range_type(ctx, start_addr, size, &type);
  693. if (rc) {
  694. dev_err(hdev->dev,
  695. "cannot find va_range for va %#llx size %llu",
  696. start_addr, size);
  697. return rc;
  698. }
  699. rc = add_va_block(hdev, ctx->va_range[type], start_addr,
  700. start_addr + size - 1);
  701. if (rc)
  702. dev_warn(hdev->dev,
  703. "add va block failed for vaddr: 0x%llx\n", start_addr);
  704. return rc;
  705. }
  706. /**
  707. * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
  708. * memory
  709. * @ctx: pointer to the context structure.
  710. * @userptr: userptr to initialize from.
  711. * @pphys_pg_pack: result pointer.
  712. * @force_regular_page: tell the function to ignore huge page optimization,
  713. * even if possible. Needed for cases where the device VA
  714. * is allocated before we know the composition of the
  715. * physical pages
  716. *
  717. * This function does the following:
  718. * - Pin the physical pages related to the given virtual block.
  719. * - Create a physical page pack from the physical pages related to the given
  720. * virtual block.
  721. */
  722. static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
  723. struct hl_userptr *userptr,
  724. struct hl_vm_phys_pg_pack **pphys_pg_pack,
  725. bool force_regular_page)
  726. {
  727. u32 npages, page_size = PAGE_SIZE,
  728. huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
  729. u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
  730. struct hl_vm_phys_pg_pack *phys_pg_pack;
  731. bool first = true, is_huge_page_opt;
  732. u64 page_mask, total_npages;
  733. struct scatterlist *sg;
  734. dma_addr_t dma_addr;
  735. int rc, i, j;
  736. phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
  737. if (!phys_pg_pack)
  738. return -ENOMEM;
  739. phys_pg_pack->vm_type = userptr->vm_type;
  740. phys_pg_pack->created_from_userptr = true;
  741. phys_pg_pack->asid = ctx->asid;
  742. atomic_set(&phys_pg_pack->mapping_cnt, 1);
  743. is_huge_page_opt = (force_regular_page ? false : true);
  744. /* Only if all dma_addrs are aligned to 2MB and their
  745. * sizes is at least 2MB, we can use huge page mapping.
  746. * We limit the 2MB optimization to this condition,
  747. * since later on we acquire the related VA range as one
  748. * consecutive block.
  749. */
  750. total_npages = 0;
  751. for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
  752. npages = hl_get_sg_info(sg, &dma_addr);
  753. total_npages += npages;
  754. if ((npages % pgs_in_huge_page) ||
  755. (dma_addr & (huge_page_size - 1)))
  756. is_huge_page_opt = false;
  757. }
  758. if (is_huge_page_opt) {
  759. page_size = huge_page_size;
  760. do_div(total_npages, pgs_in_huge_page);
  761. }
  762. page_mask = ~(((u64) page_size) - 1);
  763. phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
  764. GFP_KERNEL);
  765. if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
  766. rc = -ENOMEM;
  767. goto page_pack_arr_mem_err;
  768. }
  769. phys_pg_pack->npages = total_npages;
  770. phys_pg_pack->page_size = page_size;
  771. phys_pg_pack->total_size = total_npages * page_size;
  772. j = 0;
  773. for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
  774. npages = hl_get_sg_info(sg, &dma_addr);
  775. /* align down to physical page size and save the offset */
  776. if (first) {
  777. first = false;
  778. phys_pg_pack->offset = dma_addr & (page_size - 1);
  779. dma_addr &= page_mask;
  780. }
  781. while (npages) {
  782. phys_pg_pack->pages[j++] = dma_addr;
  783. dma_addr += page_size;
  784. if (is_huge_page_opt)
  785. npages -= pgs_in_huge_page;
  786. else
  787. npages--;
  788. }
  789. }
  790. *pphys_pg_pack = phys_pg_pack;
  791. return 0;
  792. page_pack_arr_mem_err:
  793. kfree(phys_pg_pack);
  794. return rc;
  795. }
  796. /**
  797. * map_phys_pg_pack() - maps the physical page pack..
  798. * @ctx: pointer to the context structure.
  799. * @vaddr: start address of the virtual area to map from.
  800. * @phys_pg_pack: the pack of physical pages to map to.
  801. *
  802. * This function does the following:
  803. * - Maps each chunk of virtual memory to matching physical chunk.
  804. * - Stores number of successful mappings in the given argument.
  805. * - Returns 0 on success, error code otherwise.
  806. */
  807. static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
  808. struct hl_vm_phys_pg_pack *phys_pg_pack)
  809. {
  810. struct hl_device *hdev = ctx->hdev;
  811. u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
  812. u32 page_size = phys_pg_pack->page_size;
  813. int rc = 0;
  814. bool is_host_addr;
  815. for (i = 0 ; i < phys_pg_pack->npages ; i++) {
  816. paddr = phys_pg_pack->pages[i];
  817. rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
  818. (i + 1) == phys_pg_pack->npages);
  819. if (rc) {
  820. dev_err(hdev->dev,
  821. "map failed for handle %u, npages: %llu, mapped: %llu",
  822. phys_pg_pack->handle, phys_pg_pack->npages,
  823. mapped_pg_cnt);
  824. goto err;
  825. }
  826. mapped_pg_cnt++;
  827. next_vaddr += page_size;
  828. }
  829. return 0;
  830. err:
  831. is_host_addr = !hl_is_dram_va(hdev, vaddr);
  832. next_vaddr = vaddr;
  833. for (i = 0 ; i < mapped_pg_cnt ; i++) {
  834. if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
  835. (i + 1) == mapped_pg_cnt))
  836. dev_warn_ratelimited(hdev->dev,
  837. "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
  838. phys_pg_pack->handle, next_vaddr,
  839. phys_pg_pack->pages[i], page_size);
  840. next_vaddr += page_size;
  841. /*
  842. * unmapping on Palladium can be really long, so avoid a CPU
  843. * soft lockup bug by sleeping a little between unmapping pages
  844. *
  845. * In addition, on host num of pages could be huge,
  846. * because page size could be 4KB, so when unmapping host
  847. * pages sleep every 32K pages to avoid soft lockup
  848. */
  849. if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
  850. usleep_range(50, 200);
  851. }
  852. return rc;
  853. }
  854. /**
  855. * unmap_phys_pg_pack() - unmaps the physical page pack.
  856. * @ctx: pointer to the context structure.
  857. * @vaddr: start address of the virtual area to unmap.
  858. * @phys_pg_pack: the pack of physical pages to unmap.
  859. */
  860. static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
  861. struct hl_vm_phys_pg_pack *phys_pg_pack)
  862. {
  863. struct hl_device *hdev = ctx->hdev;
  864. u64 next_vaddr, i;
  865. bool is_host_addr;
  866. u32 page_size;
  867. is_host_addr = !hl_is_dram_va(hdev, vaddr);
  868. page_size = phys_pg_pack->page_size;
  869. next_vaddr = vaddr;
  870. for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
  871. if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
  872. (i + 1) == phys_pg_pack->npages))
  873. dev_warn_ratelimited(hdev->dev,
  874. "unmap failed for vaddr: 0x%llx\n", next_vaddr);
  875. /*
  876. * unmapping on Palladium can be really long, so avoid a CPU
  877. * soft lockup bug by sleeping a little between unmapping pages
  878. *
  879. * In addition, on host num of pages could be huge,
  880. * because page size could be 4KB, so when unmapping host
  881. * pages sleep every 32K pages to avoid soft lockup
  882. */
  883. if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
  884. usleep_range(50, 200);
  885. }
  886. }
  887. static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
  888. u64 *paddr)
  889. {
  890. struct hl_device *hdev = ctx->hdev;
  891. struct hl_vm *vm = &hdev->vm;
  892. struct hl_vm_phys_pg_pack *phys_pg_pack;
  893. u32 handle;
  894. handle = lower_32_bits(args->map_device.handle);
  895. spin_lock(&vm->idr_lock);
  896. phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
  897. if (!phys_pg_pack) {
  898. spin_unlock(&vm->idr_lock);
  899. dev_err(hdev->dev, "no match for handle %u\n", handle);
  900. return -EINVAL;
  901. }
  902. *paddr = phys_pg_pack->pages[0];
  903. spin_unlock(&vm->idr_lock);
  904. return 0;
  905. }
  906. /**
  907. * map_device_va() - map the given memory.
  908. * @ctx: pointer to the context structure.
  909. * @args: host parameters with handle/host virtual address.
  910. * @device_addr: pointer to result device virtual address.
  911. *
  912. * This function does the following:
  913. * - If given a physical device memory handle, map to a device virtual block
  914. * and return the start address of this block.
  915. * - If given a host virtual address and size, find the related physical pages,
  916. * map a device virtual block to this pages and return the start address of
  917. * this block.
  918. */
  919. static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr)
  920. {
  921. struct hl_vm_phys_pg_pack *phys_pg_pack;
  922. enum hl_va_range_type va_range_type = 0;
  923. struct hl_device *hdev = ctx->hdev;
  924. struct hl_userptr *userptr = NULL;
  925. u32 handle = 0, va_block_align;
  926. struct hl_vm_hash_node *hnode;
  927. struct hl_vm *vm = &hdev->vm;
  928. struct hl_va_range *va_range;
  929. bool is_userptr, do_prefetch;
  930. u64 ret_vaddr, hint_addr;
  931. enum vm_type *vm_type;
  932. int rc;
  933. /* set map flags */
  934. is_userptr = args->flags & HL_MEM_USERPTR;
  935. do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH);
  936. /* Assume failure */
  937. *device_addr = 0;
  938. if (is_userptr) {
  939. u64 addr = args->map_host.host_virt_addr,
  940. size = args->map_host.mem_size;
  941. u32 page_size = hdev->asic_prop.pmmu.page_size,
  942. huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
  943. rc = dma_map_host_va(hdev, addr, size, &userptr);
  944. if (rc) {
  945. dev_err(hdev->dev, "failed to get userptr from va\n");
  946. return rc;
  947. }
  948. rc = init_phys_pg_pack_from_userptr(ctx, userptr,
  949. &phys_pg_pack, false);
  950. if (rc) {
  951. dev_err(hdev->dev,
  952. "unable to init page pack for vaddr 0x%llx\n",
  953. addr);
  954. goto init_page_pack_err;
  955. }
  956. vm_type = (enum vm_type *) userptr;
  957. hint_addr = args->map_host.hint_addr;
  958. handle = phys_pg_pack->handle;
  959. /* get required alignment */
  960. if (phys_pg_pack->page_size == page_size) {
  961. va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
  962. va_range_type = HL_VA_RANGE_TYPE_HOST;
  963. /*
  964. * huge page alignment may be needed in case of regular
  965. * page mapping, depending on the host VA alignment
  966. */
  967. if (addr & (huge_page_size - 1))
  968. va_block_align = page_size;
  969. else
  970. va_block_align = huge_page_size;
  971. } else {
  972. /*
  973. * huge page alignment is needed in case of huge page
  974. * mapping
  975. */
  976. va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
  977. va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
  978. va_block_align = huge_page_size;
  979. }
  980. } else {
  981. handle = lower_32_bits(args->map_device.handle);
  982. spin_lock(&vm->idr_lock);
  983. phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
  984. if (!phys_pg_pack) {
  985. spin_unlock(&vm->idr_lock);
  986. dev_err(hdev->dev,
  987. "no match for handle %u\n", handle);
  988. return -EINVAL;
  989. }
  990. /* increment now to avoid freeing device memory while mapping */
  991. atomic_inc(&phys_pg_pack->mapping_cnt);
  992. spin_unlock(&vm->idr_lock);
  993. vm_type = (enum vm_type *) phys_pg_pack;
  994. hint_addr = args->map_device.hint_addr;
  995. /* DRAM VA alignment is the same as the MMU page size */
  996. va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
  997. va_range_type = HL_VA_RANGE_TYPE_DRAM;
  998. va_block_align = hdev->asic_prop.dmmu.page_size;
  999. }
  1000. /*
  1001. * relevant for mapping device physical memory only, as host memory is
  1002. * implicitly shared
  1003. */
  1004. if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
  1005. phys_pg_pack->asid != ctx->asid) {
  1006. dev_err(hdev->dev,
  1007. "Failed to map memory, handle %u is not shared\n",
  1008. handle);
  1009. rc = -EPERM;
  1010. goto shared_err;
  1011. }
  1012. hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
  1013. if (!hnode) {
  1014. rc = -ENOMEM;
  1015. goto hnode_err;
  1016. }
  1017. if (hint_addr && phys_pg_pack->offset) {
  1018. if (args->flags & HL_MEM_FORCE_HINT) {
  1019. /* Fail if hint must be respected but it can't be */
  1020. dev_err(hdev->dev,
  1021. "Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
  1022. hint_addr, phys_pg_pack->offset);
  1023. rc = -EINVAL;
  1024. goto va_block_err;
  1025. }
  1026. dev_dbg(hdev->dev,
  1027. "Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
  1028. hint_addr, phys_pg_pack->offset);
  1029. }
  1030. ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
  1031. hint_addr, va_block_align,
  1032. va_range_type, args->flags);
  1033. if (!ret_vaddr) {
  1034. dev_err(hdev->dev, "no available va block for handle %u\n",
  1035. handle);
  1036. rc = -ENOMEM;
  1037. goto va_block_err;
  1038. }
  1039. mutex_lock(&hdev->mmu_lock);
  1040. rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
  1041. if (rc) {
  1042. dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
  1043. mutex_unlock(&hdev->mmu_lock);
  1044. goto map_err;
  1045. }
  1046. rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
  1047. ctx->asid, ret_vaddr, phys_pg_pack->total_size);
  1048. mutex_unlock(&hdev->mmu_lock);
  1049. if (rc)
  1050. goto map_err;
  1051. /*
  1052. * prefetch is done upon user's request. it is performed in WQ as and so can
  1053. * be outside the MMU lock. the operation itself is already protected by the mmu lock
  1054. */
  1055. if (do_prefetch) {
  1056. rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr,
  1057. phys_pg_pack->total_size);
  1058. if (rc)
  1059. goto map_err;
  1060. }
  1061. ret_vaddr += phys_pg_pack->offset;
  1062. hnode->ptr = vm_type;
  1063. hnode->vaddr = ret_vaddr;
  1064. mutex_lock(&ctx->mem_hash_lock);
  1065. hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
  1066. mutex_unlock(&ctx->mem_hash_lock);
  1067. *device_addr = ret_vaddr;
  1068. if (is_userptr)
  1069. free_phys_pg_pack(hdev, phys_pg_pack);
  1070. return rc;
  1071. map_err:
  1072. if (add_va_block(hdev, va_range, ret_vaddr,
  1073. ret_vaddr + phys_pg_pack->total_size - 1))
  1074. dev_warn(hdev->dev,
  1075. "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
  1076. handle, ret_vaddr);
  1077. va_block_err:
  1078. kfree(hnode);
  1079. hnode_err:
  1080. shared_err:
  1081. atomic_dec(&phys_pg_pack->mapping_cnt);
  1082. if (is_userptr)
  1083. free_phys_pg_pack(hdev, phys_pg_pack);
  1084. init_page_pack_err:
  1085. if (is_userptr)
  1086. dma_unmap_host_va(hdev, userptr);
  1087. return rc;
  1088. }
  1089. /**
  1090. * unmap_device_va() - unmap the given device virtual address.
  1091. * @ctx: pointer to the context structure.
  1092. * @args: host parameters with device virtual address to unmap.
  1093. * @ctx_free: true if in context free flow, false otherwise.
  1094. *
  1095. * This function does the following:
  1096. * - unmap the physical pages related to the given virtual address.
  1097. * - return the device virtual block to the virtual block list.
  1098. */
  1099. static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
  1100. bool ctx_free)
  1101. {
  1102. struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
  1103. u64 vaddr = args->unmap.device_virt_addr;
  1104. struct hl_vm_hash_node *hnode = NULL;
  1105. struct asic_fixed_properties *prop;
  1106. struct hl_device *hdev = ctx->hdev;
  1107. struct hl_userptr *userptr = NULL;
  1108. struct hl_va_range *va_range;
  1109. enum vm_type *vm_type;
  1110. bool is_userptr;
  1111. int rc = 0;
  1112. prop = &hdev->asic_prop;
  1113. /* protect from double entrance */
  1114. mutex_lock(&ctx->mem_hash_lock);
  1115. hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
  1116. if (vaddr == hnode->vaddr)
  1117. break;
  1118. if (!hnode) {
  1119. mutex_unlock(&ctx->mem_hash_lock);
  1120. dev_err(hdev->dev,
  1121. "unmap failed, no mem hnode for vaddr 0x%llx\n",
  1122. vaddr);
  1123. return -EINVAL;
  1124. }
  1125. hash_del(&hnode->node);
  1126. mutex_unlock(&ctx->mem_hash_lock);
  1127. vm_type = hnode->ptr;
  1128. if (*vm_type == VM_TYPE_USERPTR) {
  1129. is_userptr = true;
  1130. userptr = hnode->ptr;
  1131. rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
  1132. false);
  1133. if (rc) {
  1134. dev_err(hdev->dev,
  1135. "unable to init page pack for vaddr 0x%llx\n",
  1136. vaddr);
  1137. goto vm_type_err;
  1138. }
  1139. if (phys_pg_pack->page_size ==
  1140. hdev->asic_prop.pmmu.page_size)
  1141. va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
  1142. else
  1143. va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
  1144. } else if (*vm_type == VM_TYPE_PHYS_PACK) {
  1145. is_userptr = false;
  1146. va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
  1147. phys_pg_pack = hnode->ptr;
  1148. } else {
  1149. dev_warn(hdev->dev,
  1150. "unmap failed, unknown vm desc for vaddr 0x%llx\n",
  1151. vaddr);
  1152. rc = -EFAULT;
  1153. goto vm_type_err;
  1154. }
  1155. if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
  1156. dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
  1157. rc = -EINVAL;
  1158. goto mapping_cnt_err;
  1159. }
  1160. if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
  1161. vaddr = prop->dram_base_address +
  1162. DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
  1163. phys_pg_pack->page_size) *
  1164. phys_pg_pack->page_size;
  1165. else
  1166. vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
  1167. mutex_lock(&hdev->mmu_lock);
  1168. unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
  1169. /*
  1170. * During context free this function is called in a loop to clean all
  1171. * the context mappings. Hence the cache invalidation can be called once
  1172. * at the loop end rather than for each iteration
  1173. */
  1174. if (!ctx_free)
  1175. rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
  1176. phys_pg_pack->total_size);
  1177. mutex_unlock(&hdev->mmu_lock);
  1178. /*
  1179. * If the context is closing we don't need to check for the MMU cache
  1180. * invalidation return code and update the VA free list as in this flow
  1181. * we invalidate the MMU cache outside of this unmap function and the VA
  1182. * free list will be freed anyway.
  1183. */
  1184. if (!ctx_free) {
  1185. int tmp_rc;
  1186. tmp_rc = add_va_block(hdev, va_range, vaddr,
  1187. vaddr + phys_pg_pack->total_size - 1);
  1188. if (tmp_rc) {
  1189. dev_warn(hdev->dev,
  1190. "add va block failed for vaddr: 0x%llx\n",
  1191. vaddr);
  1192. if (!rc)
  1193. rc = tmp_rc;
  1194. }
  1195. }
  1196. atomic_dec(&phys_pg_pack->mapping_cnt);
  1197. kfree(hnode);
  1198. if (is_userptr) {
  1199. free_phys_pg_pack(hdev, phys_pg_pack);
  1200. dma_unmap_host_va(hdev, userptr);
  1201. }
  1202. return rc;
  1203. mapping_cnt_err:
  1204. if (is_userptr)
  1205. free_phys_pg_pack(hdev, phys_pg_pack);
  1206. vm_type_err:
  1207. mutex_lock(&ctx->mem_hash_lock);
  1208. hash_add(ctx->mem_hash, &hnode->node, vaddr);
  1209. mutex_unlock(&ctx->mem_hash_lock);
  1210. return rc;
  1211. }
  1212. static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
  1213. {
  1214. u32 block_id;
  1215. int rc;
  1216. *handle = 0;
  1217. if (size)
  1218. *size = 0;
  1219. rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
  1220. if (rc)
  1221. return rc;
  1222. *handle = block_id | HL_MMAP_TYPE_BLOCK;
  1223. *handle <<= PAGE_SHIFT;
  1224. return 0;
  1225. }
  1226. static void hw_block_vm_close(struct vm_area_struct *vma)
  1227. {
  1228. struct hl_vm_hw_block_list_node *lnode =
  1229. (struct hl_vm_hw_block_list_node *) vma->vm_private_data;
  1230. struct hl_ctx *ctx = lnode->ctx;
  1231. long new_mmap_size;
  1232. new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
  1233. if (new_mmap_size > 0) {
  1234. lnode->mapped_size = new_mmap_size;
  1235. return;
  1236. }
  1237. mutex_lock(&ctx->hw_block_list_lock);
  1238. list_del(&lnode->node);
  1239. mutex_unlock(&ctx->hw_block_list_lock);
  1240. hl_ctx_put(ctx);
  1241. kfree(lnode);
  1242. vma->vm_private_data = NULL;
  1243. }
  1244. static const struct vm_operations_struct hw_block_vm_ops = {
  1245. .close = hw_block_vm_close
  1246. };
  1247. /**
  1248. * hl_hw_block_mmap() - mmap a hw block to user.
  1249. * @hpriv: pointer to the private data of the fd
  1250. * @vma: pointer to vm_area_struct of the process
  1251. *
  1252. * Driver increments context reference for every HW block mapped in order
  1253. * to prevent user from closing FD without unmapping first
  1254. */
  1255. int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
  1256. {
  1257. struct hl_vm_hw_block_list_node *lnode;
  1258. struct hl_device *hdev = hpriv->hdev;
  1259. struct hl_ctx *ctx = hpriv->ctx;
  1260. u32 block_id, block_size;
  1261. int rc;
  1262. /* We use the page offset to hold the block id and thus we need to clear
  1263. * it before doing the mmap itself
  1264. */
  1265. block_id = vma->vm_pgoff;
  1266. vma->vm_pgoff = 0;
  1267. /* Driver only allows mapping of a complete HW block */
  1268. block_size = vma->vm_end - vma->vm_start;
  1269. if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
  1270. dev_err(hdev->dev,
  1271. "user pointer is invalid - 0x%lx\n",
  1272. vma->vm_start);
  1273. return -EINVAL;
  1274. }
  1275. lnode = kzalloc(sizeof(*lnode), GFP_KERNEL);
  1276. if (!lnode)
  1277. return -ENOMEM;
  1278. rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
  1279. if (rc) {
  1280. kfree(lnode);
  1281. return rc;
  1282. }
  1283. hl_ctx_get(ctx);
  1284. lnode->ctx = ctx;
  1285. lnode->vaddr = vma->vm_start;
  1286. lnode->block_size = block_size;
  1287. lnode->mapped_size = lnode->block_size;
  1288. lnode->id = block_id;
  1289. vma->vm_private_data = lnode;
  1290. vma->vm_ops = &hw_block_vm_ops;
  1291. mutex_lock(&ctx->hw_block_list_lock);
  1292. list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
  1293. mutex_unlock(&ctx->hw_block_list_lock);
  1294. vma->vm_pgoff = block_id;
  1295. return 0;
  1296. }
  1297. static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
  1298. struct device *dev, enum dma_data_direction dir)
  1299. {
  1300. dma_addr_t addr;
  1301. int rc;
  1302. addr = dma_map_resource(dev, bar_address, chunk_size, dir,
  1303. DMA_ATTR_SKIP_CPU_SYNC);
  1304. rc = dma_mapping_error(dev, addr);
  1305. if (rc)
  1306. return rc;
  1307. sg_set_page(sg, NULL, chunk_size, 0);
  1308. sg_dma_address(sg) = addr;
  1309. sg_dma_len(sg) = chunk_size;
  1310. return 0;
  1311. }
  1312. static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
  1313. u64 page_size, struct device *dev,
  1314. enum dma_data_direction dir)
  1315. {
  1316. u64 chunk_size, bar_address, dma_max_seg_size;
  1317. struct asic_fixed_properties *prop;
  1318. int rc, i, j, nents, cur_page;
  1319. struct scatterlist *sg;
  1320. struct sg_table *sgt;
  1321. prop = &hdev->asic_prop;
  1322. dma_max_seg_size = dma_get_max_seg_size(dev);
  1323. /* We would like to align the max segment size to PAGE_SIZE, so the
  1324. * SGL will contain aligned addresses that can be easily mapped to
  1325. * an MMU
  1326. */
  1327. dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
  1328. if (dma_max_seg_size < PAGE_SIZE) {
  1329. dev_err_ratelimited(hdev->dev,
  1330. "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
  1331. dma_max_seg_size);
  1332. return ERR_PTR(-EINVAL);
  1333. }
  1334. sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
  1335. if (!sgt)
  1336. return ERR_PTR(-ENOMEM);
  1337. /* If the size of each page is larger than the dma max segment size,
  1338. * then we can't combine pages and the number of entries in the SGL
  1339. * will just be the
  1340. * <number of pages> * <chunks of max segment size in each page>
  1341. */
  1342. if (page_size > dma_max_seg_size)
  1343. nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size);
  1344. else
  1345. /* Get number of non-contiguous chunks */
  1346. for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) {
  1347. if (pages[i - 1] + page_size != pages[i] ||
  1348. chunk_size + page_size > dma_max_seg_size) {
  1349. nents++;
  1350. chunk_size = page_size;
  1351. continue;
  1352. }
  1353. chunk_size += page_size;
  1354. }
  1355. rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
  1356. if (rc)
  1357. goto error_free;
  1358. cur_page = 0;
  1359. if (page_size > dma_max_seg_size) {
  1360. u64 size_left, cur_device_address = 0;
  1361. size_left = page_size;
  1362. /* Need to split each page into the number of chunks of
  1363. * dma_max_seg_size
  1364. */
  1365. for_each_sgtable_dma_sg(sgt, sg, i) {
  1366. if (size_left == page_size)
  1367. cur_device_address =
  1368. pages[cur_page] - prop->dram_base_address;
  1369. else
  1370. cur_device_address += dma_max_seg_size;
  1371. chunk_size = min(size_left, dma_max_seg_size);
  1372. bar_address = hdev->dram_pci_bar_start + cur_device_address;
  1373. rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
  1374. if (rc)
  1375. goto error_unmap;
  1376. if (size_left > dma_max_seg_size) {
  1377. size_left -= dma_max_seg_size;
  1378. } else {
  1379. cur_page++;
  1380. size_left = page_size;
  1381. }
  1382. }
  1383. } else {
  1384. /* Merge pages and put them into the scatterlist */
  1385. for_each_sgtable_dma_sg(sgt, sg, i) {
  1386. chunk_size = page_size;
  1387. for (j = cur_page + 1 ; j < npages ; j++) {
  1388. if (pages[j - 1] + page_size != pages[j] ||
  1389. chunk_size + page_size > dma_max_seg_size)
  1390. break;
  1391. chunk_size += page_size;
  1392. }
  1393. bar_address = hdev->dram_pci_bar_start +
  1394. (pages[cur_page] - prop->dram_base_address);
  1395. rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
  1396. if (rc)
  1397. goto error_unmap;
  1398. cur_page = j;
  1399. }
  1400. }
  1401. /* Because we are not going to include a CPU list we want to have some
  1402. * chance that other users will detect this by setting the orig_nents
  1403. * to 0 and using only nents (length of DMA list) when going over the
  1404. * sgl
  1405. */
  1406. sgt->orig_nents = 0;
  1407. return sgt;
  1408. error_unmap:
  1409. for_each_sgtable_dma_sg(sgt, sg, i) {
  1410. if (!sg_dma_len(sg))
  1411. continue;
  1412. dma_unmap_resource(dev, sg_dma_address(sg),
  1413. sg_dma_len(sg), dir,
  1414. DMA_ATTR_SKIP_CPU_SYNC);
  1415. }
  1416. sg_free_table(sgt);
  1417. error_free:
  1418. kfree(sgt);
  1419. return ERR_PTR(rc);
  1420. }
  1421. static int hl_dmabuf_attach(struct dma_buf *dmabuf,
  1422. struct dma_buf_attachment *attachment)
  1423. {
  1424. struct hl_dmabuf_priv *hl_dmabuf;
  1425. struct hl_device *hdev;
  1426. int rc;
  1427. hl_dmabuf = dmabuf->priv;
  1428. hdev = hl_dmabuf->ctx->hdev;
  1429. rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true);
  1430. if (rc < 0)
  1431. attachment->peer2peer = false;
  1432. return 0;
  1433. }
  1434. static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
  1435. enum dma_data_direction dir)
  1436. {
  1437. struct dma_buf *dma_buf = attachment->dmabuf;
  1438. struct hl_vm_phys_pg_pack *phys_pg_pack;
  1439. struct hl_dmabuf_priv *hl_dmabuf;
  1440. struct hl_device *hdev;
  1441. struct sg_table *sgt;
  1442. hl_dmabuf = dma_buf->priv;
  1443. hdev = hl_dmabuf->ctx->hdev;
  1444. phys_pg_pack = hl_dmabuf->phys_pg_pack;
  1445. if (!attachment->peer2peer) {
  1446. dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
  1447. return ERR_PTR(-EPERM);
  1448. }
  1449. if (phys_pg_pack)
  1450. sgt = alloc_sgt_from_device_pages(hdev,
  1451. phys_pg_pack->pages,
  1452. phys_pg_pack->npages,
  1453. phys_pg_pack->page_size,
  1454. attachment->dev,
  1455. dir);
  1456. else
  1457. sgt = alloc_sgt_from_device_pages(hdev,
  1458. &hl_dmabuf->device_address,
  1459. 1,
  1460. hl_dmabuf->dmabuf->size,
  1461. attachment->dev,
  1462. dir);
  1463. if (IS_ERR(sgt))
  1464. dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
  1465. return sgt;
  1466. }
  1467. static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
  1468. struct sg_table *sgt,
  1469. enum dma_data_direction dir)
  1470. {
  1471. struct scatterlist *sg;
  1472. int i;
  1473. /* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives
  1474. * only in the 'device' domain (after all, it maps a PCI bar address which points to the
  1475. * device memory).
  1476. *
  1477. * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform
  1478. * a sync of the memory to the CPU's cache, as it never resided inside that cache.
  1479. */
  1480. for_each_sgtable_dma_sg(sgt, sg, i)
  1481. dma_unmap_resource(attachment->dev, sg_dma_address(sg),
  1482. sg_dma_len(sg), dir,
  1483. DMA_ATTR_SKIP_CPU_SYNC);
  1484. /* Need to restore orig_nents because sg_free_table use that field */
  1485. sgt->orig_nents = sgt->nents;
  1486. sg_free_table(sgt);
  1487. kfree(sgt);
  1488. }
  1489. static void hl_release_dmabuf(struct dma_buf *dmabuf)
  1490. {
  1491. struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
  1492. struct hl_ctx *ctx = hl_dmabuf->ctx;
  1493. struct hl_device *hdev = ctx->hdev;
  1494. struct hl_vm *vm = &hdev->vm;
  1495. if (hl_dmabuf->phys_pg_pack) {
  1496. spin_lock(&vm->idr_lock);
  1497. hl_dmabuf->phys_pg_pack->exporting_cnt--;
  1498. spin_unlock(&vm->idr_lock);
  1499. }
  1500. hl_ctx_put(hl_dmabuf->ctx);
  1501. kfree(hl_dmabuf);
  1502. }
  1503. static const struct dma_buf_ops habanalabs_dmabuf_ops = {
  1504. .attach = hl_dmabuf_attach,
  1505. .map_dma_buf = hl_map_dmabuf,
  1506. .unmap_dma_buf = hl_unmap_dmabuf,
  1507. .release = hl_release_dmabuf,
  1508. };
  1509. static int export_dmabuf_common(struct hl_ctx *ctx,
  1510. struct hl_dmabuf_priv *hl_dmabuf,
  1511. u64 total_size, int flags, int *dmabuf_fd)
  1512. {
  1513. DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
  1514. struct hl_device *hdev = ctx->hdev;
  1515. int rc, fd;
  1516. exp_info.ops = &habanalabs_dmabuf_ops;
  1517. exp_info.size = total_size;
  1518. exp_info.flags = flags;
  1519. exp_info.priv = hl_dmabuf;
  1520. hl_dmabuf->dmabuf = dma_buf_export(&exp_info);
  1521. if (IS_ERR(hl_dmabuf->dmabuf)) {
  1522. dev_err(hdev->dev, "failed to export dma-buf\n");
  1523. return PTR_ERR(hl_dmabuf->dmabuf);
  1524. }
  1525. fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
  1526. if (fd < 0) {
  1527. dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n");
  1528. rc = fd;
  1529. goto err_dma_buf_put;
  1530. }
  1531. hl_dmabuf->ctx = ctx;
  1532. hl_ctx_get(hl_dmabuf->ctx);
  1533. *dmabuf_fd = fd;
  1534. return 0;
  1535. err_dma_buf_put:
  1536. dma_buf_put(hl_dmabuf->dmabuf);
  1537. return rc;
  1538. }
  1539. /**
  1540. * export_dmabuf_from_addr() - export a dma-buf object for the given memory
  1541. * address and size.
  1542. * @ctx: pointer to the context structure.
  1543. * @device_addr: device memory physical address.
  1544. * @size: size of device memory.
  1545. * @flags: DMA-BUF file/FD flags.
  1546. * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
  1547. *
  1548. * Create and export a dma-buf object for an existing memory allocation inside
  1549. * the device memory, and return a FD which is associated with the dma-buf
  1550. * object.
  1551. *
  1552. * Return: 0 on success, non-zero for failure.
  1553. */
  1554. static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr,
  1555. u64 size, int flags, int *dmabuf_fd)
  1556. {
  1557. struct hl_dmabuf_priv *hl_dmabuf;
  1558. struct hl_device *hdev = ctx->hdev;
  1559. struct asic_fixed_properties *prop;
  1560. u64 bar_address;
  1561. int rc;
  1562. prop = &hdev->asic_prop;
  1563. if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
  1564. dev_dbg(hdev->dev,
  1565. "exported device memory address 0x%llx should be aligned to 0x%lx\n",
  1566. device_addr, PAGE_SIZE);
  1567. return -EINVAL;
  1568. }
  1569. if (size < PAGE_SIZE) {
  1570. dev_dbg(hdev->dev,
  1571. "exported device memory size %llu should be equal to or greater than %lu\n",
  1572. size, PAGE_SIZE);
  1573. return -EINVAL;
  1574. }
  1575. if (device_addr < prop->dram_user_base_address ||
  1576. device_addr + size > prop->dram_end_address ||
  1577. device_addr + size < device_addr) {
  1578. dev_dbg(hdev->dev,
  1579. "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
  1580. device_addr, size);
  1581. return -EINVAL;
  1582. }
  1583. bar_address = hdev->dram_pci_bar_start +
  1584. (device_addr - prop->dram_base_address);
  1585. if (bar_address + size >
  1586. hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
  1587. bar_address + size < bar_address) {
  1588. dev_dbg(hdev->dev,
  1589. "DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n",
  1590. device_addr, size);
  1591. return -EINVAL;
  1592. }
  1593. hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
  1594. if (!hl_dmabuf)
  1595. return -ENOMEM;
  1596. hl_dmabuf->device_address = device_addr;
  1597. rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd);
  1598. if (rc)
  1599. goto err_free_dmabuf_wrapper;
  1600. return 0;
  1601. err_free_dmabuf_wrapper:
  1602. kfree(hl_dmabuf);
  1603. return rc;
  1604. }
  1605. /**
  1606. * export_dmabuf_from_handle() - export a dma-buf object for the given memory
  1607. * handle.
  1608. * @ctx: pointer to the context structure.
  1609. * @handle: device memory allocation handle.
  1610. * @flags: DMA-BUF file/FD flags.
  1611. * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
  1612. *
  1613. * Create and export a dma-buf object for an existing memory allocation inside
  1614. * the device memory, and return a FD which is associated with the dma-buf
  1615. * object.
  1616. *
  1617. * Return: 0 on success, non-zero for failure.
  1618. */
  1619. static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags,
  1620. int *dmabuf_fd)
  1621. {
  1622. struct hl_vm_phys_pg_pack *phys_pg_pack;
  1623. struct hl_dmabuf_priv *hl_dmabuf;
  1624. struct hl_device *hdev = ctx->hdev;
  1625. struct asic_fixed_properties *prop;
  1626. struct hl_vm *vm = &hdev->vm;
  1627. u64 bar_address;
  1628. int rc, i;
  1629. prop = &hdev->asic_prop;
  1630. if (upper_32_bits(handle)) {
  1631. dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle);
  1632. return -EINVAL;
  1633. }
  1634. spin_lock(&vm->idr_lock);
  1635. phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle);
  1636. if (!phys_pg_pack) {
  1637. spin_unlock(&vm->idr_lock);
  1638. dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle);
  1639. return -EINVAL;
  1640. }
  1641. /* increment now to avoid freeing device memory while exporting */
  1642. phys_pg_pack->exporting_cnt++;
  1643. spin_unlock(&vm->idr_lock);
  1644. if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) {
  1645. dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle);
  1646. rc = -EINVAL;
  1647. goto err_dec_exporting_cnt;
  1648. }
  1649. for (i = 0 ; i < phys_pg_pack->npages ; i++) {
  1650. bar_address = hdev->dram_pci_bar_start +
  1651. (phys_pg_pack->pages[i] -
  1652. prop->dram_base_address);
  1653. if (bar_address + phys_pg_pack->page_size >
  1654. hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
  1655. bar_address + phys_pg_pack->page_size < bar_address) {
  1656. dev_dbg(hdev->dev,
  1657. "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
  1658. phys_pg_pack->pages[i],
  1659. phys_pg_pack->page_size);
  1660. rc = -EINVAL;
  1661. goto err_dec_exporting_cnt;
  1662. }
  1663. }
  1664. hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
  1665. if (!hl_dmabuf) {
  1666. rc = -ENOMEM;
  1667. goto err_dec_exporting_cnt;
  1668. }
  1669. hl_dmabuf->phys_pg_pack = phys_pg_pack;
  1670. rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size,
  1671. flags, dmabuf_fd);
  1672. if (rc)
  1673. goto err_free_dmabuf_wrapper;
  1674. return 0;
  1675. err_free_dmabuf_wrapper:
  1676. kfree(hl_dmabuf);
  1677. err_dec_exporting_cnt:
  1678. spin_lock(&vm->idr_lock);
  1679. phys_pg_pack->exporting_cnt--;
  1680. spin_unlock(&vm->idr_lock);
  1681. return rc;
  1682. }
  1683. static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
  1684. {
  1685. struct hl_device *hdev = hpriv->hdev;
  1686. u64 block_handle, device_addr = 0;
  1687. struct hl_ctx *ctx = hpriv->ctx;
  1688. u32 handle = 0, block_size;
  1689. int rc;
  1690. switch (args->in.op) {
  1691. case HL_MEM_OP_ALLOC:
  1692. if (args->in.alloc.mem_size == 0) {
  1693. dev_err(hdev->dev, "alloc size must be larger than 0\n");
  1694. rc = -EINVAL;
  1695. goto out;
  1696. }
  1697. /* Force contiguous as there are no real MMU
  1698. * translations to overcome physical memory gaps
  1699. */
  1700. args->in.flags |= HL_MEM_CONTIGUOUS;
  1701. rc = alloc_device_memory(ctx, &args->in, &handle);
  1702. memset(args, 0, sizeof(*args));
  1703. args->out.handle = (__u64) handle;
  1704. break;
  1705. case HL_MEM_OP_FREE:
  1706. rc = free_device_memory(ctx, &args->in);
  1707. break;
  1708. case HL_MEM_OP_MAP:
  1709. if (args->in.flags & HL_MEM_USERPTR) {
  1710. dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n");
  1711. rc = -EPERM;
  1712. } else {
  1713. rc = get_paddr_from_handle(ctx, &args->in, &device_addr);
  1714. memset(args, 0, sizeof(*args));
  1715. args->out.device_virt_addr = device_addr;
  1716. }
  1717. break;
  1718. case HL_MEM_OP_UNMAP:
  1719. rc = 0;
  1720. break;
  1721. case HL_MEM_OP_MAP_BLOCK:
  1722. rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
  1723. args->out.block_handle = block_handle;
  1724. args->out.block_size = block_size;
  1725. break;
  1726. case HL_MEM_OP_EXPORT_DMABUF_FD:
  1727. dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
  1728. rc = -EPERM;
  1729. break;
  1730. case HL_MEM_OP_TS_ALLOC:
  1731. rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
  1732. break;
  1733. default:
  1734. dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
  1735. rc = -EINVAL;
  1736. break;
  1737. }
  1738. out:
  1739. return rc;
  1740. }
  1741. static void ts_buff_release(struct hl_mmap_mem_buf *buf)
  1742. {
  1743. struct hl_ts_buff *ts_buff = buf->private;
  1744. vfree(ts_buff->kernel_buff_address);
  1745. vfree(ts_buff->user_buff_address);
  1746. kfree(ts_buff);
  1747. }
  1748. static int hl_ts_mmap(struct hl_mmap_mem_buf *buf, struct vm_area_struct *vma, void *args)
  1749. {
  1750. struct hl_ts_buff *ts_buff = buf->private;
  1751. vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE);
  1752. return remap_vmalloc_range(vma, ts_buff->user_buff_address, 0);
  1753. }
  1754. static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
  1755. {
  1756. struct hl_ts_buff *ts_buff = NULL;
  1757. u32 num_elements;
  1758. size_t size;
  1759. void *p;
  1760. num_elements = *(u32 *)args;
  1761. ts_buff = kzalloc(sizeof(*ts_buff), gfp);
  1762. if (!ts_buff)
  1763. return -ENOMEM;
  1764. /* Allocate the user buffer */
  1765. size = num_elements * sizeof(u64);
  1766. p = vmalloc_user(size);
  1767. if (!p)
  1768. goto free_mem;
  1769. ts_buff->user_buff_address = p;
  1770. buf->mappable_size = size;
  1771. /* Allocate the internal kernel buffer */
  1772. size = num_elements * sizeof(struct hl_user_pending_interrupt);
  1773. p = vmalloc(size);
  1774. if (!p)
  1775. goto free_user_buff;
  1776. ts_buff->kernel_buff_address = p;
  1777. ts_buff->kernel_buff_size = size;
  1778. buf->private = ts_buff;
  1779. return 0;
  1780. free_user_buff:
  1781. vfree(ts_buff->user_buff_address);
  1782. free_mem:
  1783. kfree(ts_buff);
  1784. return -ENOMEM;
  1785. }
/*
 * Behavior descriptor for timestamps ("TS") buffers: the mmap type id and the
 * mmap/alloc/release callbacks. It is passed to hl_mmap_mem_buf_alloc() by
 * allocate_timestamps_buffers().
 */
static struct hl_mmap_mem_buf_behavior hl_ts_behavior = {
	.topic = "TS",
	.mem_id = HL_MMAP_TYPE_TS_BUFF,
	.mmap = hl_ts_mmap,
	.alloc = hl_ts_alloc_buf,
	.release = ts_buff_release,
};
  1793. /**
  1794. * allocate_timestamps_buffers() - allocate timestamps buffers
  1795. * This function will allocate ts buffer that will later on be mapped to the user
  1796. * in order to be able to read the timestamp.
  1797. * in additon it'll allocate an extra buffer for registration management.
  1798. * since we cannot fail during registration for out-of-memory situation, so
  1799. * we'll prepare a pool which will be used as user interrupt nodes and instead
  1800. * of dynamically allocating nodes while registration we'll pick the node from
  1801. * this pool. in addtion it'll add node to the mapping hash which will be used
  1802. * to map user ts buffer to the internal kernel ts buffer.
  1803. * @hpriv: pointer to the private data of the fd
  1804. * @args: ioctl input
  1805. * @handle: user timestamp buffer handle as an output
  1806. */
  1807. static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
  1808. {
  1809. struct hl_mem_mgr *mmg = &hpriv->mem_mgr;
  1810. struct hl_mmap_mem_buf *buf;
  1811. if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
  1812. dev_err(mmg->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
  1813. args->num_of_elements, TS_MAX_ELEMENTS_NUM);
  1814. return -EINVAL;
  1815. }
  1816. buf = hl_mmap_mem_buf_alloc(mmg, &hl_ts_behavior, GFP_KERNEL, &args->num_of_elements);
  1817. if (!buf)
  1818. return -ENOMEM;
  1819. *handle = buf->handle;
  1820. return 0;
  1821. }
  1822. int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
  1823. {
  1824. enum hl_device_status status;
  1825. union hl_mem_args *args = data;
  1826. struct hl_device *hdev = hpriv->hdev;
  1827. struct hl_ctx *ctx = hpriv->ctx;
  1828. u64 block_handle, device_addr = 0;
  1829. u32 handle = 0, block_size;
  1830. int rc, dmabuf_fd = -EBADF;
  1831. if (!hl_device_operational(hdev, &status)) {
  1832. dev_warn_ratelimited(hdev->dev,
  1833. "Device is %s. Can't execute MEMORY IOCTL\n",
  1834. hdev->status[status]);
  1835. return -EBUSY;
  1836. }
  1837. if (!hdev->mmu_enable)
  1838. return mem_ioctl_no_mmu(hpriv, args);
  1839. switch (args->in.op) {
  1840. case HL_MEM_OP_ALLOC:
  1841. if (args->in.alloc.mem_size == 0) {
  1842. dev_err(hdev->dev,
  1843. "alloc size must be larger than 0\n");
  1844. rc = -EINVAL;
  1845. goto out;
  1846. }
  1847. /* If DRAM does not support virtual memory the driver won't
  1848. * handle the allocation/freeing of that memory. However, for
  1849. * system administration/monitoring purposes, the driver will
  1850. * keep track of the amount of DRAM memory that is allocated
  1851. * and freed by the user. Because this code totally relies on
  1852. * the user's input, the driver can't ensure the validity
  1853. * of this accounting.
  1854. */
  1855. if (!hdev->asic_prop.dram_supports_virtual_memory) {
  1856. atomic64_add(args->in.alloc.mem_size,
  1857. &ctx->dram_phys_mem);
  1858. atomic64_add(args->in.alloc.mem_size,
  1859. &hdev->dram_used_mem);
  1860. dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
  1861. rc = 0;
  1862. memset(args, 0, sizeof(*args));
  1863. args->out.handle = 0;
  1864. goto out;
  1865. }
  1866. rc = alloc_device_memory(ctx, &args->in, &handle);
  1867. memset(args, 0, sizeof(*args));
  1868. args->out.handle = (__u64) handle;
  1869. break;
  1870. case HL_MEM_OP_FREE:
  1871. /* If DRAM does not support virtual memory the driver won't
  1872. * handle the allocation/freeing of that memory. However, for
  1873. * system administration/monitoring purposes, the driver will
  1874. * keep track of the amount of DRAM memory that is allocated
  1875. * and freed by the user. Because this code totally relies on
  1876. * the user's input, the driver can't ensure the validity
  1877. * of this accounting.
  1878. */
  1879. if (!hdev->asic_prop.dram_supports_virtual_memory) {
  1880. atomic64_sub(args->in.alloc.mem_size,
  1881. &ctx->dram_phys_mem);
  1882. atomic64_sub(args->in.alloc.mem_size,
  1883. &hdev->dram_used_mem);
  1884. dev_dbg(hdev->dev, "DRAM alloc is not supported\n");
  1885. rc = 0;
  1886. goto out;
  1887. }
  1888. rc = free_device_memory(ctx, &args->in);
  1889. break;
  1890. case HL_MEM_OP_MAP:
  1891. rc = map_device_va(ctx, &args->in, &device_addr);
  1892. memset(args, 0, sizeof(*args));
  1893. args->out.device_virt_addr = device_addr;
  1894. break;
  1895. case HL_MEM_OP_UNMAP:
  1896. rc = unmap_device_va(ctx, &args->in, false);
  1897. break;
  1898. case HL_MEM_OP_MAP_BLOCK:
  1899. rc = map_block(hdev, args->in.map_block.block_addr,
  1900. &block_handle, &block_size);
  1901. args->out.block_handle = block_handle;
  1902. args->out.block_size = block_size;
  1903. break;
  1904. case HL_MEM_OP_EXPORT_DMABUF_FD:
  1905. if (hdev->asic_prop.dram_supports_virtual_memory)
  1906. rc = export_dmabuf_from_handle(ctx,
  1907. args->in.export_dmabuf_fd.handle,
  1908. args->in.flags,
  1909. &dmabuf_fd);
  1910. else
  1911. rc = export_dmabuf_from_addr(ctx,
  1912. args->in.export_dmabuf_fd.handle,
  1913. args->in.export_dmabuf_fd.mem_size,
  1914. args->in.flags,
  1915. &dmabuf_fd);
  1916. memset(args, 0, sizeof(*args));
  1917. args->out.fd = dmabuf_fd;
  1918. break;
  1919. case HL_MEM_OP_TS_ALLOC:
  1920. rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
  1921. break;
  1922. default:
  1923. dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
  1924. rc = -EINVAL;
  1925. break;
  1926. }
  1927. out:
  1928. return rc;
  1929. }
  1930. static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
  1931. u32 npages, u64 start, u32 offset,
  1932. struct hl_userptr *userptr)
  1933. {
  1934. int rc;
  1935. if (!access_ok((void __user *) (uintptr_t) addr, size)) {
  1936. dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
  1937. return -EFAULT;
  1938. }
  1939. userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
  1940. if (!userptr->pages)
  1941. return -ENOMEM;
  1942. rc = pin_user_pages_fast(start, npages,
  1943. FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
  1944. userptr->pages);
  1945. if (rc != npages) {
  1946. dev_err(hdev->dev,
  1947. "Failed (%d) to pin host memory with user ptr 0x%llx, size 0x%llx, npages %d\n",
  1948. rc, addr, size, npages);
  1949. if (rc < 0)
  1950. goto destroy_pages;
  1951. npages = rc;
  1952. rc = -EFAULT;
  1953. goto put_pages;
  1954. }
  1955. userptr->npages = npages;
  1956. rc = sg_alloc_table_from_pages(userptr->sgt,
  1957. userptr->pages,
  1958. npages, offset, size, GFP_KERNEL);
  1959. if (rc < 0) {
  1960. dev_err(hdev->dev, "failed to create SG table from pages\n");
  1961. goto put_pages;
  1962. }
  1963. return 0;
  1964. put_pages:
  1965. unpin_user_pages(userptr->pages, npages);
  1966. destroy_pages:
  1967. kvfree(userptr->pages);
  1968. return rc;
  1969. }
  1970. /**
  1971. * hl_pin_host_memory() - pins a chunk of host memory.
  1972. * @hdev: pointer to the habanalabs device structure.
  1973. * @addr: the host virtual address of the memory area.
  1974. * @size: the size of the memory area.
  1975. * @userptr: pointer to hl_userptr structure.
  1976. *
  1977. * This function does the following:
  1978. * - Pins the physical pages.
  1979. * - Create an SG list from those pages.
  1980. */
  1981. int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
  1982. struct hl_userptr *userptr)
  1983. {
  1984. u64 start, end;
  1985. u32 npages, offset;
  1986. int rc;
  1987. if (!size) {
  1988. dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
  1989. return -EINVAL;
  1990. }
  1991. /*
  1992. * If the combination of the address and size requested for this memory
  1993. * region causes an integer overflow, return error.
  1994. */
  1995. if (((addr + size) < addr) ||
  1996. PAGE_ALIGN(addr + size) < (addr + size)) {
  1997. dev_err(hdev->dev,
  1998. "user pointer 0x%llx + %llu causes integer overflow\n",
  1999. addr, size);
  2000. return -EINVAL;
  2001. }
  2002. userptr->pid = current->pid;
  2003. userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
  2004. if (!userptr->sgt)
  2005. return -ENOMEM;
  2006. start = addr & PAGE_MASK;
  2007. offset = addr & ~PAGE_MASK;
  2008. end = PAGE_ALIGN(addr + size);
  2009. npages = (end - start) >> PAGE_SHIFT;
  2010. userptr->size = size;
  2011. userptr->addr = addr;
  2012. userptr->dma_mapped = false;
  2013. INIT_LIST_HEAD(&userptr->job_node);
  2014. rc = get_user_memory(hdev, addr, size, npages, start, offset,
  2015. userptr);
  2016. if (rc) {
  2017. dev_err(hdev->dev,
  2018. "failed to get user memory for address 0x%llx\n",
  2019. addr);
  2020. goto free_sgt;
  2021. }
  2022. hl_debugfs_add_userptr(hdev, userptr);
  2023. return 0;
  2024. free_sgt:
  2025. kfree(userptr->sgt);
  2026. return rc;
  2027. }
  2028. /*
  2029. * hl_unpin_host_memory - unpins a chunk of host memory.
  2030. * @hdev: pointer to the habanalabs device structure
  2031. * @userptr: pointer to hl_userptr structure
  2032. *
  2033. * This function does the following:
  2034. * - Unpins the physical pages related to the host memory
  2035. * - Free the SG list
  2036. */
  2037. void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
  2038. {
  2039. hl_debugfs_remove_userptr(hdev, userptr);
  2040. if (userptr->dma_mapped)
  2041. hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
  2042. unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
  2043. kvfree(userptr->pages);
  2044. list_del(&userptr->job_node);
  2045. sg_free_table(userptr->sgt);
  2046. kfree(userptr->sgt);
  2047. }
  2048. /**
  2049. * hl_userptr_delete_list() - clear userptr list.
  2050. * @hdev: pointer to the habanalabs device structure.
  2051. * @userptr_list: pointer to the list to clear.
  2052. *
  2053. * This function does the following:
  2054. * - Iterates over the list and unpins the host memory and frees the userptr
  2055. * structure.
  2056. */
  2057. void hl_userptr_delete_list(struct hl_device *hdev,
  2058. struct list_head *userptr_list)
  2059. {
  2060. struct hl_userptr *userptr, *tmp;
  2061. list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
  2062. hl_unpin_host_memory(hdev, userptr);
  2063. kfree(userptr);
  2064. }
  2065. INIT_LIST_HEAD(userptr_list);
  2066. }
  2067. /**
  2068. * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
  2069. * @hdev: pointer to the habanalabs device structure.
  2070. * @addr: user address to check.
  2071. * @size: user block size to check.
  2072. * @userptr_list: pointer to the list to clear.
  2073. * @userptr: pointer to userptr to check.
  2074. *
  2075. * This function does the following:
  2076. * - Iterates over the list and checks if the given userptr is in it, means is
  2077. * pinned. If so, returns true, otherwise returns false.
  2078. */
  2079. bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
  2080. u32 size, struct list_head *userptr_list,
  2081. struct hl_userptr **userptr)
  2082. {
  2083. list_for_each_entry((*userptr), userptr_list, job_node) {
  2084. if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
  2085. return true;
  2086. }
  2087. return false;
  2088. }
  2089. /**
  2090. * va_range_init() - initialize virtual addresses range.
  2091. * @hdev: pointer to the habanalabs device structure.
  2092. * @va_ranges: pointer to va_ranges array.
  2093. * @range_type: virtual address range type.
  2094. * @start: range start address, inclusive.
  2095. * @end: range end address, inclusive.
  2096. * @page_size: page size for this va_range.
  2097. *
  2098. * This function does the following:
  2099. * - Initializes the virtual addresses list of the given range with the given
  2100. * addresses.
  2101. */
  2102. static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges,
  2103. enum hl_va_range_type range_type, u64 start,
  2104. u64 end, u32 page_size)
  2105. {
  2106. struct hl_va_range *va_range = va_ranges[range_type];
  2107. int rc;
  2108. INIT_LIST_HEAD(&va_range->list);
  2109. /*
  2110. * PAGE_SIZE alignment
  2111. * it is the callers responsibility to align the addresses if the
  2112. * page size is not a power of 2
  2113. */
  2114. if (is_power_of_2(page_size)) {
  2115. if (start & (PAGE_SIZE - 1)) {
  2116. start &= PAGE_MASK;
  2117. start += PAGE_SIZE;
  2118. }
  2119. /*
  2120. * The end of the range is inclusive, hence we need to align it
  2121. * to the end of the last full page in the range. For example if
  2122. * end = 0x3ff5 with page size 0x1000, we need to align it to
  2123. * 0x2fff. The remainig 0xff5 bytes do not form a full page.
  2124. */
  2125. if ((end + 1) & (PAGE_SIZE - 1))
  2126. end = ((end + 1) & PAGE_MASK) - 1;
  2127. }
  2128. if (start >= end) {
  2129. dev_err(hdev->dev, "too small vm range for va list\n");
  2130. return -EFAULT;
  2131. }
  2132. rc = add_va_block(hdev, va_range, start, end);
  2133. if (rc) {
  2134. dev_err(hdev->dev, "Failed to init host va list\n");
  2135. return rc;
  2136. }
  2137. va_range->start_addr = start;
  2138. va_range->end_addr = end;
  2139. va_range->page_size = page_size;
  2140. return 0;
  2141. }
  2142. /**
  2143. * va_range_fini() - clear a virtual addresses range.
  2144. * @hdev: pointer to the habanalabs structure.
  2145. * @va_range: pointer to virtual addresses range.
  2146. *
  2147. * This function does the following:
  2148. * - Frees the virtual addresses block list and its lock.
  2149. */
  2150. static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
  2151. {
  2152. mutex_lock(&va_range->lock);
  2153. clear_va_list_locked(hdev, &va_range->list);
  2154. mutex_unlock(&va_range->lock);
  2155. mutex_destroy(&va_range->lock);
  2156. kfree(va_range);
  2157. }
  2158. /**
  2159. * vm_ctx_init_with_ranges() - initialize virtual memory for context.
  2160. * @ctx: pointer to the habanalabs context structure.
  2161. * @host_range_start: host virtual addresses range start.
  2162. * @host_range_end: host virtual addresses range end.
  2163. * @host_page_size: host page size.
  2164. * @host_huge_range_start: host virtual addresses range start for memory
  2165. * allocated with huge pages.
  2166. * @host_huge_range_end: host virtual addresses range end for memory allocated
  2167. * with huge pages.
  2168. * @host_huge_page_size: host huge page size.
  2169. * @dram_range_start: dram virtual addresses range start.
  2170. * @dram_range_end: dram virtual addresses range end.
  2171. * @dram_page_size: dram page size.
  2172. *
  2173. * This function initializes the following:
  2174. * - MMU for context.
  2175. * - Virtual address to area descriptor hashtable.
  2176. * - Virtual block list of available virtual memory.
  2177. */
  2178. static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
  2179. u64 host_range_start,
  2180. u64 host_range_end,
  2181. u32 host_page_size,
  2182. u64 host_huge_range_start,
  2183. u64 host_huge_range_end,
  2184. u32 host_huge_page_size,
  2185. u64 dram_range_start,
  2186. u64 dram_range_end,
  2187. u32 dram_page_size)
  2188. {
  2189. struct hl_device *hdev = ctx->hdev;
  2190. int i, rc;
  2191. for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++) {
  2192. ctx->va_range[i] =
  2193. kzalloc(sizeof(struct hl_va_range), GFP_KERNEL);
  2194. if (!ctx->va_range[i]) {
  2195. rc = -ENOMEM;
  2196. goto free_va_range;
  2197. }
  2198. }
  2199. rc = hl_mmu_ctx_init(ctx);
  2200. if (rc) {
  2201. dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
  2202. goto free_va_range;
  2203. }
  2204. mutex_init(&ctx->mem_hash_lock);
  2205. hash_init(ctx->mem_hash);
  2206. mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
  2207. rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_HOST,
  2208. host_range_start, host_range_end, host_page_size);
  2209. if (rc) {
  2210. dev_err(hdev->dev, "failed to init host vm range\n");
  2211. goto mmu_ctx_fini;
  2212. }
  2213. if (hdev->pmmu_huge_range) {
  2214. mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
  2215. rc = va_range_init(hdev,
  2216. ctx->va_range, HL_VA_RANGE_TYPE_HOST_HUGE,
  2217. host_huge_range_start, host_huge_range_end,
  2218. host_huge_page_size);
  2219. if (rc) {
  2220. dev_err(hdev->dev,
  2221. "failed to init host huge vm range\n");
  2222. goto clear_host_va_range;
  2223. }
  2224. } else {
  2225. kfree(ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
  2226. ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE] =
  2227. ctx->va_range[HL_VA_RANGE_TYPE_HOST];
  2228. }
  2229. mutex_init(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
  2230. rc = va_range_init(hdev, ctx->va_range, HL_VA_RANGE_TYPE_DRAM,
  2231. dram_range_start, dram_range_end, dram_page_size);
  2232. if (rc) {
  2233. dev_err(hdev->dev, "failed to init dram vm range\n");
  2234. goto clear_host_huge_va_range;
  2235. }
  2236. hl_debugfs_add_ctx_mem_hash(hdev, ctx);
  2237. return 0;
  2238. clear_host_huge_va_range:
  2239. mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_DRAM]->lock);
  2240. if (hdev->pmmu_huge_range) {
  2241. mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
  2242. clear_va_list_locked(hdev,
  2243. &ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->list);
  2244. mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
  2245. }
  2246. clear_host_va_range:
  2247. if (hdev->pmmu_huge_range)
  2248. mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]->lock);
  2249. mutex_lock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
  2250. clear_va_list_locked(hdev, &ctx->va_range[HL_VA_RANGE_TYPE_HOST]->list);
  2251. mutex_unlock(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
  2252. mmu_ctx_fini:
  2253. mutex_destroy(&ctx->va_range[HL_VA_RANGE_TYPE_HOST]->lock);
  2254. mutex_destroy(&ctx->mem_hash_lock);
  2255. hl_mmu_ctx_fini(ctx);
  2256. free_va_range:
  2257. for (i = 0 ; i < HL_VA_RANGE_TYPE_MAX ; i++)
  2258. kfree(ctx->va_range[i]);
  2259. return rc;
  2260. }
  2261. int hl_vm_ctx_init(struct hl_ctx *ctx)
  2262. {
  2263. struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
  2264. u64 host_range_start, host_range_end, host_huge_range_start,
  2265. host_huge_range_end, dram_range_start, dram_range_end;
  2266. u32 host_page_size, host_huge_page_size, dram_page_size;
  2267. atomic64_set(&ctx->dram_phys_mem, 0);
  2268. /*
  2269. * - If MMU is enabled, init the ranges as usual.
  2270. * - If MMU is disabled, in case of host mapping, the returned address
  2271. * is the given one.
  2272. * In case of DRAM mapping, the returned address is the physical
  2273. * address of the memory related to the given handle.
  2274. */
  2275. if (!ctx->hdev->mmu_enable)
  2276. return 0;
  2277. dram_range_start = prop->dmmu.start_addr;
  2278. dram_range_end = prop->dmmu.end_addr - 1;
  2279. dram_page_size = prop->dram_page_size ?
  2280. prop->dram_page_size : prop->dmmu.page_size;
  2281. host_range_start = prop->pmmu.start_addr;
  2282. host_range_end = prop->pmmu.end_addr - 1;
  2283. host_page_size = prop->pmmu.page_size;
  2284. host_huge_range_start = prop->pmmu_huge.start_addr;
  2285. host_huge_range_end = prop->pmmu_huge.end_addr - 1;
  2286. host_huge_page_size = prop->pmmu_huge.page_size;
  2287. return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
  2288. host_page_size, host_huge_range_start,
  2289. host_huge_range_end, host_huge_page_size,
  2290. dram_range_start, dram_range_end, dram_page_size);
  2291. }
  2292. /**
  2293. * hl_vm_ctx_fini() - virtual memory teardown of context.
  2294. * @ctx: pointer to the habanalabs context structure.
  2295. *
  2296. * This function perform teardown the following:
  2297. * - Virtual block list of available virtual memory.
  2298. * - Virtual address to area descriptor hashtable.
  2299. * - MMU for context.
  2300. *
  2301. * In addition this function does the following:
  2302. * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
  2303. * hashtable should be empty as no valid mappings should exist at this
  2304. * point.
  2305. * - Frees any existing physical page list from the idr which relates to the
  2306. * current context asid.
  2307. * - This function checks the virtual block list for correctness. At this point
  2308. * the list should contain one element which describes the whole virtual
  2309. * memory range of the context. Otherwise, a warning is printed.
  2310. */
  2311. void hl_vm_ctx_fini(struct hl_ctx *ctx)
  2312. {
  2313. struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
  2314. struct hl_device *hdev = ctx->hdev;
  2315. struct hl_vm_hash_node *hnode;
  2316. struct hl_vm *vm = &hdev->vm;
  2317. struct hlist_node *tmp_node;
  2318. struct list_head free_list;
  2319. struct hl_mem_in args;
  2320. int i;
  2321. if (!hdev->mmu_enable)
  2322. return;
  2323. hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
  2324. /*
  2325. * Clearly something went wrong on hard reset so no point in printing
  2326. * another side effect error
  2327. */
  2328. if (!hdev->reset_info.hard_reset_pending && !hash_empty(ctx->mem_hash))
  2329. dev_dbg(hdev->dev,
  2330. "user released device without removing its memory mappings\n");
  2331. hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
  2332. dev_dbg(hdev->dev,
  2333. "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
  2334. hnode->vaddr, ctx->asid);
  2335. args.unmap.device_virt_addr = hnode->vaddr;
  2336. unmap_device_va(ctx, &args, true);
  2337. }
  2338. mutex_lock(&hdev->mmu_lock);
  2339. /* invalidate the cache once after the unmapping loop */
  2340. hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
  2341. hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);
  2342. mutex_unlock(&hdev->mmu_lock);
  2343. INIT_LIST_HEAD(&free_list);
  2344. spin_lock(&vm->idr_lock);
  2345. idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
  2346. if (phys_pg_list->asid == ctx->asid) {
  2347. dev_dbg(hdev->dev,
  2348. "page list 0x%px of asid %d is still alive\n",
  2349. phys_pg_list, ctx->asid);
  2350. atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
  2351. idr_remove(&vm->phys_pg_pack_handles, i);
  2352. list_add(&phys_pg_list->node, &free_list);
  2353. }
  2354. spin_unlock(&vm->idr_lock);
  2355. list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
  2356. free_phys_pg_pack(hdev, phys_pg_list);
  2357. va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
  2358. va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);
  2359. if (hdev->pmmu_huge_range)
  2360. va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]);
  2361. mutex_destroy(&ctx->mem_hash_lock);
  2362. hl_mmu_ctx_fini(ctx);
  2363. /* In this case we need to clear the global accounting of DRAM usage
  2364. * because the user notifies us on allocations. If the user is no more,
  2365. * all DRAM is available
  2366. */
  2367. if (ctx->asid != HL_KERNEL_ASID_ID &&
  2368. !hdev->asic_prop.dram_supports_virtual_memory)
  2369. atomic64_set(&hdev->dram_used_mem, 0);
  2370. }
  2371. /**
  2372. * hl_vm_init() - initialize virtual memory module.
  2373. * @hdev: pointer to the habanalabs device structure.
  2374. *
  2375. * This function initializes the following:
  2376. * - MMU module.
  2377. * - DRAM physical pages pool of 2MB.
  2378. * - Idr for device memory allocation handles.
  2379. */
  2380. int hl_vm_init(struct hl_device *hdev)
  2381. {
  2382. struct asic_fixed_properties *prop = &hdev->asic_prop;
  2383. struct hl_vm *vm = &hdev->vm;
  2384. int rc;
  2385. if (is_power_of_2(prop->dram_page_size))
  2386. vm->dram_pg_pool =
  2387. gen_pool_create(__ffs(prop->dram_page_size), -1);
  2388. else
  2389. vm->dram_pg_pool =
  2390. gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
  2391. if (!vm->dram_pg_pool) {
  2392. dev_err(hdev->dev, "Failed to create dram page pool\n");
  2393. return -ENOMEM;
  2394. }
  2395. kref_init(&vm->dram_pg_pool_refcount);
  2396. rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
  2397. prop->dram_end_address - prop->dram_user_base_address,
  2398. -1);
  2399. if (rc) {
  2400. dev_err(hdev->dev,
  2401. "Failed to add memory to dram page pool %d\n", rc);
  2402. goto pool_add_err;
  2403. }
  2404. spin_lock_init(&vm->idr_lock);
  2405. idr_init(&vm->phys_pg_pack_handles);
  2406. atomic64_set(&hdev->dram_used_mem, 0);
  2407. vm->init_done = true;
  2408. return 0;
  2409. pool_add_err:
  2410. gen_pool_destroy(vm->dram_pg_pool);
  2411. return rc;
  2412. }
  2413. /**
  2414. * hl_vm_fini() - virtual memory module teardown.
  2415. * @hdev: pointer to the habanalabs device structure.
  2416. *
  2417. * This function perform teardown to the following:
  2418. * - Idr for device memory allocation handles.
  2419. * - DRAM physical pages pool of 2MB.
  2420. * - MMU module.
  2421. */
  2422. void hl_vm_fini(struct hl_device *hdev)
  2423. {
  2424. struct hl_vm *vm = &hdev->vm;
  2425. if (!vm->init_done)
  2426. return;
  2427. /*
  2428. * At this point all the contexts should be freed and hence no DRAM
  2429. * memory should be in use. Hence the DRAM pool should be freed here.
  2430. */
  2431. if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
  2432. dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
  2433. __func__);
  2434. vm->init_done = false;
  2435. }
  2436. /**
  2437. * hl_hw_block_mem_init() - HW block memory initialization.
  2438. * @ctx: pointer to the habanalabs context structure.
  2439. *
  2440. * This function initializes the HW block virtual mapped addresses list and
  2441. * it's lock.
  2442. */
  2443. void hl_hw_block_mem_init(struct hl_ctx *ctx)
  2444. {
  2445. mutex_init(&ctx->hw_block_list_lock);
  2446. INIT_LIST_HEAD(&ctx->hw_block_mem_list);
  2447. }
  2448. /**
  2449. * hl_hw_block_mem_fini() - HW block memory teardown.
  2450. * @ctx: pointer to the habanalabs context structure.
  2451. *
  2452. * This function clears the HW block virtual mapped addresses list and destroys
  2453. * it's lock.
  2454. */
  2455. void hl_hw_block_mem_fini(struct hl_ctx *ctx)
  2456. {
  2457. struct hl_vm_hw_block_list_node *lnode, *tmp;
  2458. if (!list_empty(&ctx->hw_block_mem_list))
  2459. dev_crit(ctx->hdev->dev, "HW block mem list isn't empty\n");
  2460. list_for_each_entry_safe(lnode, tmp, &ctx->hw_block_mem_list, node) {
  2461. list_del(&lnode->node);
  2462. kfree(lnode);
  2463. }
  2464. mutex_destroy(&ctx->hw_block_list_lock);
  2465. }