command_buffer.c

// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#define CB_VA_POOL_SIZE		(4UL * SZ_1G)
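/*
 * cb_map_mem() - map a command buffer to the device's MMU.
 * @ctx: context that owns the CB.
 * @cb: the command buffer to map.
 *
 * Allocates a block from the context's CB VA pool, maps the CB's DMA address
 * to that device virtual address and invalidates the MMU cache.
 *
 * Return: 0 on success, negative error code otherwise.
 */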
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 page_size = prop->pmmu.page_size;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	if (cb->is_mmu_mapped)
		return 0;

	cb->roundup_size = roundup(cb->size, page_size);

	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
	if (!cb->virtual_addr) {
		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
		return -ENOMEM;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
	if (rc) {
		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
		goto err_va_umap;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;

	return rc;

err_va_umap:
	mutex_unlock(&hdev->mmu_lock);
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);

	return rc;
}
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}
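/*
 * cb_fini() - release the memory backing a CB.
 * @hdev: habanalabs device structure.
 * @cb: the command buffer to free.
 *
 * Internal CBs are returned to the device's internal CB pool; other CBs free
 * their DMA coherent allocation. The hl_cb structure itself is freed as well.
 */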
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hl_asic_dma_free_coherent(hdev, cb->size, cb->kernel_address, cb->bus_address);

	kfree(cb);
}
static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}
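/*
 * hl_cb_alloc() - allocate a new hl_cb object and its backing memory.
 * @hdev: habanalabs device structure.
 * @cb_size: requested size in bytes.
 * @ctx_id: ASID of the requesting context.
 * @internal_cb: true to carve the CB out of the internal CB pool.
 *
 * Internal CBs come from the device's internal pool, kernel-context CBs from
 * a DMA coherent allocation (GFP_ATOMIC with a GFP_KERNEL fallback) and user
 * CBs from a zeroed GFP_USER DMA allocation.
 */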
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of dma memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}
struct hl_cb_mmap_mem_alloc_args {
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	u32 cb_size;
	bool internal_cb;
	bool map_cb;
};
static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
	struct hl_cb *cb = buf->private;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(cb->hdev, cb);
}
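/*
 * hl_cb_mmap_mem_alloc() - allocation callback of the CB memory-manager buffer.
 * @buf: the hl_mmap_mem_buf being populated.
 * @gfp: allocation flags requested by the memory manager.
 * @args: pointer to a struct hl_cb_mmap_mem_alloc_args describing the request.
 *
 * Kernel-context requests that fit within the pre-allocated CB pool reuse a
 * pooled CB; otherwise a new CB is allocated. The CB is then tied to its
 * context, optionally mapped to the device's MMU and added to debugfs.
 */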
static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct hl_cb_mmap_mem_alloc_args *cb_args = args;
	struct hl_cb *cb;
	int rc, ctx_id = cb_args->ctx->asid;
	bool alloc_new_cb = true;

	if (!cb_args->internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_args->cb_size < PAGE_SIZE)
			cb_args->cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&cb_args->hdev->cb_pool_lock);
			if (!list_empty(&cb_args->hdev->cb_pool)) {
				cb = list_first_entry(&cb_args->hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
		if (!cb)
			return -ENOMEM;
	}

	cb->hdev = cb_args->hdev;
	cb->ctx = cb_args->ctx;
	cb->buf = buf;
	cb->buf->mappable_size = cb->size;
	cb->buf->private = cb;

	hl_ctx_get(cb->ctx);

	if (cb_args->map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(cb_args->hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(cb_args->ctx, cb);
		if (rc)
			goto release_cb;
	}

	hl_debugfs_add_cb(cb);

	return 0;

release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(cb_args->hdev, cb);

	return rc;
}
static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
				struct vm_area_struct *vma, void *args)
{
	struct hl_cb *cb = buf->private;

	return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
}
static struct hl_mmap_mem_buf_behavior cb_behavior = {
	.topic = "CB",
	.mem_id = HL_MMAP_TYPE_CB,

	.alloc = hl_cb_mmap_mem_alloc,
	.release = hl_cb_mmap_mem_release,
	.mmap = hl_cb_mmap,
};
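/**
 * hl_cb_create() - create a new command buffer.
 * @hdev: habanalabs device structure.
 * @mmg: memory manager that will own the CB handle.
 * @ctx: context that requests the CB.
 * @cb_size: requested size in bytes (must not exceed SZ_2M).
 * @internal_cb: true to allocate from the internal CB pool.
 * @map_cb: true to also map the CB to the device's MMU.
 * @handle: returned memory-manager handle of the new CB.
 *
 * Return: 0 on success, negative error code otherwise.
 */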
int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb_mmap_mem_alloc_args args = {
		.hdev = hdev,
		.ctx = ctx,
		.cb_size = cb_size,
		.internal_cb = internal_cb,
		.map_cb = map_cb,
	};
	struct hl_mmap_mem_buf *buf;
	int ctx_id = ctx->asid;

	if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		return -EBUSY;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		return -EINVAL;
	}

	buf = hl_mmap_mem_buf_alloc(
		mmg, &cb_behavior,
		ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
	if (!buf)
		return -ENOMEM;

	*handle = buf->handle;

	return 0;
}
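/**
 * hl_cb_destroy() - destroy a command buffer handle.
 * @mmg: memory manager that owns the CB handle.
 * @cb_handle: handle of the CB to destroy.
 *
 * Drops the handle's reference; the backing memory is released once the last
 * reference to the CB is put.
 *
 * Return: 0 on success, negative error code for an invalid handle.
 */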
int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
{
	int rc;

	rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
	if (rc < 0)
		return rc; /* Invalid handle */

	if (rc == 0)
		dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);

	return 0;
}
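/*
 * hl_cb_info() - query information about a command buffer.
 * @mmg: memory manager that owns the CB handle.
 * @handle: handle of the CB to query.
 * @flags: HL_CB_FLAGS_GET_DEVICE_VA to request the device virtual address.
 * @usage_cnt: returned number of command submissions currently using the CB.
 * @device_va: returned device virtual address, if the CB is MMU-mapped.
 */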
static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_cb *cb;
	int rc = 0;

	cb = hl_cb_get(mmg, handle);
	if (!cb) {
		dev_err(mmg->dev,
			"CB info failed, no match to handle 0x%llx\n", handle);
		return -EINVAL;
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		if (cb->is_mmu_mapped) {
			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
			goto out;
		}
	} else {
		*usage_cnt = atomic_read(&cb->cs_cnt);
	}

out:
	hl_cb_put(cb);
	return rc;
}
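/**
 * hl_cb_ioctl() - handler of the command buffer IOCTL.
 * @hpriv: file private data of the caller.
 * @data: pointer to a union hl_cb_args holding the IOCTL input/output.
 *
 * Dispatches the HL_CB_OP_CREATE, HL_CB_OP_DESTROY and HL_CB_OP_INFO requests
 * coming from user space.
 *
 * Return: 0 on success, negative error code otherwise.
 */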
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u32 usage_cnt = 0;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(&hpriv->mem_mgr,
					args->in.cb_handle);
		break;

	case HL_CB_OP_INFO:
		rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
				args->in.flags,
				&usage_cnt,
				&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

		if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
			args->out.device_va = device_va;
		else
			args->out.usage_cnt = usage_cnt;
		break;

	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}
struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);
	if (!buf)
		return NULL;

	return buf->private;
}
void hl_cb_put(struct hl_cb *cb)
{
	hl_mmap_mem_buf_put(cb->buf);
}
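/**
 * hl_cb_kernel_create() - create a CB on behalf of the kernel driver.
 * @hdev: habanalabs device structure.
 * @cb_size: requested size in bytes.
 * @internal_cb: true to allocate from the internal CB pool.
 *
 * Return: pointer to the new CB with a reference taken via hl_cb_get(),
 * or NULL on failure.
 */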
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
	/* hl_cb_get should never fail here */
	if (!cb) {
		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%llx\n", cb_handle);
		goto destroy_cb;
	}

	return cb;

destroy_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return NULL;
}
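/**
 * hl_cb_pool_init() - pre-allocate the device's pool of kernel CBs.
 * @hdev: habanalabs device structure.
 *
 * Allocates asic_prop.cb_pool_cb_cnt CBs of asic_prop.cb_pool_cb_size bytes
 * each and adds them to hdev->cb_pool.
 *
 * Return: 0 on success, -ENOMEM if any allocation fails (the partially built
 * pool is torn down first).
 */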
int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}
int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}
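/**
 * hl_cb_va_pool_init() - set up the per-context VA pool used for CB mapping.
 * @ctx: context to initialize.
 *
 * Reserves a CB_VA_POOL_SIZE block of the host VA range and backs a genpool
 * with it, so cb_map_mem() can hand out device virtual addresses. Does
 * nothing if the device does not support CB mapping.
 *
 * Return: 0 on success, negative error code otherwise.
 */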
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!ctx->cb_va_pool_base) {
		rc = -ENOMEM;
		goto err_pool_destroy;
	}

	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_unreserve_va_block;
	}

	return 0;

err_unreserve_va_block:
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}
void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}