sgl.c

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 HiSilicon Limited. */
#include <linux/align.h>
#include <linux/dma-mapping.h>
#include <linux/hisi_acc_qm.h>
#include <linux/module.h>
#include <linux/slab.h>

#define HISI_ACC_SGL_SGE_NR_MIN		1
#define HISI_ACC_SGL_NR_MAX		256
#define HISI_ACC_SGL_ALIGN_SIZE		64
#define HISI_ACC_MEM_BLOCK_NR		5
struct acc_hw_sge {
	dma_addr_t buf;
	void *page_ctrl;
	__le32 len;
	__le32 pad;
	__le32 pad0;
	__le32 pad1;
};
/* use default sgl head size 64B */
struct hisi_acc_hw_sgl {
	dma_addr_t next_dma;
	__le16 entry_sum_in_chain;
	__le16 entry_sum_in_sgl;
	__le16 entry_length_in_sgl;
	__le16 pad0;
	__le64 pad1[5];
	struct hisi_acc_hw_sgl *next;
	struct acc_hw_sge sge_entries[];
} __aligned(1);
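
/*
 * Header size check (editorial note, assuming a 64-bit kernel where
 * dma_addr_t and pointers are 8 bytes): the fields above add up to
 * 8 (next_dma) + 3 * 2 (entry sums/length) + 2 (pad0) + 40 (pad1[5]) +
 * 8 (next) = 64 bytes, matching the "default sgl head size 64B" comment,
 * so sge_entries[] starts at a 64-byte-aligned offset.
 */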
struct hisi_acc_sgl_pool {
	struct mem_block {
		struct hisi_acc_hw_sgl *sgl;
		dma_addr_t sgl_dma;
		size_t size;
	} mem_block[HISI_ACC_MEM_BLOCK_NR];
	u32 sgl_num_per_block;
	u32 block_num;
	u32 count;
	u32 sge_nr;
	size_t sgl_size;
};
/**
 * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
 * @dev: The device to which the hw sgl pool belongs.
 * @count: Count of hisi_acc_hw_sgl in the pool.
 * @sge_nr: The count of sge in each hw sgl.
 *
 * This function creates a hw sgl pool; afterwards, users can get hw sgl
 * memory from it.
 */
struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
						   u32 count, u32 sge_nr)
{
	u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl;
	struct hisi_acc_sgl_pool *pool;
	struct mem_block *block;
	u32 i, j;

	/* HISI_ACC_SGL_SGE_NR_MAX (255) comes from <linux/hisi_acc_qm.h> */
	if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
		return ERR_PTR(-EINVAL);

	sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
			 sizeof(struct hisi_acc_hw_sgl),
			 HISI_ACC_SGL_ALIGN_SIZE);

	/*
	 * The pool may allocate blocks of memory of up to
	 * PAGE_SIZE * 2^(MAX_ORDER - 1) bytes. That can exceed 2^31 on ia64,
	 * so cap the block size at 2^31.
	 */
	block_size = 1 << (PAGE_SHIFT + MAX_ORDER <= 32 ?
			   PAGE_SHIFT + MAX_ORDER - 1 : 31);
	sgl_num_per_block = block_size / sgl_size;
	block_num = count / sgl_num_per_block;
	remain_sgl = count % sgl_num_per_block;

	if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
	    (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
		return ERR_PTR(-EINVAL);

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);
	block = pool->mem_block;

	for (i = 0; i < block_num; i++) {
		block[i].sgl = dma_alloc_coherent(dev, block_size,
						  &block[i].sgl_dma,
						  GFP_KERNEL);
		if (!block[i].sgl) {
			dev_err(dev, "Fail to allocate hw SG buffer!\n");
			goto err_free_mem;
		}

		block[i].size = block_size;
	}

	if (remain_sgl > 0) {
		block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
						  &block[i].sgl_dma,
						  GFP_KERNEL);
		if (!block[i].sgl) {
			dev_err(dev, "Fail to allocate remained hw SG buffer!\n");
			goto err_free_mem;
		}

		block[i].size = remain_sgl * sgl_size;
	}

	pool->sgl_num_per_block = sgl_num_per_block;
	pool->block_num = remain_sgl ? block_num + 1 : block_num;
	pool->count = count;
	pool->sgl_size = sgl_size;
	pool->sge_nr = sge_nr;

	return pool;

err_free_mem:
	for (j = 0; j < i; j++)
		dma_free_coherent(dev, block_size, block[j].sgl,
				  block[j].sgl_dma);

	kfree_sensitive(pool);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
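
/*
 * Worked sizing example (editorial note, assuming PAGE_SHIFT = 12 and the
 * pre-6.4 MAX_ORDER = 11 semantics this code targets): block_size =
 * 1 << 22 = 4 MiB. With sge_nr = 255 on a 64-bit kernel, sgl_size =
 * ALIGN(32 * 255 + 64, 64) = 8256 bytes, so sgl_num_per_block = 508.
 * A pool with count = 1024 therefore needs two full blocks plus a third
 * block holding the remaining 8 sgls, well within HISI_ACC_MEM_BLOCK_NR = 5.
 */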
/**
 * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
 * @dev: The device to which the hw sgl pool belongs.
 * @pool: Pointer to the pool.
 *
 * This function frees the memory of a hw sgl pool.
 */
void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
{
	struct mem_block *block;
	int i;

	if (!dev || !pool)
		return;

	block = pool->mem_block;
	for (i = 0; i < pool->block_num; i++)
		dma_free_coherent(dev, block[i].size, block[i].sgl,
				  block[i].sgl_dma);

	kfree(pool);
}
EXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
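
/*
 * Usage sketch (editorial example, not part of the driver): a typical
 * caller creates one pool at init time and frees it on teardown. The
 * 256-entry pool size and 64-SGE depth below are illustrative values,
 * not requirements.
 */
#if 0	/* illustrative only */
static struct hisi_acc_sgl_pool *example_pool;

static int example_init(struct device *dev)
{
	example_pool = hisi_acc_create_sgl_pool(dev, 256, 64);
	if (IS_ERR(example_pool))
		return PTR_ERR(example_pool);

	return 0;
}

static void example_exit(struct device *dev)
{
	hisi_acc_free_sgl_pool(dev, example_pool);
}
#endif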
static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
					   u32 index, dma_addr_t *hw_sgl_dma)
{
	struct mem_block *block;
	u32 block_index, offset;

	if (!pool || !hw_sgl_dma || index >= pool->count)
		return ERR_PTR(-EINVAL);

	block = pool->mem_block;
	block_index = index / pool->sgl_num_per_block;
	offset = index % pool->sgl_num_per_block;

	*hw_sgl_dma = block[block_index].sgl_dma + pool->sgl_size * offset;
	return (void *)block[block_index].sgl + pool->sgl_size * offset;
}
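
/*
 * Index math example (editorial note, continuing the assumed sizing above
 * with sgl_num_per_block = 508): index 1020 resolves to block_index = 2 and
 * offset = 4, i.e. the fifth sgl of the third (remainder) block, located
 * 4 * sgl_size bytes past that block's base address.
 */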
static void sg_map_to_hw_sg(struct scatterlist *sgl,
			    struct acc_hw_sge *hw_sge)
{
	hw_sge->buf = sg_dma_address(sgl);
	hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
	hw_sge->page_ctrl = sg_virt(sgl);
}

static void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
{
	u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);

	var++;
	hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
}

static void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
{
	hw_sgl->entry_sum_in_chain = cpu_to_le16(sum);
}

static void clear_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
{
	struct acc_hw_sge *hw_sge = hw_sgl->sge_entries;
	int i;

	for (i = 0; i < le16_to_cpu(hw_sgl->entry_sum_in_sgl); i++) {
		hw_sge[i].page_ctrl = NULL;
		hw_sge[i].buf = 0;
		hw_sge[i].len = 0;
	}
}
/**
 * hisi_acc_sg_buf_map_to_hw_sgl() - Map a scatterlist to a hw sgl.
 * @dev: The device to which the hw sgl belongs.
 * @sgl: Scatterlist which will be mapped to the hw sgl.
 * @pool: Pool in which the hw sgl memory will be allocated.
 * @index: Index of hisi_acc_hw_sgl in the pool.
 * @hw_sgl_dma: The dma address of the allocated hw sgl.
 *
 * This function builds a hw sgl according to the input sgl; users can use
 * hw_sgl_dma as src/dst in their BD. Only a single hw sgl is currently
 * supported.
 */
struct hisi_acc_hw_sgl *
hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
			      struct scatterlist *sgl,
			      struct hisi_acc_sgl_pool *pool,
			      u32 index, dma_addr_t *hw_sgl_dma)
{
	struct hisi_acc_hw_sgl *curr_hw_sgl;
	dma_addr_t curr_sgl_dma = 0;
	struct acc_hw_sge *curr_hw_sge;
	struct scatterlist *sg;
	int i, sg_n, sg_n_mapped;

	if (!dev || !sgl || !pool || !hw_sgl_dma)
		return ERR_PTR(-EINVAL);

	sg_n = sg_nents(sgl);

	sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
	if (!sg_n_mapped) {
		dev_err(dev, "DMA mapping for SG error!\n");
		return ERR_PTR(-EINVAL);
	}

	if (sg_n_mapped > pool->sge_nr) {
		dev_err(dev, "the number of entries in input scatterlist is bigger than SGL pool setting.\n");
		/* Undo the mapping on this error path to avoid leaking it. */
		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
		return ERR_PTR(-EINVAL);
	}

	curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
	if (IS_ERR(curr_hw_sgl)) {
		dev_err(dev, "Get SGL error!\n");
		dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
		return ERR_PTR(-ENOMEM);
	}
	curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
	curr_hw_sge = curr_hw_sgl->sge_entries;

	for_each_sg(sgl, sg, sg_n_mapped, i) {
		sg_map_to_hw_sg(sg, curr_hw_sge);
		inc_hw_sgl_sge(curr_hw_sgl);
		curr_hw_sge++;
	}

	update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
	*hw_sgl_dma = curr_sgl_dma;

	return curr_hw_sgl;
}
EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
/**
 * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
 * @dev: The device to which the hw sgl belongs.
 * @sgl: Related scatterlist.
 * @hw_sgl: Virtual address of the hw sgl.
 *
 * This function unmaps an allocated hw sgl.
 */
void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
			   struct hisi_acc_hw_sgl *hw_sgl)
{
	if (!dev || !sgl || !hw_sgl)
		return;

	dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
	clear_hw_sgl_sge(hw_sgl);
	hw_sgl->entry_sum_in_chain = 0;
	hw_sgl->entry_sum_in_sgl = 0;
	hw_sgl->entry_length_in_sgl = 0;
}
EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
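
/*
 * Round-trip sketch (editorial example, not part of the driver): map a
 * scatterlist into a pool slot, hand the DMA address to the device, then
 * unmap once the job completes so the slot can be reused. The pool and
 * slot parameters are illustrative; the submit step is elided.
 */
#if 0	/* illustrative only */
static int example_round_trip(struct device *dev,
			      struct hisi_acc_sgl_pool *pool,
			      struct scatterlist *sg_list, u32 slot)
{
	struct hisi_acc_hw_sgl *hw_sgl;
	dma_addr_t hw_sgl_dma;

	hw_sgl = hisi_acc_sg_buf_map_to_hw_sgl(dev, sg_list, pool, slot,
					       &hw_sgl_dma);
	if (IS_ERR(hw_sgl))
		return PTR_ERR(hw_sgl);

	/* ... write hw_sgl_dma into the BD and wait for completion ... */

	hisi_acc_sg_buf_unmap(dev, sg_list, hw_sgl);
	return 0;
}
#endif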