pci-ioda-tce.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. // SPDX-License-Identifier: GPL-2.0+
  2. /*
  3. * TCE helpers for IODA PCI/PCIe on PowerNV platforms
  4. *
  5. * Copyright 2018 IBM Corp.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. */
  12. #include <linux/kernel.h>
  13. #include <linux/iommu.h>
  14. #include <asm/iommu.h>
  15. #include <asm/tce.h>
  16. #include "pci.h"
  17. unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
  18. {
  19. struct pci_controller *hose = phb->hose;
  20. struct device_node *dn = hose->dn;
  21. unsigned long mask = 0;
  22. int i, rc, count;
  23. u32 val;
  24. count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
  25. if (count <= 0) {
  26. mask = SZ_4K | SZ_64K;
  27. /* Add 16M for POWER8 by default */
  28. if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
  29. !cpu_has_feature(CPU_FTR_ARCH_300))
  30. mask |= SZ_16M | SZ_256M;
  31. return mask;
  32. }
  33. for (i = 0; i < count; i++) {
  34. rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
  35. i, &val);
  36. if (rc == 0)
  37. mask |= 1ULL << val;
  38. }
  39. return mask;
  40. }
  41. void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
  42. void *tce_mem, u64 tce_size,
  43. u64 dma_offset, unsigned int page_shift)
  44. {
  45. tbl->it_blocksize = 16;
  46. tbl->it_base = (unsigned long)tce_mem;
  47. tbl->it_page_shift = page_shift;
  48. tbl->it_offset = dma_offset >> tbl->it_page_shift;
  49. tbl->it_index = 0;
  50. tbl->it_size = tce_size >> 3;
  51. tbl->it_busno = 0;
  52. tbl->it_type = TCE_PCI;
  53. }
  54. static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
  55. {
  56. struct page *tce_mem = NULL;
  57. __be64 *addr;
  58. tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
  59. shift - PAGE_SHIFT);
  60. if (!tce_mem) {
  61. pr_err("Failed to allocate a TCE memory, level shift=%d\n",
  62. shift);
  63. return NULL;
  64. }
  65. addr = page_address(tce_mem);
  66. memset(addr, 0, 1UL << shift);
  67. return addr;
  68. }
  69. static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
  70. unsigned long size, unsigned int levels);
  71. static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
  72. {
  73. __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
  74. int level = tbl->it_indirect_levels;
  75. const long shift = ilog2(tbl->it_level_size);
  76. unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
  77. while (level) {
  78. int n = (idx & mask) >> (level * shift);
  79. unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
  80. if (!tce) {
  81. __be64 *tmp2;
  82. if (!alloc)
  83. return NULL;
  84. tmp2 = pnv_alloc_tce_level(tbl->it_nid,
  85. ilog2(tbl->it_level_size) + 3);
  86. if (!tmp2)
  87. return NULL;
  88. tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
  89. oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
  90. cpu_to_be64(tce)));
  91. if (oldtce) {
  92. pnv_pci_ioda2_table_do_free_pages(tmp2,
  93. ilog2(tbl->it_level_size) + 3, 1);
  94. tce = oldtce;
  95. }
  96. }
  97. tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
  98. idx &= ~mask;
  99. mask >>= shift;
  100. --level;
  101. }
  102. return tmp + idx;
  103. }
  104. int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
  105. unsigned long uaddr, enum dma_data_direction direction,
  106. unsigned long attrs)
  107. {
  108. u64 proto_tce = iommu_direction_to_tce_perm(direction);
  109. u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
  110. long i;
  111. if (proto_tce & TCE_PCI_WRITE)
  112. proto_tce |= TCE_PCI_READ;
  113. for (i = 0; i < npages; i++) {
  114. unsigned long newtce = proto_tce |
  115. ((rpn + i) << tbl->it_page_shift);
  116. unsigned long idx = index - tbl->it_offset + i;
  117. *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
  118. }
  119. return 0;
  120. }
  121. #ifdef CONFIG_IOMMU_API
  122. int pnv_tce_xchg(struct iommu_table *tbl, long index,
  123. unsigned long *hpa, enum dma_data_direction *direction)
  124. {
  125. u64 proto_tce = iommu_direction_to_tce_perm(*direction);
  126. unsigned long newtce = *hpa | proto_tce, oldtce;
  127. unsigned long idx = index - tbl->it_offset;
  128. __be64 *ptce = NULL;
  129. BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
  130. if (*direction == DMA_NONE) {
  131. ptce = pnv_tce(tbl, false, idx, false);
  132. if (!ptce) {
  133. *hpa = 0;
  134. return 0;
  135. }
  136. }
  137. if (!ptce) {
  138. ptce = pnv_tce(tbl, false, idx, true);
  139. if (!ptce)
  140. return -ENOMEM;
  141. }
  142. if (newtce & TCE_PCI_WRITE)
  143. newtce |= TCE_PCI_READ;
  144. oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
  145. *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
  146. *direction = iommu_tce_direction(oldtce);
  147. return 0;
  148. }
  149. __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
  150. {
  151. if (WARN_ON_ONCE(!tbl->it_userspace))
  152. return NULL;
  153. return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
  154. }
  155. #endif
  156. void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
  157. {
  158. long i;
  159. for (i = 0; i < npages; i++) {
  160. unsigned long idx = index - tbl->it_offset + i;
  161. __be64 *ptce = pnv_tce(tbl, false, idx, false);
  162. if (ptce)
  163. *ptce = cpu_to_be64(0);
  164. else
  165. /* Skip the rest of the level */
  166. i |= tbl->it_level_size - 1;
  167. }
  168. }
  169. unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
  170. {
  171. __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
  172. if (!ptce)
  173. return 0;
  174. return be64_to_cpu(*ptce);
  175. }
  176. static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
  177. unsigned long size, unsigned int levels)
  178. {
  179. const unsigned long addr_ul = (unsigned long) addr &
  180. ~(TCE_PCI_READ | TCE_PCI_WRITE);
  181. if (levels) {
  182. long i;
  183. u64 *tmp = (u64 *) addr_ul;
  184. for (i = 0; i < size; ++i) {
  185. unsigned long hpa = be64_to_cpu(tmp[i]);
  186. if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
  187. continue;
  188. pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
  189. levels - 1);
  190. }
  191. }
  192. free_pages(addr_ul, get_order(size << 3));
  193. }
  194. void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
  195. {
  196. const unsigned long size = tbl->it_indirect_levels ?
  197. tbl->it_level_size : tbl->it_size;
  198. if (!tbl->it_size)
  199. return;
  200. pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
  201. tbl->it_indirect_levels);
  202. if (tbl->it_userspace) {
  203. pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
  204. tbl->it_indirect_levels);
  205. }
  206. }
  207. static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
  208. unsigned int levels, unsigned long limit,
  209. unsigned long *current_offset, unsigned long *total_allocated)
  210. {
  211. __be64 *addr, *tmp;
  212. unsigned long allocated = 1UL << shift;
  213. unsigned int entries = 1UL << (shift - 3);
  214. long i;
  215. addr = pnv_alloc_tce_level(nid, shift);
  216. *total_allocated += allocated;
  217. --levels;
  218. if (!levels) {
  219. *current_offset += allocated;
  220. return addr;
  221. }
  222. for (i = 0; i < entries; ++i) {
  223. tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
  224. levels, limit, current_offset, total_allocated);
  225. if (!tmp)
  226. break;
  227. addr[i] = cpu_to_be64(__pa(tmp) |
  228. TCE_PCI_READ | TCE_PCI_WRITE);
  229. if (*current_offset >= limit)
  230. break;
  231. }
  232. return addr;
  233. }
  234. long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
  235. __u32 page_shift, __u64 window_size, __u32 levels,
  236. bool alloc_userspace_copy, struct iommu_table *tbl)
  237. {
  238. void *addr, *uas = NULL;
  239. unsigned long offset = 0, level_shift, total_allocated = 0;
  240. unsigned long total_allocated_uas = 0;
  241. const unsigned int window_shift = ilog2(window_size);
  242. unsigned int entries_shift = window_shift - page_shift;
  243. unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
  244. PAGE_SHIFT);
  245. const unsigned long tce_table_size = 1UL << table_shift;
  246. if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
  247. return -EINVAL;
  248. if (!is_power_of_2(window_size))
  249. return -EINVAL;
  250. /* Adjust direct table size from window_size and levels */
  251. entries_shift = (entries_shift + levels - 1) / levels;
  252. level_shift = entries_shift + 3;
  253. level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
  254. if ((level_shift - 3) * levels + page_shift >= 55)
  255. return -EINVAL;
  256. /* Allocate TCE table */
  257. addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
  258. 1, tce_table_size, &offset, &total_allocated);
  259. /* addr==NULL means that the first level allocation failed */
  260. if (!addr)
  261. return -ENOMEM;
  262. /*
  263. * First level was allocated but some lower level failed as
  264. * we did not allocate as much as we wanted,
  265. * release partially allocated table.
  266. */
  267. if (levels == 1 && offset < tce_table_size)
  268. goto free_tces_exit;
  269. /* Allocate userspace view of the TCE table */
  270. if (alloc_userspace_copy) {
  271. offset = 0;
  272. uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
  273. 1, tce_table_size, &offset,
  274. &total_allocated_uas);
  275. if (!uas)
  276. goto free_tces_exit;
  277. if (levels == 1 && (offset < tce_table_size ||
  278. total_allocated_uas != total_allocated))
  279. goto free_uas_exit;
  280. }
  281. /* Setup linux iommu table */
  282. pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
  283. page_shift);
  284. tbl->it_level_size = 1ULL << (level_shift - 3);
  285. tbl->it_indirect_levels = levels - 1;
  286. tbl->it_userspace = uas;
  287. tbl->it_nid = nid;
  288. pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
  289. window_size, tce_table_size, bus_offset, tbl->it_base,
  290. tbl->it_userspace, 1, levels);
  291. return 0;
  292. free_uas_exit:
  293. pnv_pci_ioda2_table_do_free_pages(uas,
  294. 1ULL << (level_shift - 3), levels - 1);
  295. free_tces_exit:
  296. pnv_pci_ioda2_table_do_free_pages(addr,
  297. 1ULL << (level_shift - 3), levels - 1);
  298. return -ENOMEM;
  299. }
  300. void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
  301. struct iommu_table_group *table_group)
  302. {
  303. long i;
  304. bool found;
  305. struct iommu_table_group_link *tgl;
  306. if (!tbl || !table_group)
  307. return;
  308. /* Remove link to a group from table's list of attached groups */
  309. found = false;
  310. rcu_read_lock();
  311. list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
  312. if (tgl->table_group == table_group) {
  313. list_del_rcu(&tgl->next);
  314. kfree_rcu(tgl, rcu);
  315. found = true;
  316. break;
  317. }
  318. }
  319. rcu_read_unlock();
  320. if (WARN_ON(!found))
  321. return;
  322. /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
  323. found = false;
  324. for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
  325. if (table_group->tables[i] == tbl) {
  326. iommu_tce_table_put(tbl);
  327. table_group->tables[i] = NULL;
  328. found = true;
  329. break;
  330. }
  331. }
  332. WARN_ON(!found);
  333. }
  334. long pnv_pci_link_table_and_group(int node, int num,
  335. struct iommu_table *tbl,
  336. struct iommu_table_group *table_group)
  337. {
  338. struct iommu_table_group_link *tgl = NULL;
  339. if (WARN_ON(!tbl || !table_group))
  340. return -EINVAL;
  341. tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
  342. node);
  343. if (!tgl)
  344. return -ENOMEM;
  345. tgl->table_group = table_group;
  346. list_add_rcu(&tgl->next, &tbl->it_group_list);
  347. table_group->tables[num] = iommu_tce_table_get(tbl);
  348. return 0;
  349. }