ocxl.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. // SPDX-License-Identifier: GPL-2.0+
  2. // Copyright 2017 IBM Corp.
  3. #include <asm/pnv-ocxl.h>
  4. #include <asm/opal.h>
  5. #include <misc/ocxl-config.h>
  6. #include "pci.h"
  7. #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full
  8. #define PNV_OCXL_ACTAG_MAX 64
  9. /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
  10. #define PNV_OCXL_PASID_BITS 15
  11. #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)
  12. #define AFU_PRESENT (1 << 31)
  13. #define AFU_INDEX_MASK 0x3F000000
  14. #define AFU_INDEX_SHIFT 24
  15. #define ACTAG_MASK 0xFFF
/* A contiguous range of actags assigned to one PCI function */
struct actag_range {
	u16 start;	/* first actag of the range */
	u16 count;	/* number of actags in the range */
};
/*
 * Per-device actag bookkeeping, shared by all the PCI functions of the
 * same opencapi device. The device is identified by domain/bus/slot;
 * per-function data is indexed by the PCI function number (0-7).
 */
struct npu_link {
	struct list_head list;			/* node in links_list */
	int domain;				/* PCI domain number */
	int bus;				/* PCI bus number */
	int dev;				/* PCI slot (device) number */
	u16 fn_desired_actags[8];		/* actags desired, per function */
	struct actag_range fn_actags[8];	/* actags assigned, per function */
	bool assignment_done;			/* fn_actags ranges are valid */
};
/* All npu_link structs seen so far, protected by links_list_lock */
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);
  31. /*
  32. * opencapi actags handling:
  33. *
  34. * When sending commands, the opencapi device references the memory
  35. * context it's targeting with an 'actag', which is really an alias
  36. * for a (BDF, pasid) combination. When it receives a command, the NPU
  37. * must do a lookup of the actag to identify the memory context. The
  38. * hardware supports a finite number of actags per link (64 for
  39. * POWER9).
  40. *
  41. * The device can carry multiple functions, and each function can have
  42. * multiple AFUs. Each AFU advertises in its config space the number
  43. * of desired actags. The host must configure in the config space of
  44. * the AFU how many actags the AFU is really allowed to use (which can
  45. * be less than what the AFU desires).
  46. *
  47. * When a PCI function is probed by the driver, it has no visibility
  48. * about the other PCI functions and how many actags they'd like,
  49. * which makes it impossible to distribute actags fairly among AFUs.
  50. *
  51. * Unfortunately, the only way to know how many actags a function
  52. * desires is by looking at the data for each AFU in the config space
  53. * and add them up. Similarly, the only way to know how many actags
  54. * all the functions of the physical device desire is by adding the
  55. * previously computed function counts. Then we can match that against
  56. * what the hardware supports.
  57. *
  58. * To get a comprehensive view, we use a 'pci fixup': at the end of
  59. * PCI enumeration, each function counts how many actags its AFUs
  60. * desire and we save it in a 'npu_link' structure, shared between all
  61. * the PCI functions of a same device. Therefore, when the first
  62. * function is probed by the driver, we can get an idea of the total
  63. * count of desired actags for the device, and assign the actags to
  64. * the AFUs, by pro-rating if needed.
  65. */
  66. static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
  67. {
  68. int vsec = pos;
  69. u16 vendor, id;
  70. while ((vsec = pci_find_next_ext_capability(dev, vsec,
  71. OCXL_EXT_CAP_ID_DVSEC))) {
  72. pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
  73. &vendor);
  74. pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
  75. if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
  76. return vsec;
  77. }
  78. return 0;
  79. }
  80. static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
  81. {
  82. int vsec = 0;
  83. u8 idx;
  84. while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
  85. vsec))) {
  86. pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
  87. &idx);
  88. if (idx == afu_idx)
  89. return vsec;
  90. }
  91. return 0;
  92. }
  93. static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
  94. {
  95. int pos;
  96. u32 val;
  97. pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
  98. OCXL_DVSEC_FUNC_ID);
  99. if (!pos)
  100. return -ESRCH;
  101. pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
  102. if (val & AFU_PRESENT)
  103. *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
  104. else
  105. *afu_idx = -1;
  106. return 0;
  107. }
  108. static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
  109. {
  110. int pos;
  111. u16 actag_sup;
  112. pos = find_dvsec_afu_ctrl(dev, afu_idx);
  113. if (!pos)
  114. return -ESRCH;
  115. pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
  116. &actag_sup);
  117. *actag = actag_sup & ACTAG_MASK;
  118. return 0;
  119. }
  120. static struct npu_link *find_link(struct pci_dev *dev)
  121. {
  122. struct npu_link *link;
  123. list_for_each_entry(link, &links_list, list) {
  124. /* The functions of a device all share the same link */
  125. if (link->domain == pci_domain_nr(dev->bus) &&
  126. link->bus == dev->bus->number &&
  127. link->dev == PCI_SLOT(dev->devfn)) {
  128. return link;
  129. }
  130. }
  131. /* link doesn't exist yet. Allocate one */
  132. link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
  133. if (!link)
  134. return NULL;
  135. link->domain = pci_domain_nr(dev->bus);
  136. link->bus = dev->bus->number;
  137. link->dev = PCI_SLOT(dev->devfn);
  138. list_add(&link->list, &links_list);
  139. return link;
  140. }
  141. static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
  142. {
  143. struct pci_controller *hose = pci_bus_to_host(dev->bus);
  144. struct pnv_phb *phb = hose->private_data;
  145. struct npu_link *link;
  146. int rc, afu_idx = -1, i, actag;
  147. if (!machine_is(powernv))
  148. return;
  149. if (phb->type != PNV_PHB_NPU_OCAPI)
  150. return;
  151. mutex_lock(&links_list_lock);
  152. link = find_link(dev);
  153. if (!link) {
  154. dev_warn(&dev->dev, "couldn't update actag information\n");
  155. mutex_unlock(&links_list_lock);
  156. return;
  157. }
  158. /*
  159. * Check how many actags are desired for the AFUs under that
  160. * function and add it to the count for the link
  161. */
  162. rc = get_max_afu_index(dev, &afu_idx);
  163. if (rc) {
  164. /* Most likely an invalid config space */
  165. dev_dbg(&dev->dev, "couldn't find AFU information\n");
  166. afu_idx = -1;
  167. }
  168. link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
  169. for (i = 0; i <= afu_idx; i++) {
  170. /*
  171. * AFU index 'holes' are allowed. So don't fail if we
  172. * can't read the actag info for an index
  173. */
  174. rc = get_actag_count(dev, i, &actag);
  175. if (rc)
  176. continue;
  177. link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
  178. }
  179. dev_dbg(&dev->dev, "total actags for function: %d\n",
  180. link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
  181. mutex_unlock(&links_list_lock);
  182. }
  183. DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
  184. static u16 assign_fn_actags(u16 desired, u16 total)
  185. {
  186. u16 count;
  187. if (total <= PNV_OCXL_ACTAG_MAX)
  188. count = desired;
  189. else
  190. count = PNV_OCXL_ACTAG_MAX * desired / total;
  191. return count;
  192. }
  193. static void assign_actags(struct npu_link *link)
  194. {
  195. u16 actag_count, range_start = 0, total_desired = 0;
  196. int i;
  197. for (i = 0; i < 8; i++)
  198. total_desired += link->fn_desired_actags[i];
  199. for (i = 0; i < 8; i++) {
  200. if (link->fn_desired_actags[i]) {
  201. actag_count = assign_fn_actags(
  202. link->fn_desired_actags[i],
  203. total_desired);
  204. link->fn_actags[i].start = range_start;
  205. link->fn_actags[i].count = actag_count;
  206. range_start += actag_count;
  207. WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
  208. }
  209. pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
  210. link->domain, link->bus, link->dev, i,
  211. link->fn_actags[i].start, link->fn_actags[i].count,
  212. link->fn_desired_actags[i]);
  213. }
  214. link->assignment_done = true;
  215. }
  216. int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
  217. u16 *supported)
  218. {
  219. struct npu_link *link;
  220. mutex_lock(&links_list_lock);
  221. link = find_link(dev);
  222. if (!link) {
  223. dev_err(&dev->dev, "actag information not found\n");
  224. mutex_unlock(&links_list_lock);
  225. return -ENODEV;
  226. }
  227. /*
  228. * On p9, we only have 64 actags per link, so they must be
  229. * shared by all the functions of the same adapter. We counted
  230. * the desired actag counts during PCI enumeration, so that we
  231. * can allocate a pro-rated number of actags to each function.
  232. */
  233. if (!link->assignment_done)
  234. assign_actags(link);
  235. *base = link->fn_actags[PCI_FUNC(dev->devfn)].start;
  236. *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;
  237. *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
  238. mutex_unlock(&links_list_lock);
  239. return 0;
  240. }
  241. EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
  242. int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
  243. {
  244. struct npu_link *link;
  245. int i, rc = -EINVAL;
  246. /*
  247. * The number of PASIDs (process address space ID) which can
  248. * be used by a function depends on how many functions exist
  249. * on the device. The NPU needs to be configured to know how
  250. * many bits are available to PASIDs and how many are to be
  251. * used by the function BDF identifier.
  252. *
  253. * We only support one AFU-carrying function for now.
  254. */
  255. mutex_lock(&links_list_lock);
  256. link = find_link(dev);
  257. if (!link) {
  258. dev_err(&dev->dev, "actag information not found\n");
  259. mutex_unlock(&links_list_lock);
  260. return -ENODEV;
  261. }
  262. for (i = 0; i < 8; i++)
  263. if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
  264. *count = PNV_OCXL_PASID_MAX;
  265. rc = 0;
  266. break;
  267. }
  268. mutex_unlock(&links_list_lock);
  269. dev_dbg(&dev->dev, "%d PASIDs available for function\n",
  270. rc ? 0 : *count);
  271. return rc;
  272. }
  273. EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
  274. static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
  275. {
  276. int shift, idx;
  277. WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
  278. idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
  279. shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
  280. buf[idx] |= rate << shift;
  281. }
  282. int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
  283. char *rate_buf, int rate_buf_size)
  284. {
  285. if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
  286. return -EINVAL;
  287. /*
  288. * The TL capabilities are a characteristic of the NPU, so
  289. * we go with hard-coded values.
  290. *
  291. * The receiving rate of each template is encoded on 4 bits.
  292. *
  293. * On P9:
  294. * - templates 0 -> 3 are supported
  295. * - templates 0, 1 and 3 have a 0 receiving rate
  296. * - template 2 has receiving rate of 1 (extra cycle)
  297. */
  298. memset(rate_buf, 0, rate_buf_size);
  299. set_templ_rate(2, 1, rate_buf);
  300. *cap = PNV_OCXL_TL_P9_RECV_CAP;
  301. return 0;
  302. }
  303. EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
  304. int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
  305. uint64_t rate_buf_phys, int rate_buf_size)
  306. {
  307. struct pci_controller *hose = pci_bus_to_host(dev->bus);
  308. struct pnv_phb *phb = hose->private_data;
  309. int rc;
  310. if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
  311. return -EINVAL;
  312. rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
  313. rate_buf_phys, rate_buf_size);
  314. if (rc) {
  315. dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
  316. return -EINVAL;
  317. }
  318. return 0;
  319. }
  320. EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
  321. int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
  322. {
  323. int rc;
  324. rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
  325. if (rc) {
  326. dev_err(&dev->dev,
  327. "Can't get translation interrupt for device\n");
  328. return rc;
  329. }
  330. return 0;
  331. }
  332. EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
  333. void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
  334. void __iomem *tfc, void __iomem *pe_handle)
  335. {
  336. iounmap(dsisr);
  337. iounmap(dar);
  338. iounmap(tfc);
  339. iounmap(pe_handle);
  340. }
  341. EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
  342. int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
  343. void __iomem **dar, void __iomem **tfc,
  344. void __iomem **pe_handle)
  345. {
  346. u64 reg;
  347. int i, j, rc = 0;
  348. void __iomem *regs[4];
  349. /*
  350. * opal stores the mmio addresses of the DSISR, DAR, TFC and
  351. * PE_HANDLE registers in a device tree property, in that
  352. * order
  353. */
  354. for (i = 0; i < 4; i++) {
  355. rc = of_property_read_u64_index(dev->dev.of_node,
  356. "ibm,opal-xsl-mmio", i, &reg);
  357. if (rc)
  358. break;
  359. regs[i] = ioremap(reg, 8);
  360. if (!regs[i]) {
  361. rc = -EINVAL;
  362. break;
  363. }
  364. }
  365. if (rc) {
  366. dev_err(&dev->dev, "Can't map translation mmio registers\n");
  367. for (j = i - 1; j >= 0; j--)
  368. iounmap(regs[j]);
  369. } else {
  370. *dsisr = regs[0];
  371. *dar = regs[1];
  372. *tfc = regs[2];
  373. *pe_handle = regs[3];
  374. }
  375. return rc;
  376. }
  377. EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
/* Opaque platform data handed back to the caller of pnv_ocxl_spa_setup() */
struct spa_data {
	u64 phb_opal_id;	/* opal id of the PHB hosting the link */
	u32 bdfn;		/* bus/device/function of the device */
};
  382. int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
  383. void **platform_data)
  384. {
  385. struct pci_controller *hose = pci_bus_to_host(dev->bus);
  386. struct pnv_phb *phb = hose->private_data;
  387. struct spa_data *data;
  388. u32 bdfn;
  389. int rc;
  390. data = kzalloc(sizeof(*data), GFP_KERNEL);
  391. if (!data)
  392. return -ENOMEM;
  393. bdfn = (dev->bus->number << 8) | dev->devfn;
  394. rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
  395. PE_mask);
  396. if (rc) {
  397. dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
  398. kfree(data);
  399. return rc;
  400. }
  401. data->phb_opal_id = phb->opal_id;
  402. data->bdfn = bdfn;
  403. *platform_data = (void *) data;
  404. return 0;
  405. }
  406. EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
  407. void pnv_ocxl_spa_release(void *platform_data)
  408. {
  409. struct spa_data *data = (struct spa_data *) platform_data;
  410. int rc;
  411. rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
  412. WARN_ON(rc);
  413. kfree(data);
  414. }
  415. EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
  416. int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
  417. {
  418. struct spa_data *data = (struct spa_data *) platform_data;
  419. return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
  420. }
  421. EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
  422. int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
  423. uint64_t lpcr, void __iomem **arva)
  424. {
  425. struct pci_controller *hose = pci_bus_to_host(dev->bus);
  426. struct pnv_phb *phb = hose->private_data;
  427. u64 mmio_atsd;
  428. int rc;
  429. /* ATSD physical address.
  430. * ATSD LAUNCH register: write access initiates a shoot down to
  431. * initiate the TLB Invalidate command.
  432. */
  433. rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
  434. 0, &mmio_atsd);
  435. if (rc) {
  436. dev_info(&dev->dev, "No available ATSD found\n");
  437. return rc;
  438. }
  439. /* Assign a register set to a Logical Partition and MMIO ATSD
  440. * LPARID register to the required value.
  441. */
  442. rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
  443. lparid, lpcr);
  444. if (rc) {
  445. dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
  446. return rc;
  447. }
  448. *arva = ioremap(mmio_atsd, 24);
  449. if (!(*arva)) {
  450. dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
  451. rc = -ENOMEM;
  452. }
  453. return rc;
  454. }
  455. EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
/* Tear down the ATSD register mapping created by pnv_ocxl_map_lpar() */
void pnv_ocxl_unmap_lpar(void __iomem *arva)
{
	iounmap(arva);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
/*
 * Invalidate NPU TLB entries through the XTS MMIO ATSD register set
 * mapped at @arva (see pnv_ocxl_map_lpar()). When @addr is non-zero,
 * a single address of @page_size is invalidated for @pid; otherwise
 * the whole PID context is targeted. The function then polls the
 * ATSD status register until the shoot down completes, giving up
 * after PNV_OCXL_ATSD_TIMEOUT seconds.
 */
void pnv_ocxl_tlb_invalidate(void __iomem *arva,
			     unsigned long pid,
			     unsigned long addr,
			     unsigned long page_size)
{
	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
	u64 val = 0ull;
	int pend;
	u8 size;

	if (!(arva))
		return;

	if (addr) {
		/* load Abbreviated Virtual Address register with
		 * the necessary value
		 */
		val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
	}

	/* Write access initiates a shoot down to initiate the
	 * TLB Invalidate command
	 */
	val = PNV_OCXL_ATSD_LNCH_R;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
	if (addr)
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
	else {
		/* no address: invalidate the whole PID context */
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
	}
	val |= PNV_OCXL_ATSD_LNCH_PRS;

	/* Actual Page Size to be invalidated
	 * 000 4KB
	 * 101 64KB
	 * 001 2MB
	 * 010 1GB
	 */
	size = 0b101;	/* default to 64KB */
	if (page_size == 0x1000)
		size = 0b000;
	if (page_size == 0x200000)
		size = 0b001;
	if (page_size == 0x40000000)
		size = 0b010;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);

	/* Poll the ATSD status register to determine when the
	 * TLB Invalidate has been completed. The top bit of the
	 * status register is set while the shoot down is pending.
	 */
	val = in_be64(arva + PNV_OCXL_ATSD_STAT);
	pend = val >> 63;

	while (pend) {
		if (time_after_eq(jiffies, timeout)) {
			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
				__func__, val, pid);
			return;
		}
		cpu_relax();
		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
		pend = val >> 63;
	}
}
EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);