link.c

// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/sched/mm.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/irqdomain.h>
#include <asm/copro.h>
#include <asm/pnv-ocxl.h>
#include <asm/xive.h>
#include <misc/ocxl.h>
#include "ocxl_internal.h"
#include "trace.h"

#define SPA_PASID_BITS 15
#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1)
#define SPA_PE_MASK SPA_PASID_MAX
#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 Mb */

#define SPA_CFG_SF (1ull << (63-0))
#define SPA_CFG_TA (1ull << (63-1))
#define SPA_CFG_HV (1ull << (63-3))
#define SPA_CFG_UV (1ull << (63-4))
#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */
#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */
#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */
#define SPA_CFG_PR (1ull << (63-49))
#define SPA_CFG_TC (1ull << (63-54))
#define SPA_CFG_DR (1ull << (63-59))

#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */
#define SPA_XSL_S (1ull << (63-38)) /* Store operation */

#define SPA_PE_VALID 0x80000000

struct ocxl_link;

struct pe_data {
        struct mm_struct *mm;
        /* callback to trigger when a translation fault occurs */
        void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
        /* opaque pointer to be passed to the above callback */
        void *xsl_err_data;
        struct rcu_head rcu;
        struct ocxl_link *link;
        struct mmu_notifier mmu_notifier;
};

struct spa {
        struct ocxl_process_element *spa_mem;
        int spa_order;
        struct mutex spa_lock;
        struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
        char *irq_name;
        int virq;
        void __iomem *reg_dsisr;
        void __iomem *reg_dar;
        void __iomem *reg_tfc;
        void __iomem *reg_pe_handle;
        /*
         * The following fields are used by the memory fault
         * interrupt handler. We can only have one interrupt at a
         * time. The NPU won't raise another interrupt until the
         * previous one has been ack'd by writing to the TFC register.
         */
        struct xsl_fault {
                struct work_struct fault_work;
                u64 pe;
                u64 dsisr;
                u64 dar;
                struct pe_data pe_data;
        } xsl_fault;
};

/*
 * An opencapi link can be used by several PCI functions. We have
 * one link per device slot.
 *
 * A linked list of opencapi links should suffice, as there's a
 * limited number of opencapi slots on a system and lookup is only
 * done when the device is probed.
 */
struct ocxl_link {
        struct list_head list;
        struct kref ref;
        int domain;
        int bus;
        int dev;
        void __iomem *arva; /* ATSD register virtual address */
        spinlock_t atsd_lock; /* to serialize shootdowns */
        atomic_t irq_available;
        struct spa *spa;
        void *platform_data;
};

static LIST_HEAD(links_list);
static DEFINE_MUTEX(links_list_lock);

enum xsl_response {
        CONTINUE,
        ADDRESS_ERROR,
        RESTART,
};
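
/*
 * Read the fault information latched by the XSL: DSISR, DAR and the
 * handle of the faulting process element.
 */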
static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
{
        u64 reg;

        *dsisr = in_be64(spa->reg_dsisr);
        *dar = in_be64(spa->reg_dar);
        reg = in_be64(spa->reg_pe_handle);
        *pe = reg & SPA_PE_MASK;
}
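
/*
 * Acknowledge the pending translation fault by writing the TFC
 * register. Depending on the response, the XSL either restarts the
 * translation (RESTART) or reports an error to the AFU (ADDRESS_ERROR).
 */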
static void ack_irq(struct spa *spa, enum xsl_response r)
{
        u64 reg = 0;

        /* continue is not supported */
        if (r == RESTART)
                reg = PPC_BIT(31);
        else if (r == ADDRESS_ERROR)
                reg = PPC_BIT(30);
        else
                WARN(1, "Invalid irq response %d\n", r);

        if (reg) {
                trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
                        spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
                out_be64(spa->reg_tfc, reg);
        }
}
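
/*
 * Bottom half of the translation fault handler: resolve the fault in
 * process context with copro_handle_mm_fault() (and, in hash mode,
 * preload the hash page table entry), then acknowledge the interrupt
 * so that the XSL can raise the next one.
 */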
static void xsl_fault_handler_bh(struct work_struct *fault_work)
{
        vm_fault_t flt = 0;
        unsigned long access, flags, inv_flags = 0;
        enum xsl_response r;
        struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
                fault_work);
        struct spa *spa = container_of(fault, struct spa, xsl_fault);
        int rc;

        /*
         * We must release a reference on mm_users whenever exiting this
         * function (taken in the memory fault interrupt handler)
         */
        rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
                &flt);
        if (rc) {
                pr_debug("copro_handle_mm_fault failed: %d\n", rc);
                if (fault->pe_data.xsl_err_cb) {
                        fault->pe_data.xsl_err_cb(
                                fault->pe_data.xsl_err_data,
                                fault->dar, fault->dsisr);
                }
                r = ADDRESS_ERROR;
                goto ack;
        }

        if (!radix_enabled()) {
                /*
                 * update_mmu_cache() will not have loaded the hash
                 * since current->trap is not a 0x400 or 0x300, so
                 * just call hash_page_mm() here.
                 */
                access = _PAGE_PRESENT | _PAGE_READ;
                if (fault->dsisr & SPA_XSL_S)
                        access |= _PAGE_WRITE;

                if (get_region_id(fault->dar) != USER_REGION_ID)
                        access |= _PAGE_PRIVILEGED;

                local_irq_save(flags);
                hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
                        inv_flags);
                local_irq_restore(flags);
        }
        r = RESTART;
ack:
        mmput(fault->pe_data.mm);
        ack_irq(spa, r);
}
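
/*
 * Hard interrupt handler for the translation faults raised by the XSL.
 * Look up the faulting context in the radix tree and, if it has a
 * valid mm, take a reference on mm_users and defer the fault to the
 * bottom half.
 */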
static irqreturn_t xsl_fault_handler(int irq, void *data)
{
        struct ocxl_link *link = (struct ocxl_link *) data;
        struct spa *spa = link->spa;
        u64 dsisr, dar, pe_handle;
        struct pe_data *pe_data;
        struct ocxl_process_element *pe;
        int pid;
        bool schedule = false;

        read_irq(spa, &dsisr, &dar, &pe_handle);
        trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);

        WARN_ON(pe_handle > SPA_PE_MASK);
        pe = spa->spa_mem + pe_handle;
        pid = be32_to_cpu(pe->pid);
        /* We could be reading all null values here if the PE is being
         * removed while an interrupt kicks in. It's not supposed to
         * happen if the driver notified the AFU to terminate the
         * PASID, and the AFU waited for pending operations before
         * acknowledging. But even if it happens, we won't find a
         * memory context below and fail silently, so it should be ok.
         */
        if (!(dsisr & SPA_XSL_TF)) {
                WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
                ack_irq(spa, ADDRESS_ERROR);
                return IRQ_HANDLED;
        }

        rcu_read_lock();
        pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
        if (!pe_data) {
                /*
                 * Could only happen if the driver didn't notify the
                 * AFU about PASID termination before removing the PE,
                 * or the AFU didn't wait for all memory access to
                 * have completed.
                 *
                 * Either way, we fail early, but we shouldn't log an
                 * error message, as it is a valid (if unexpected)
                 * scenario
                 */
                rcu_read_unlock();
                pr_debug("Unknown mm context for xsl interrupt\n");
                ack_irq(spa, ADDRESS_ERROR);
                return IRQ_HANDLED;
        }

        if (!pe_data->mm) {
                /*
                 * translation fault from a kernel context - an OpenCAPI
                 * device tried to access a bad kernel address
                 */
                rcu_read_unlock();
                pr_warn("Unresolved OpenCAPI xsl fault in kernel context\n");
                ack_irq(spa, ADDRESS_ERROR);
                return IRQ_HANDLED;
        }
        WARN_ON(pe_data->mm->context.id != pid);

        if (mmget_not_zero(pe_data->mm)) {
                spa->xsl_fault.pe = pe_handle;
                spa->xsl_fault.dar = dar;
                spa->xsl_fault.dsisr = dsisr;
                spa->xsl_fault.pe_data = *pe_data;
                schedule = true;
                /* mm_users count released by bottom half */
        }
        rcu_read_unlock();
        if (schedule)
                schedule_work(&spa->xsl_fault.fault_work);
        else
                ack_irq(spa, ADDRESS_ERROR);
        return IRQ_HANDLED;
}

static void unmap_irq_registers(struct spa *spa)
{
        pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
                spa->reg_pe_handle);
}

static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
{
        return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
                &spa->reg_tfc, &spa->reg_pe_handle);
}
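
/*
 * Map the XSL fault registers and request the translation fault
 * interrupt for the link.
 */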
static int setup_xsl_irq(struct pci_dev *dev, struct ocxl_link *link)
{
        struct spa *spa = link->spa;
        int rc;
        int hwirq;

        rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
        if (rc)
                return rc;

        rc = map_irq_registers(dev, spa);
        if (rc)
                return rc;

        spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
                link->domain, link->bus, link->dev);
        if (!spa->irq_name) {
                dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
                rc = -ENOMEM;
                goto err_xsl;
        }
        /*
         * At some point, we'll need to look into allowing a higher
         * number of interrupts. Could we have an IRQ domain per link?
         */
        spa->virq = irq_create_mapping(NULL, hwirq);
        if (!spa->virq) {
                dev_err(&dev->dev,
                        "irq_create_mapping failed for translation interrupt\n");
                rc = -EINVAL;
                goto err_name;
        }

        dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);

        rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
                link);
        if (rc) {
                dev_err(&dev->dev,
                        "request_irq failed for translation interrupt: %d\n",
                        rc);
                rc = -EINVAL;
                goto err_mapping;
        }
        return 0;

err_mapping:
        irq_dispose_mapping(spa->virq);
err_name:
        kfree(spa->irq_name);
err_xsl:
        unmap_irq_registers(spa);
        return rc;
}

static void release_xsl_irq(struct ocxl_link *link)
{
        struct spa *spa = link->spa;

        if (spa->virq) {
                free_irq(spa->virq, link);
                irq_dispose_mapping(spa->virq);
        }
        kfree(spa->irq_name);
        unmap_irq_registers(spa);
}
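
/*
 * Allocate the Shared Process Area for the link: a zeroed block of
 * 2^SPA_SPA_SIZE_LOG bytes holding the process element table.
 */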
static int alloc_spa(struct pci_dev *dev, struct ocxl_link *link)
{
        struct spa *spa;

        spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
        if (!spa)
                return -ENOMEM;

        mutex_init(&spa->spa_lock);
        INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
        INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);

        spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
        spa->spa_mem = (struct ocxl_process_element *)
                __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
        if (!spa->spa_mem) {
                dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
                kfree(spa);
                return -ENOMEM;
        }
        pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
                link->dev, spa->spa_mem);

        link->spa = spa;
        return 0;
}

static void free_spa(struct ocxl_link *link)
{
        struct spa *spa = link->spa;

        pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
                link->dev);

        if (spa && spa->spa_mem) {
                free_pages((unsigned long) spa->spa_mem, spa->spa_order);
                kfree(spa);
                link->spa = NULL;
        }
}
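
/*
 * Allocate and initialize a link: SPA, translation fault interrupt,
 * platform data and, when available, the ATSD registers used for MMIO
 * TLB invalidations.
 */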
static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_link)
{
        struct ocxl_link *link;
        int rc;

        link = kzalloc(sizeof(struct ocxl_link), GFP_KERNEL);
        if (!link)
                return -ENOMEM;

        kref_init(&link->ref);
        link->domain = pci_domain_nr(dev->bus);
        link->bus = dev->bus->number;
        link->dev = PCI_SLOT(dev->devfn);
        atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
        spin_lock_init(&link->atsd_lock);

        rc = alloc_spa(dev, link);
        if (rc)
                goto err_free;

        rc = setup_xsl_irq(dev, link);
        if (rc)
                goto err_spa;

        /* platform specific hook */
        rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
                &link->platform_data);
        if (rc)
                goto err_xsl_irq;

        /* If link->arva is not defined, MMIO registers are not used to
         * generate TLB invalidates and PowerBus snooping is enabled.
         * Otherwise, PowerBus snooping is disabled and TLB invalidates
         * are initiated using the MMIO registers.
         */
        pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, &link->arva);

        *out_link = link;
        return 0;

err_xsl_irq:
        release_xsl_irq(link);
err_spa:
        free_spa(link);
err_free:
        kfree(link);
        return rc;
}

static void free_link(struct ocxl_link *link)
{
        release_xsl_irq(link);
        free_spa(link);
        kfree(link);
}
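
/*
 * Get the link used by an opencapi device, creating it if it doesn't
 * exist yet. All functions of a device (same domain/bus/slot) share
 * the same link, and the link is reference-counted.
 *
 * Illustrative call sequence for a driver using the link (the PE_mask
 * value is only a placeholder):
 *
 *      void *link_handle;
 *      int rc;
 *
 *      rc = ocxl_link_setup(dev, 0, &link_handle);
 *      if (rc)
 *              return rc;
 *      ...
 *      ocxl_link_release(dev, link_handle);
 */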
int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
{
        int rc = 0;
        struct ocxl_link *link;

        mutex_lock(&links_list_lock);
        list_for_each_entry(link, &links_list, list) {
                /* The functions of a device all share the same link */
                if (link->domain == pci_domain_nr(dev->bus) &&
                    link->bus == dev->bus->number &&
                    link->dev == PCI_SLOT(dev->devfn)) {
                        kref_get(&link->ref);
                        *link_handle = link;
                        goto unlock;
                }
        }
        rc = alloc_link(dev, PE_mask, &link);
        if (rc)
                goto unlock;

        list_add(&link->list, &links_list);
        *link_handle = link;
unlock:
        mutex_unlock(&links_list_lock);
        return rc;
}
EXPORT_SYMBOL_GPL(ocxl_link_setup);
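
/*
 * kref release callback, called with links_list_lock held: unmap the
 * ATSD registers, remove the link from the list and free it.
 */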
static void release_xsl(struct kref *ref)
{
        struct ocxl_link *link = container_of(ref, struct ocxl_link, ref);

        if (link->arva) {
                pnv_ocxl_unmap_lpar(link->arva);
                link->arva = NULL;
        }

        list_del(&link->list);
        /* call platform code before releasing data */
        pnv_ocxl_spa_release(link->platform_data);
        free_link(link);
}

void ocxl_link_release(struct pci_dev *dev, void *link_handle)
{
        struct ocxl_link *link = (struct ocxl_link *) link_handle;

        mutex_lock(&links_list_lock);
        kref_put(&link->ref, release_xsl);
        mutex_unlock(&links_list_lock);
}
EXPORT_SYMBOL_GPL(ocxl_link_release);
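
/*
 * mmu notifier callback: push a TLB invalidation to the device through
 * the ATSD MMIO registers for each page of the invalidated range.
 */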
static void invalidate_range(struct mmu_notifier *mn,
                struct mm_struct *mm,
                unsigned long start, unsigned long end)
{
        struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier);
        struct ocxl_link *link = pe_data->link;
        unsigned long addr, pid, page_size = PAGE_SIZE;

        pid = mm->context.id;
        trace_ocxl_mmu_notifier_range(start, end, pid);

        spin_lock(&link->atsd_lock);
        for (addr = start; addr < end; addr += page_size)
                pnv_ocxl_tlb_invalidate(link->arva, pid, addr, page_size);
        spin_unlock(&link->atsd_lock);
}

static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
        .invalidate_range = invalidate_range,
};
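
/*
 * Build the config_state word of a process element: translation mode
 * (hash or radix), 64-bit vs 32-bit addressing, and kernel vs user
 * (problem state) context.
 */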
static u64 calculate_cfg_state(bool kernel)
{
        u64 state;

        state = SPA_CFG_DR;
        if (mfspr(SPRN_LPCR) & LPCR_TC)
                state |= SPA_CFG_TC;
        if (radix_enabled())
                state |= SPA_CFG_XLAT_ror;
        else
                state |= SPA_CFG_XLAT_hpt;
        state |= SPA_CFG_HV;
        if (kernel) {
                if (mfmsr() & MSR_SF)
                        state |= SPA_CFG_SF;
        } else {
                state |= SPA_CFG_PR;
                if (!test_tsk_thread_flag(current, TIF_32BIT))
                        state |= SPA_CFG_SF;
        }
        return state;
}
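
/*
 * Add a process element for the given pasid to the SPA, so that the
 * device can issue address translations for that context. mm is NULL
 * for a kernel context.
 */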
int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
                u64 amr, u16 bdf, struct mm_struct *mm,
                void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
                void *xsl_err_data)
{
        struct ocxl_link *link = (struct ocxl_link *) link_handle;
        struct spa *spa = link->spa;
        struct ocxl_process_element *pe;
        int pe_handle, rc = 0;
        struct pe_data *pe_data;

        BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
        if (pasid > SPA_PASID_MAX)
                return -EINVAL;

        mutex_lock(&spa->spa_lock);
        pe_handle = pasid & SPA_PE_MASK;
        pe = spa->spa_mem + pe_handle;

        if (pe->software_state) {
                rc = -EBUSY;
                goto unlock;
        }

        pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
        if (!pe_data) {
                rc = -ENOMEM;
                goto unlock;
        }

        pe_data->mm = mm;
        pe_data->xsl_err_cb = xsl_err_cb;
        pe_data->xsl_err_data = xsl_err_data;
        pe_data->link = link;
        pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops;

        memset(pe, 0, sizeof(struct ocxl_process_element));
        pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
        pe->pasid = cpu_to_be32(pasid << (31 - 19));
        pe->bdf = cpu_to_be16(bdf);
        pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
        pe->pid = cpu_to_be32(pidr);
        pe->tid = cpu_to_be32(tidr);
        pe->amr = cpu_to_be64(amr);
        pe->software_state = cpu_to_be32(SPA_PE_VALID);

        /*
         * For user contexts, register a copro so that TLBIs are seen
         * by the nest MMU. If we have a kernel context, TLBIs are
         * already global.
         */
        if (mm) {
                mm_context_add_copro(mm);
                if (link->arva) {
                        /* Use MMIO registers for the TLB Invalidate
                         * operations.
                         */
                        trace_ocxl_init_mmu_notifier(pasid, mm->context.id);
                        mmu_notifier_register(&pe_data->mmu_notifier, mm);
                }
        }

        /*
         * Barrier is to make sure PE is visible in the SPA before it
         * is used by the device. It also helps with the global TLBI
         * invalidation
         */
        mb();
        radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);

        /*
         * The mm must stay valid for as long as the device uses it. We
         * lower the count when the context is removed from the SPA.
         *
         * We grab mm_count (and not mm_users), as we don't want to
         * end up in a circular dependency if a process mmaps its
         * mmio, therefore incrementing the file ref count when
         * calling mmap(), and forgets to unmap before exiting. In
         * that scenario, when the kernel handles the death of the
         * process, the file is not cleaned because unmap was not
         * called, and the mm wouldn't be freed because we would still
         * have a reference on mm_users. Incrementing mm_count solves
         * the problem.
         */
        if (mm)
                mmgrab(mm);
        trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
unlock:
        mutex_unlock(&spa->spa_lock);
        return rc;
}
EXPORT_SYMBOL_GPL(ocxl_link_add_pe);
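
/*
 * Update the thread id of an existing process element and flush the
 * entry from the NPU context cache, so that the stale value cannot be
 * reloaded.
 */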
int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
{
        struct ocxl_link *link = (struct ocxl_link *) link_handle;
        struct spa *spa = link->spa;
        struct ocxl_process_element *pe;
        int pe_handle, rc;

        if (pasid > SPA_PASID_MAX)
                return -EINVAL;

        pe_handle = pasid & SPA_PE_MASK;
        pe = spa->spa_mem + pe_handle;

        mutex_lock(&spa->spa_lock);

        pe->tid = cpu_to_be32(tid);

        /*
         * The barrier makes sure the PE is updated
         * before we clear the NPU context cache below, so that the
         * old PE cannot be reloaded erroneously.
         */
        mb();

        /*
         * hook to platform code
         * On powerpc, the entry needs to be cleared from the context
         * cache of the NPU.
         */
        rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
        WARN_ON(rc);

        mutex_unlock(&spa->spa_lock);
        return rc;
}
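
/*
 * Remove the process element for the given pasid from the SPA, flush
 * it from the NPU context cache and release the references taken on
 * the mm when the context was added.
 */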
int ocxl_link_remove_pe(void *link_handle, int pasid)
{
        struct ocxl_link *link = (struct ocxl_link *) link_handle;
        struct spa *spa = link->spa;
        struct ocxl_process_element *pe;
        struct pe_data *pe_data;
        int pe_handle, rc;

        if (pasid > SPA_PASID_MAX)
                return -EINVAL;

        /*
         * About synchronization with our memory fault handler:
         *
         * Before removing the PE, the driver is supposed to have
         * notified the AFU, which should have cleaned up and made
         * sure the PASID is no longer in use, including pending
         * interrupts. However, there's no way to be sure...
         *
         * We clear the PE and remove the context from our radix
         * tree. From that point on, any new interrupt for that
         * context will fail silently, which is ok. As mentioned
         * above, that's not expected, but it could happen if the
         * driver or AFU didn't do the right thing.
         *
         * There could still be a bottom half running, but we don't
         * need to wait/flush, as it is managing a reference count on
         * the mm it reads from the radix tree.
         */
        pe_handle = pasid & SPA_PE_MASK;
        pe = spa->spa_mem + pe_handle;

        mutex_lock(&spa->spa_lock);

        if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
                rc = -EINVAL;
                goto unlock;
        }

        trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
                be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));

        memset(pe, 0, sizeof(struct ocxl_process_element));
        /*
         * The barrier makes sure the PE is removed from the SPA
         * before we clear the NPU context cache below, so that the
         * old PE cannot be reloaded erroneously.
         */
        mb();

        /*
         * hook to platform code
         * On powerpc, the entry needs to be cleared from the context
         * cache of the NPU.
         */
        rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
        WARN_ON(rc);

        pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
        if (!pe_data) {
                WARN(1, "Couldn't find pe data when removing PE\n");
        } else {
                if (pe_data->mm) {
                        if (link->arva) {
                                trace_ocxl_release_mmu_notifier(pasid,
                                        pe_data->mm->context.id);
                                mmu_notifier_unregister(&pe_data->mmu_notifier,
                                        pe_data->mm);
                                spin_lock(&link->atsd_lock);
                                pnv_ocxl_tlb_invalidate(link->arva,
                                        pe_data->mm->context.id,
                                        0ull,
                                        PAGE_SIZE);
                                spin_unlock(&link->atsd_lock);
                        }
                        mm_context_remove_copro(pe_data->mm);
                        mmdrop(pe_data->mm);
                }
                kfree_rcu(pe_data, rcu);
        }
unlock:
        mutex_unlock(&spa->spa_lock);
        return rc;
}
EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);
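
/*
 * Allocate a hardware interrupt for an AFU using this link. The number
 * of interrupts per link is capped by MAX_IRQ_PER_LINK.
 */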
int ocxl_link_irq_alloc(void *link_handle, int *hw_irq)
{
        struct ocxl_link *link = (struct ocxl_link *) link_handle;
        int irq;

        if (atomic_dec_if_positive(&link->irq_available) < 0)
                return -ENOSPC;

        irq = xive_native_alloc_irq();
        if (!irq) {
                atomic_inc(&link->irq_available);
                return -ENXIO;
        }

        *hw_irq = irq;
        return 0;
}
EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);
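
/*
 * Free an interrupt previously allocated with ocxl_link_irq_alloc().
 */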
void ocxl_link_free_irq(void *link_handle, int hw_irq)
{
        struct ocxl_link *link = (struct ocxl_link *) link_handle;

        xive_native_free_irq(hw_irq);
        atomic_inc(&link->irq_available);
}
EXPORT_SYMBOL_GPL(ocxl_link_free_irq);