vas-api.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * VAS user space API for its accelerators (Only NX-GZIP is supported now)
  4. * Copyright (C) 2019 Haren Myneni, IBM Corp
  5. */
  6. #define pr_fmt(fmt) "vas-api: " fmt
  7. #include <linux/kernel.h>
  8. #include <linux/device.h>
  9. #include <linux/cdev.h>
  10. #include <linux/fs.h>
  11. #include <linux/slab.h>
  12. #include <linux/uaccess.h>
  13. #include <linux/kthread.h>
  14. #include <linux/sched/signal.h>
  15. #include <linux/mmu_context.h>
  16. #include <linux/io.h>
  17. #include <asm/vas.h>
  18. #include <uapi/asm/vas-api.h>
  19. /*
  20. * The driver creates the device node that can be used as follows:
  21. * For NX-GZIP
  22. *
  23. * fd = open("/dev/crypto/nx-gzip", O_RDWR);
  24. * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
  25. * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
  26. * vas_copy(&crb, 0, 1);
  27. * vas_paste(paste_addr, 0, 1);
  28. * close(fd) or exit process to close window.
  29. *
  30. * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
* copy/paste returns to the user space directly. So refer to the NX hardware
  32. * documentation for exact copy/paste usage and completion / error
  33. * conditions.
  34. */
/*
 * Wrapper object for the nx-gzip device - there is just one instance of
 * this node for the whole system.
 */
static struct coproc_dev {
	struct cdev cdev;			/* char device backing /dev node */
	struct device *device;			/* device created in register() */
	char *name;				/* device name under /dev/crypto/ */
	dev_t devt;				/* major/minor from alloc_chrdev_region() */
	struct class *class;			/* class whose devnode cb prefixes "crypto/" */
	enum vas_cop_type cop_type;		/* coprocessor type (only NX-GZIP now) */
	const struct vas_user_win_ops *vops;	/* platform open/close/paste_addr ops */
} coproc_device;
/* Per-open-file state: at most one send window per file descriptor. */
struct coproc_instance {
	struct coproc_dev *coproc;	/* owning device (set in open()) */
	struct vas_window *txwin;	/* send window opened via VAS_TX_WIN_OPEN */
};
/*
 * Class devnode callback: place the node under /dev/crypto/ (e.g.
 * /dev/crypto/nx-gzip). The returned string is freed by the caller.
 */
static char *coproc_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
}
/*
 * Take references to the calling task's pid, tgid and mm and store
 * them in @task_ref, so window bookkeeping stays valid even if the
 * opening thread exits before the window is closed.
 *
 * Returns 0 on success, -EPERM when the task has no mm.
 */
int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
{
	/*
	 * Window opened by a child thread may not be closed when
	 * it exits. So take reference to its pid and release it
	 * when the window is free by parent thread.
	 * Acquire a reference to the task's pid to make sure
	 * pid will not be re-used - needed only for multithread
	 * applications.
	 */
	task_ref->pid = get_task_pid(current, PIDTYPE_PID);
	/*
	 * Acquire a reference to the task's mm.
	 */
	task_ref->mm = get_task_mm(current);
	if (!task_ref->mm) {
		put_pid(task_ref->pid);
		pr_err("pid(%d): mm_struct is not found\n",
				current->pid);
		return -EPERM;
	}

	/*
	 * Keep only an mm_count reference (mmgrab) so the mm_struct
	 * itself stays allocated, and drop the mm_users reference taken
	 * by get_task_mm() so process exit can still tear down the
	 * address space.
	 */
	mmgrab(task_ref->mm);
	mmput(task_ref->mm);
	/*
	 * Process closes window during exit. In the case of
	 * multithread application, the child thread can open
	 * window and can exit without closing it. So takes tgid
	 * reference until window closed to make sure tgid is not
	 * reused.
	 */
	task_ref->tgid = find_get_pid(task_tgid_vnr(current));

	return 0;
}
/*
 * Resolve @task_ref to a live task and the pid it was found under:
 * prefer the opener's pid, fall back to the tgid (parent) when the
 * opener is already gone. Tasks that are exiting are rejected.
 *
 * Successful return must release the task reference with
 * put_task_struct
 */
static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
				 struct task_struct **tskp, struct pid **pidp)
{
	struct task_struct *tsk;
	struct pid *pid;

	pid = task_ref->pid;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk) {
		pid = task_ref->tgid;
		tsk = get_pid_task(pid, PIDTYPE_PID);
		/*
		 * Parent thread (tgid) will be closing window when it
		 * exits. So should not get here.
		 */
		if (WARN_ON_ONCE(!tsk))
			return false;
	}

	/* Return if the task is exiting. */
	if (tsk->flags & PF_EXITING) {
		put_task_struct(tsk);
		return false;
	}

	*tskp = tsk;
	*pidp = pid;

	return true;
}
/*
 * Update the CSB to indicate a translation error.
 *
 * User space will be polling on CSB after the request is issued.
 * If NX can handle the request without any issues, it updates CSB.
 * Whereas if NX encounters page fault, the kernel will handle the
 * fault and update CSB with translation error.
 *
 * If we are unable to update the CSB means copy_to_user failed due to
 * invalid csb_addr, send a signal to the process.
 */
void vas_update_csb(struct coprocessor_request_block *crb,
		    struct vas_user_win_ref *task_ref)
{
	struct coprocessor_status_block csb;
	struct kernel_siginfo info;
	struct task_struct *tsk;
	void __user *csb_addr;
	struct pid *pid;
	int rc;

	/*
	 * NX user space windows can not be opened for task->mm=NULL
	 * and faults will not be generated for kernel requests.
	 */
	if (WARN_ON_ONCE(!task_ref->mm))
		return;

	/* CSB address comes from the CRB in NX (big-endian) byte order. */
	csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);

	memset(&csb, 0, sizeof(csb));
	csb.cc = CSB_CC_FAULT_ADDRESS;
	csb.ce = CSB_CE_TERMINATION;
	csb.cs = 0;
	csb.count = 0;

	/*
	 * NX operates and returns in BE format as defined CRB struct.
	 * So saves fault_storage_addr in BE as NX pastes in FIFO and
	 * expects user space to convert to CPU format.
	 */
	csb.address = crb->stamp.nx.fault_storage_addr;
	csb.flags = 0;

	/*
	 * Process closes send window after all pending NX requests are
	 * completed. In multi-thread applications, a child thread can
	 * open a window and can exit without closing it. May be some
	 * requests are pending or this window can be used by other
	 * threads later. We should handle faults if NX encounters
	 * pages faults on these requests. Update CSB with translation
	 * error and fault address. If csb_addr passed by user space is
	 * invalid, send SEGV signal to pid saved in window. If the
	 * child thread is not running, send the signal to tgid.
	 * Parent thread (tgid) will close this window upon its exit.
	 *
	 * pid and mm references are taken when window is opened by
	 * process (pid). So tgid is used only when child thread opens
	 * a window and exits without closing it.
	 */
	if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
		return;

	/* Borrow the window owner's address space to write the CSB. */
	kthread_use_mm(task_ref->mm);
	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
	/*
	 * User space polls on csb.flags (first byte). So add barrier
	 * then copy first byte with csb flags update.
	 */
	if (!rc) {
		csb.flags = CSB_V;
		/* Make sure update to csb.flags is visible now */
		smp_mb();
		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
	}
	kthread_unuse_mm(task_ref->mm);
	put_task_struct(tsk);

	/* Success */
	if (!rc)
		return;

	pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
			csb_addr, pid_vnr(pid));

	clear_siginfo(&info);
	info.si_signo = SIGSEGV;
	info.si_errno = EFAULT;
	info.si_code = SEGV_MAPERR;
	info.si_addr = csb_addr;

	/*
	 * process will be polling on csb.flags after request is sent to
	 * NX. So generally CSB update should not fail except when an
	 * application passes invalid csb_addr. So an error message will
	 * be displayed and leave it to user space whether to ignore or
	 * handle this signal.
	 */
	rcu_read_lock();
	rc = kill_pid_info(SIGSEGV, &info, pid);
	rcu_read_unlock();

	pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc);
}
  215. void vas_dump_crb(struct coprocessor_request_block *crb)
  216. {
  217. struct data_descriptor_entry *dde;
  218. struct nx_fault_stamp *nx;
  219. dde = &crb->source;
  220. pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
  221. be64_to_cpu(dde->address), be32_to_cpu(dde->length),
  222. dde->count, dde->index, dde->flags);
  223. dde = &crb->target;
  224. pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
  225. be64_to_cpu(dde->address), be32_to_cpu(dde->length),
  226. dde->count, dde->index, dde->flags);
  227. nx = &crb->stamp.nx;
  228. pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
  229. be32_to_cpu(nx->pswid),
  230. be64_to_cpu(crb->stamp.nx.fault_storage_addr),
  231. nx->flags, nx->fault_status);
  232. }
  233. static int coproc_open(struct inode *inode, struct file *fp)
  234. {
  235. struct coproc_instance *cp_inst;
  236. cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
  237. if (!cp_inst)
  238. return -ENOMEM;
  239. cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
  240. cdev);
  241. fp->private_data = cp_inst;
  242. return 0;
  243. }
  244. static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
  245. {
  246. void __user *uptr = (void __user *)arg;
  247. struct vas_tx_win_open_attr uattr;
  248. struct coproc_instance *cp_inst;
  249. struct vas_window *txwin;
  250. int rc;
  251. cp_inst = fp->private_data;
  252. /*
  253. * One window for file descriptor
  254. */
  255. if (cp_inst->txwin)
  256. return -EEXIST;
  257. rc = copy_from_user(&uattr, uptr, sizeof(uattr));
  258. if (rc) {
  259. pr_err("copy_from_user() returns %d\n", rc);
  260. return -EFAULT;
  261. }
  262. if (uattr.version != 1) {
  263. pr_err("Invalid window open API version\n");
  264. return -EINVAL;
  265. }
  266. if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {
  267. pr_err("VAS API is not registered\n");
  268. return -EACCES;
  269. }
  270. txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
  271. cp_inst->coproc->cop_type);
  272. if (IS_ERR(txwin)) {
  273. pr_err_ratelimited("VAS window open failed rc=%ld\n",
  274. PTR_ERR(txwin));
  275. return PTR_ERR(txwin);
  276. }
  277. mutex_init(&txwin->task_ref.mmap_mutex);
  278. cp_inst->txwin = txwin;
  279. return 0;
  280. }
/*
 * release(): called on the final close of the fd. Close the send
 * window through the platform ops (if provided) and free the per-fd
 * instance.
 */
static int coproc_release(struct inode *inode, struct file *fp)
{
	struct coproc_instance *cp_inst = fp->private_data;
	int rc;

	if (cp_inst->txwin) {
		if (cp_inst->coproc->vops &&
			cp_inst->coproc->vops->close_win) {
			rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
			/*
			 * NOTE(review): returning here skips the kfree()
			 * below, and ->release() return values are ignored
			 * by the VFS - confirm close_win() failure is
			 * transient/retried, otherwise cp_inst leaks.
			 */
			if (rc)
				return rc;
		}
		cp_inst->txwin = NULL;
	}

	kfree(cp_inst);
	fp->private_data = NULL;
	/*
	 * We don't know here if user has other receive windows
	 * open, so we can't really call clear_thread_tidr().
	 * So, once the process calls set_thread_tidr(), the
	 * TIDR value sticks around until process exits, resulting
	 * in an extra copy in restore_sprs().
	 */

	return 0;
}
/*
 * If the executed instruction that caused the fault was a paste, then
 * clear regs CR0[EQ], advance NIP, and return 0. Else return error code.
 *
 * Clearing CR0 makes the emulated paste appear to have failed, which
 * user space detects as a paste failure and may retry.
 */
static int do_fail_paste(void)
{
	struct pt_regs *regs = current->thread.regs;
	u32 instword;

	/* Fault came from user space, so user regs must exist. */
	if (WARN_ON_ONCE(!regs))
		return -EINVAL;

	if (WARN_ON_ONCE(!user_mode(regs)))
		return -EINVAL;

	/*
	 * If we couldn't translate the instruction, the driver should
	 * return success without handling the fault, it will be retried
	 * or the instruction fetch will fault.
	 */
	if (get_user(instword, (u32 __user *)(regs->nip)))
		return -EAGAIN;

	/*
	 * Not a paste instruction, driver may fail the fault.
	 */
	if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
		return -ENOENT;

	regs->ccr &= ~0xe0000000;	/* Clear CR0[0-2] to fail paste */
	regs_add_return_ip(regs, 4);	/* Emulate the paste */

	return 0;
}
/*
 * This fault handler is invoked when the core generates page fault on
 * the paste address. Happens if the kernel closes window in hypervisor
 * (on pseries) due to lost credit or the paste address is not mapped.
 */
static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct file *fp = vma->vm_file;
	struct coproc_instance *cp_inst = fp->private_data;
	struct vas_window *txwin;
	vm_fault_t fault;
	u64 paste_addr;
	int ret;

	/*
	 * window is not opened. Shouldn't expect this error.
	 */
	if (!cp_inst || !cp_inst->txwin) {
		pr_err("Unexpected fault on paste address with TX window closed\n");
		return VM_FAULT_SIGBUS;
	}

	txwin = cp_inst->txwin;
	/*
	 * When the LPAR lost credits due to core removal or during
	 * migration, invalidate the existing mapping for the current
	 * paste addresses and set windows in-active (zap_page_range in
	 * reconfig_close_windows()).
	 * New mapping will be done later after migration or new credits
	 * available. So continue to receive faults if the user space
	 * issue NX request.
	 */
	if (txwin->task_ref.vma != vmf->vma) {
		pr_err("No previous mapping with paste address\n");
		return VM_FAULT_SIGBUS;
	}

	/* mmap_mutex serializes us against DLPAR close/open of the window. */
	mutex_lock(&txwin->task_ref.mmap_mutex);
	/*
	 * The window may be inactive due to lost credit (Ex: core
	 * removal with DLPAR). If the window is active again when
	 * the credit is available, map the new paste address at the
	 * window virtual address.
	 */
	if (txwin->status == VAS_WIN_ACTIVE) {
		paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
		if (paste_addr) {
			fault = vmf_insert_pfn(vma, vma->vm_start,
					(paste_addr >> PAGE_SHIFT));
			mutex_unlock(&txwin->task_ref.mmap_mutex);
			return fault;
		}
	}
	mutex_unlock(&txwin->task_ref.mmap_mutex);

	/*
	 * Received this fault due to closing the actual window.
	 * It can happen during migration or lost credits.
	 * Since no mapping, return the paste instruction failure
	 * to the user space.
	 */
	ret = do_fail_paste();
	/*
	 * The user space can retry several times until success (needed
	 * for migration) or should fallback to SW compression or
	 * manage with the existing open windows if available.
	 * Looking at sysfs interface, it can determine whether these
	 * failures are coming during migration or core removal:
	 * nr_used_credits > nr_total_credits when lost credits
	 */
	if (!ret || (ret == -EAGAIN))
		return VM_FAULT_NOPAGE;

	return VM_FAULT_SIGBUS;
}
/* Only ->fault is needed: it handles faults on a closed/unmapped paste page. */
static const struct vm_operations_struct vas_vm_ops = {
	.fault = vas_mmap_fault,
};
  407. static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
  408. {
  409. struct coproc_instance *cp_inst = fp->private_data;
  410. struct vas_window *txwin;
  411. unsigned long pfn;
  412. u64 paste_addr;
  413. pgprot_t prot;
  414. int rc;
  415. txwin = cp_inst->txwin;
  416. if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
  417. pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n",
  418. (vma->vm_end - vma->vm_start), PAGE_SIZE);
  419. return -EINVAL;
  420. }
  421. /* Ensure instance has an open send window */
  422. if (!txwin) {
  423. pr_err("No send window open?\n");
  424. return -EINVAL;
  425. }
  426. if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
  427. pr_err("VAS API is not registered\n");
  428. return -EACCES;
  429. }
  430. /*
  431. * The initial mmap is done after the window is opened
  432. * with ioctl. But before mmap(), this window can be closed in
  433. * the hypervisor due to lost credit (core removal on pseries).
  434. * So if the window is not active, return mmap() failure with
  435. * -EACCES and expects the user space reissue mmap() when it
  436. * is active again or open new window when the credit is available.
  437. * mmap_mutex protects the paste address mmap() with DLPAR
  438. * close/open event and allows mmap() only when the window is
  439. * active.
  440. */
  441. mutex_lock(&txwin->task_ref.mmap_mutex);
  442. if (txwin->status != VAS_WIN_ACTIVE) {
  443. pr_err("Window is not active\n");
  444. rc = -EACCES;
  445. goto out;
  446. }
  447. paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
  448. if (!paste_addr) {
  449. pr_err("Window paste address failed\n");
  450. rc = -EINVAL;
  451. goto out;
  452. }
  453. pfn = paste_addr >> PAGE_SHIFT;
  454. /* flags, page_prot from cxl_mmap(), except we want cachable */
  455. vm_flags_set(vma, VM_IO | VM_PFNMAP);
  456. vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
  457. prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
  458. rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
  459. vma->vm_end - vma->vm_start, prot);
  460. pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr,
  461. vma->vm_start, rc);
  462. txwin->task_ref.vma = vma;
  463. vma->vm_ops = &vas_vm_ops;
  464. out:
  465. mutex_unlock(&txwin->task_ref.mmap_mutex);
  466. return rc;
  467. }
  468. static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
  469. {
  470. switch (cmd) {
  471. case VAS_TX_WIN_OPEN:
  472. return coproc_ioc_tx_win_open(fp, arg);
  473. default:
  474. return -EINVAL;
  475. }
  476. }
/*
 * Not const: .owner is filled in at registration time with the calling
 * module (see vas_register_coproc_api()).
 */
static struct file_operations coproc_fops = {
	.open = coproc_open,
	.release = coproc_release,
	.mmap = coproc_mmap,
	.unlocked_ioctl = coproc_ioctl,
};
/*
 * Supporting only nx-gzip coprocessor type now, but this API code
 * extended to other coprocessor types later.
 *
 * Allocates a char device region, creates the class (devnode under
 * /dev/crypto/) and the device node, and records @cop_type/@vops for
 * the fops above. Undone by vas_unregister_coproc_api().
 */
int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
			    const char *name,
			    const struct vas_user_win_ops *vops)
{
	int rc = -EINVAL;
	dev_t devno;

	/*
	 * NOTE(review): the region is reserved with baseminor 1, yet the
	 * device below is created at minor 0 (MKDEV(major, 0)) - confirm
	 * this mismatch is intentional.
	 */
	rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
	if (rc) {
		pr_err("Unable to allocate coproc major number: %i\n", rc);
		return rc;
	}

	pr_devel("%s device allocated, dev [%i,%i]\n", name,
			MAJOR(coproc_device.devt), MINOR(coproc_device.devt));

	coproc_device.class = class_create(mod, name);
	if (IS_ERR(coproc_device.class)) {
		rc = PTR_ERR(coproc_device.class);
		pr_err("Unable to create %s class %d\n", name, rc);
		goto err_class;
	}

	/* Nodes for this class appear as /dev/crypto/<name> */
	coproc_device.class->devnode = coproc_devnode;
	coproc_device.cop_type = cop_type;
	coproc_device.vops = vops;

	coproc_fops.owner = mod;
	cdev_init(&coproc_device.cdev, &coproc_fops);

	devno = MKDEV(MAJOR(coproc_device.devt), 0);
	rc = cdev_add(&coproc_device.cdev, devno, 1);
	if (rc) {
		pr_err("cdev_add() failed %d\n", rc);
		goto err_cdev;
	}

	coproc_device.device = device_create(coproc_device.class, NULL,
			devno, NULL, name, MINOR(devno));
	if (IS_ERR(coproc_device.device)) {
		rc = PTR_ERR(coproc_device.device);
		pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
		goto err;
	}

	pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno));

	return 0;

	/* Unwind in reverse order of setup. */
err:
	cdev_del(&coproc_device.cdev);
err_cdev:
	class_destroy(coproc_device.class);
err_class:
	unregister_chrdev_region(coproc_device.devt, 1);

	return rc;
}
  534. void vas_unregister_coproc_api(void)
  535. {
  536. dev_t devno;
  537. cdev_del(&coproc_device.cdev);
  538. devno = MKDEV(MAJOR(coproc_device.devt), 0);
  539. device_destroy(coproc_device.class, devno);
  540. class_destroy(coproc_device.class);
  541. unregister_chrdev_region(coproc_device.devt, 1);
  542. }