vas.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright 2020-21 IBM Corp.
  4. */
  5. #define pr_fmt(fmt) "vas: " fmt
  6. #include <linux/module.h>
  7. #include <linux/kernel.h>
  8. #include <linux/export.h>
  9. #include <linux/types.h>
  10. #include <linux/delay.h>
  11. #include <linux/slab.h>
  12. #include <linux/interrupt.h>
  13. #include <linux/irqdomain.h>
  14. #include <asm/machdep.h>
  15. #include <asm/hvcall.h>
  16. #include <asm/plpar_wrappers.h>
  17. #include <asm/firmware.h>
  18. #include <asm/vas.h>
  19. #include "vas.h"
  /* Paste-address sentinel tested in h_allocate_vas_window(). */
  20. #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
  /* Initial value of domain[0] for the allocate-window hcall. */
  21. #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
  22. /* The hypervisor allows one credit per window right now */
  23. #define DEF_WIN_CREDS 1
  /* NOTE(review): not referenced in the visible part of this file. */
  24. static struct vas_all_caps caps_all;
  /*
   * Set to true by get_vas_capabilities() once a feature's capabilities
   * were read and registered in sysfs. Gates API registration and the
   * DLPAR / migration handlers.
   */
  25. static bool copypaste_feat;
  /*
   * Buffer whose physical address is passed to H_QUERY_VAS_CAPABILITIES
   * from pseries_vas_dlpar_cpu().
   */
  26. static struct hv_vas_cop_feat_caps hv_cop_caps;
  /* Per feature type capabilities, window list and open/closed counters. */
  27. static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
  /*
   * Held around window allocate / deallocate, IRQ setup / free and credit
   * reconfiguration.
   */
  28. static DEFINE_MUTEX(vas_pseries_mutex);
  /*
   * Written by vas_migration_handler() under vas_pseries_mutex. While set,
   * vas_allocate_window() fails with -EBUSY.
   */
  29. static bool migration_in_progress;
  30. static long hcall_return_busy_check(long rc)
  31. {
  32. /* Check if we are stalled for some time */
  33. if (H_IS_LONG_BUSY(rc)) {
  34. msleep(get_longbusy_msecs(rc));
  35. rc = H_BUSY;
  36. } else if (rc == H_BUSY) {
  37. cond_resched();
  38. }
  39. return rc;
  40. }
  41. /*
  42. * Allocate VAS window hcall
  43. */
  44. static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
  45. u8 wintype, u16 credits)
  46. {
  47. long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
  48. long rc;
  49. do {
  50. rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
  51. credits, domain[0], domain[1], domain[2],
  52. domain[3], domain[4], domain[5]);
  53. rc = hcall_return_busy_check(rc);
  54. } while (rc == H_BUSY);
  55. if (rc == H_SUCCESS) {
  56. if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
  57. pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
  58. return -ENOTSUPP;
  59. }
  60. win->vas_win.winid = retbuf[0];
  61. win->win_addr = retbuf[1];
  62. win->complete_irq = retbuf[2];
  63. win->fault_irq = retbuf[3];
  64. return 0;
  65. }
  66. pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
  67. rc, wintype, credits);
  68. return -EIO;
  69. }
  70. /*
  71. * Deallocate VAS window hcall.
  72. */
  73. static int h_deallocate_vas_window(u64 winid)
  74. {
  75. long rc;
  76. do {
  77. rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
  78. rc = hcall_return_busy_check(rc);
  79. } while (rc == H_BUSY);
  80. if (rc == H_SUCCESS)
  81. return 0;
  82. pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
  83. rc, winid);
  84. return -EIO;
  85. }
  86. /*
  87. * Modify VAS window.
  88. * After the window is opened with allocate window hcall, configure it
  89. * with flags and LPAR PID before using.
  90. */
  91. static int h_modify_vas_window(struct pseries_vas_window *win)
  92. {
  93. long rc;
  94. /*
  95. * AMR value is not supported in Linux VAS implementation.
  96. * The hypervisor ignores it if 0 is passed.
  97. */
  98. do {
  99. rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
  100. win->vas_win.winid, win->pid, 0,
  101. VAS_MOD_WIN_FLAGS, 0);
  102. rc = hcall_return_busy_check(rc);
  103. } while (rc == H_BUSY);
  104. if (rc == H_SUCCESS)
  105. return 0;
  106. pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
  107. rc, win->vas_win.winid, win->pid);
  108. return -EIO;
  109. }
  110. /*
  111. * This hcall is used to determine the capabilities from the hypervisor.
  112. * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
  113. * @query_type: If 0 is passed, the hypervisor returns the overall
  114. * capabilities which provides all feature(s) that are
  115. * available. Then query the hypervisor to get the
  116. * corresponding capabilities for the specific feature.
  117. * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
  118. * and VAS GZIP Default capabilities.
  119. * H_QUERY_NX_CAPABILITIES provides NX GZIP
  120. * capabilities.
  121. * @result: Return buffer to save capabilities.
  122. */
  123. int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
  124. {
  125. long rc;
  126. rc = plpar_hcall_norets(hcall, query_type, result);
  127. if (rc == H_SUCCESS)
  128. return 0;
  129. /* H_FUNCTION means HV does not support VAS so don't print an error */
  130. if (rc != H_FUNCTION) {
  131. pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
  132. (hcall == H_QUERY_VAS_CAPABILITIES) ?
  133. "H_QUERY_VAS_CAPABILITIES" :
  134. "H_QUERY_NX_CAPABILITIES",
  135. rc, query_type, result);
  136. }
  137. return -EIO;
  138. }
  139. EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
  140. /*
  141. * hcall to get fault CRB from the hypervisor.
  142. */
  143. static int h_get_nx_fault(u32 winid, u64 buffer)
  144. {
  145. long rc;
  146. rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
  147. if (rc == H_SUCCESS)
  148. return 0;
  149. pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
  150. rc, winid, buffer);
  151. return -EIO;
  152. }
  153. /*
  154. * Handle the fault interrupt.
  155. * When the fault interrupt is received for each window, query the
  156. * hypervisor to get the fault CRB on the specific fault. Then
  157. * process the CRB by updating CSB or send signal if the user space
  158. * CSB is invalid.
  159. * Note: The hypervisor forwards an interrupt for each fault request.
  160. * So one fault CRB to process for each H_GET_NX_FAULT hcall.
  161. */
  162. static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
  163. {
  164. struct pseries_vas_window *txwin = data;
  165. struct coprocessor_request_block crb;
  166. struct vas_user_win_ref *tsk_ref;
  167. int rc;
  168. while (atomic_read(&txwin->pending_faults)) {
  169. rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
  170. if (!rc) {
  171. tsk_ref = &txwin->vas_win.task_ref;
  172. vas_dump_crb(&crb);
  173. vas_update_csb(&crb, tsk_ref);
  174. }
  175. atomic_dec(&txwin->pending_faults);
  176. }
  177. return IRQ_HANDLED;
  178. }
  179. /*
  180. * irq_default_primary_handler() can be used only with IRQF_ONESHOT
  181. * which disables IRQ before executing the thread handler and enables
  182. * it after. But this disabling interrupt sets the VAS IRQ OFF
  183. * state in the hypervisor. If the NX generates fault interrupt
  184. * during this window, the hypervisor will not deliver this
  185. * interrupt to the LPAR. So use VAS specific IRQ handler instead
  186. * of calling the default primary handler.
  187. */
  188. static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
  189. {
  190. struct pseries_vas_window *txwin = data;
  191. /*
  192. * The thread hanlder will process this interrupt if it is
  193. * already running.
  194. */
  195. atomic_inc(&txwin->pending_faults);
  196. return IRQ_WAKE_THREAD;
  197. }
  198. /*
  199. * Allocate window and setup IRQ mapping.
  200. */
  201. static int allocate_setup_window(struct pseries_vas_window *txwin,
  202. u64 *domain, u8 wintype)
  203. {
  204. int rc;
  205. rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
  206. if (rc)
  207. return rc;
  208. /*
  209. * On PowerVM, the hypervisor setup and forwards the fault
  210. * interrupt per window. So the IRQ setup and fault handling
  211. * will be done for each open window separately.
  212. */
  213. txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
  214. if (!txwin->fault_virq) {
  215. pr_err("Failed irq mapping %d\n", txwin->fault_irq);
  216. rc = -EINVAL;
  217. goto out_win;
  218. }
  219. txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
  220. txwin->vas_win.winid);
  221. if (!txwin->name) {
  222. rc = -ENOMEM;
  223. goto out_irq;
  224. }
  225. rc = request_threaded_irq(txwin->fault_virq,
  226. pseries_vas_irq_handler,
  227. pseries_vas_fault_thread_fn, 0,
  228. txwin->name, txwin);
  229. if (rc) {
  230. pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
  231. txwin->vas_win.winid, txwin->fault_virq, rc);
  232. goto out_free;
  233. }
  234. txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
  235. return 0;
  236. out_free:
  237. kfree(txwin->name);
  238. out_irq:
  239. irq_dispose_mapping(txwin->fault_virq);
  240. out_win:
  241. h_deallocate_vas_window(txwin->vas_win.winid);
  242. return rc;
  243. }
  /*
   * Undo the IRQ part of allocate_setup_window(): release the fault IRQ
   * handler, free the IRQ name allocated with kasprintf() and dispose of
   * the virq mapping. The hypervisor window itself is not touched here.
   */
  244. static inline void free_irq_setup(struct pseries_vas_window *txwin)
  245. {
  246. free_irq(txwin->fault_virq, txwin);
  247. kfree(txwin->name);
  248. irq_dispose_mapping(txwin->fault_virq);
  249. }
  250. static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
  251. enum vas_cop_type cop_type)
  252. {
  253. long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
  254. struct vas_cop_feat_caps *cop_feat_caps;
  255. struct vas_caps *caps;
  256. struct pseries_vas_window *txwin;
  257. int rc;
  258. txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
  259. if (!txwin)
  260. return ERR_PTR(-ENOMEM);
  261. /*
  262. * A VAS window can have many credits which means that many
  263. * requests can be issued simultaneously. But the hypervisor
  264. * restricts one credit per window.
  265. * The hypervisor introduces 2 different types of credits:
  266. * Default credit type (Uses normal priority FIFO):
  267. * A limited number of credits are assigned to partitions
  268. * based on processor entitlement. But these credits may be
  269. * over-committed on a system depends on whether the CPUs
  270. * are in shared or dedicated modes - that is, more requests
  271. * may be issued across the system than NX can service at
  272. * once which can result in paste command failure (RMA_busy).
  273. * Then the process has to resend requests or fall-back to
  274. * SW compression.
  275. * Quality of Service (QoS) credit type (Uses high priority FIFO):
  276. * To avoid NX HW contention, the system admins can assign
  277. * QoS credits for each LPAR so that this partition is
  278. * guaranteed access to NX resources. These credits are
  279. * assigned to partitions via the HMC.
  280. * Refer PAPR for more information.
  281. *
  282. * Allocate window with QoS credits if user requested. Otherwise
  283. * default credits are used.
  284. */
  285. if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
  286. caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
  287. else
  288. caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
  289. cop_feat_caps = &caps->caps;
  290. if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
  291. atomic_read(&cop_feat_caps->nr_total_credits)) {
  292. pr_err_ratelimited("Credits are not available to allocate window\n");
  293. rc = -EINVAL;
  294. goto out;
  295. }
  296. if (vas_id == -1) {
  297. /*
  298. * The user space is requesting to allocate a window on
  299. * a VAS instance where the process is executing.
  300. * On PowerVM, domain values are passed to the hypervisor
  301. * to select VAS instance. Useful if the process is
  302. * affinity to NUMA node.
  303. * The hypervisor selects VAS instance if
  304. * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
  305. * The h_allocate_vas_window hcall is defined to take a
  306. * domain values as specified by h_home_node_associativity,
  307. * So no unpacking needs to be done.
  308. */
  309. rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
  310. VPHN_FLAG_VCPU, hard_smp_processor_id());
  311. if (rc != H_SUCCESS) {
  312. pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
  313. goto out;
  314. }
  315. }
  316. txwin->pid = mfspr(SPRN_PID);
  317. /*
  318. * Allocate / Deallocate window hcalls and setup / free IRQs
  319. * have to be protected with mutex.
  320. * Open VAS window: Allocate window hcall and setup IRQ
  321. * Close VAS window: Deallocate window hcall and free IRQ
  322. * The hypervisor waits until all NX requests are
  323. * completed before closing the window. So expects OS
  324. * to handle NX faults, means IRQ can be freed only
  325. * after the deallocate window hcall is returned.
  326. * So once the window is closed with deallocate hcall before
  327. * the IRQ is freed, it can be assigned to new allocate
  328. * hcall with the same fault IRQ by the hypervisor. It can
  329. * result in setup IRQ fail for the new window since the
  330. * same fault IRQ is not freed by the OS before.
  331. */
  332. mutex_lock(&vas_pseries_mutex);
  333. if (migration_in_progress)
  334. rc = -EBUSY;
  335. else
  336. rc = allocate_setup_window(txwin, (u64 *)&domain[0],
  337. cop_feat_caps->win_type);
  338. mutex_unlock(&vas_pseries_mutex);
  339. if (rc)
  340. goto out;
  341. /*
  342. * Modify window and it is ready to use.
  343. */
  344. rc = h_modify_vas_window(txwin);
  345. if (!rc)
  346. rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
  347. if (rc)
  348. goto out_free;
  349. txwin->win_type = cop_feat_caps->win_type;
  350. mutex_lock(&vas_pseries_mutex);
  351. /*
  352. * Possible to lose the acquired credit with DLPAR core
  353. * removal after the window is opened. So if there are any
  354. * closed windows (means with lost credits), do not give new
  355. * window to user space. New windows will be opened only
  356. * after the existing windows are reopened when credits are
  357. * available.
  358. */
  359. if (!caps->nr_close_wins) {
  360. list_add(&txwin->win_list, &caps->list);
  361. caps->nr_open_windows++;
  362. mutex_unlock(&vas_pseries_mutex);
  363. vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
  364. return &txwin->vas_win;
  365. }
  366. mutex_unlock(&vas_pseries_mutex);
  367. put_vas_user_win_ref(&txwin->vas_win.task_ref);
  368. rc = -EBUSY;
  369. pr_err_ratelimited("No credit is available to allocate window\n");
  370. out_free:
  371. /*
  372. * Window is not operational. Free IRQ before closing
  373. * window so that do not have to hold mutex.
  374. */
  375. free_irq_setup(txwin);
  376. h_deallocate_vas_window(txwin->vas_win.winid);
  377. out:
  378. atomic_dec(&cop_feat_caps->nr_used_credits);
  379. kfree(txwin);
  380. return ERR_PTR(rc);
  381. }
  382. static u64 vas_paste_address(struct vas_window *vwin)
  383. {
  384. struct pseries_vas_window *win;
  385. win = container_of(vwin, struct pseries_vas_window, vas_win);
  386. return win->win_addr;
  387. }
  388. static int deallocate_free_window(struct pseries_vas_window *win)
  389. {
  390. int rc = 0;
  391. /*
  392. * The hypervisor waits for all requests including faults
  393. * are processed before closing the window - Means all
  394. * credits have to be returned. In the case of fault
  395. * request, a credit is returned after OS issues
  396. * H_GET_NX_FAULT hcall.
  397. * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
  398. * hcall.
  399. */
  400. rc = h_deallocate_vas_window(win->vas_win.winid);
  401. if (!rc)
  402. free_irq_setup(win);
  403. return rc;
  404. }
  405. static int vas_deallocate_window(struct vas_window *vwin)
  406. {
  407. struct pseries_vas_window *win;
  408. struct vas_cop_feat_caps *caps;
  409. int rc = 0;
  410. if (!vwin)
  411. return -EINVAL;
  412. win = container_of(vwin, struct pseries_vas_window, vas_win);
  413. /* Should not happen */
  414. if (win->win_type >= VAS_MAX_FEAT_TYPE) {
  415. pr_err("Window (%u): Invalid window type %u\n",
  416. vwin->winid, win->win_type);
  417. return -EINVAL;
  418. }
  419. caps = &vascaps[win->win_type].caps;
  420. mutex_lock(&vas_pseries_mutex);
  421. /*
  422. * VAS window is already closed in the hypervisor when
  423. * lost the credit or with migration. So just remove the entry
  424. * from the list, remove task references and free vas_window
  425. * struct.
  426. */
  427. if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
  428. !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
  429. rc = deallocate_free_window(win);
  430. if (rc) {
  431. mutex_unlock(&vas_pseries_mutex);
  432. return rc;
  433. }
  434. } else
  435. vascaps[win->win_type].nr_close_wins--;
  436. list_del(&win->win_list);
  437. atomic_dec(&caps->nr_used_credits);
  438. vascaps[win->win_type].nr_open_windows--;
  439. mutex_unlock(&vas_pseries_mutex);
  440. mm_context_remove_vas_window(vwin->task_ref.mm);
  441. put_vas_user_win_ref(&vwin->task_ref);
  442. kfree(win);
  443. return 0;
  444. }
  /*
   * Window operations passed to vas_register_coproc_api() by
   * vas_register_api_pseries().
   */
  445. static const struct vas_user_win_ops vops_pseries = {
  446. .open_win = vas_allocate_window, /* Open and configure window */
  447. .paste_addr = vas_paste_address, /* To do copy/paste */
  448. .close_win = vas_deallocate_window, /* Close window */
  449. };
  450. /*
  451. * Supporting only nx-gzip coprocessor type now, but this API code
  452. * extended to other coprocessor types later.
  453. */
  454. int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
  455. const char *name)
  456. {
  457. if (!copypaste_feat)
  458. return -ENOTSUPP;
  459. return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
  460. }
  461. EXPORT_SYMBOL_GPL(vas_register_api_pseries);
  /*
   * Counterpart of vas_register_api_pseries(): forwards to
   * vas_unregister_coproc_api().
   */
  462. void vas_unregister_api_pseries(void)
  463. {
  464. vas_unregister_coproc_api();
  465. }
  466. EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
  467. /*
  468. * Get the specific capabilities based on the feature type.
  469. * Right now supports GZIP default and GZIP QoS capabilities.
  470. */
  471. static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
  472. struct hv_vas_cop_feat_caps *hv_caps)
  473. {
  474. struct vas_cop_feat_caps *caps;
  475. struct vas_caps *vcaps;
  476. int rc = 0;
  477. vcaps = &vascaps[type];
  478. memset(vcaps, 0, sizeof(*vcaps));
  479. INIT_LIST_HEAD(&vcaps->list);
  480. vcaps->feat = feat;
  481. caps = &vcaps->caps;
  482. rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
  483. (u64)virt_to_phys(hv_caps));
  484. if (rc)
  485. return rc;
  486. caps->user_mode = hv_caps->user_mode;
  487. if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
  488. pr_err("User space COPY/PASTE is not supported\n");
  489. return -ENOTSUPP;
  490. }
  491. caps->descriptor = be64_to_cpu(hv_caps->descriptor);
  492. caps->win_type = hv_caps->win_type;
  493. if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
  494. pr_err("Unsupported window type %u\n", caps->win_type);
  495. return -EINVAL;
  496. }
  497. caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
  498. caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
  499. atomic_set(&caps->nr_total_credits,
  500. be16_to_cpu(hv_caps->target_lpar_creds));
  501. if (feat == VAS_GZIP_DEF_FEAT) {
  502. caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
  503. if (caps->max_win_creds < DEF_WIN_CREDS) {
  504. pr_err("Window creds(%u) > max allowed window creds(%u)\n",
  505. DEF_WIN_CREDS, caps->max_win_creds);
  506. return -EINVAL;
  507. }
  508. }
  509. rc = sysfs_add_vas_caps(caps);
  510. if (rc)
  511. return rc;
  512. copypaste_feat = true;
  513. return 0;
  514. }
  515. /*
  516. * VAS windows can be closed due to lost credits when the core is
  517. * removed. So reopen them if credits are available due to DLPAR
  518. * core add and set the window active status. When NX sees the page
  519. * fault on the unmapped paste address, the kernel handles the fault
  520. * by setting the remapping to new paste address if the window is
  521. * active.
  522. */
  523. static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
  524. bool migrate)
  525. {
  526. long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
  527. struct vas_cop_feat_caps *caps = &vcaps->caps;
  528. struct pseries_vas_window *win = NULL, *tmp;
  529. int rc, mv_ents = 0;
  530. int flag;
  531. /*
  532. * Nothing to do if there are no closed windows.
  533. */
  534. if (!vcaps->nr_close_wins)
  535. return 0;
  536. /*
  537. * For the core removal, the hypervisor reduces the credits
  538. * assigned to the LPAR and the kernel closes VAS windows
  539. * in the hypervisor depends on reduced credits. The kernel
  540. * uses LIFO (the last windows that are opened will be closed
  541. * first) and expects to open in the same order when credits
  542. * are available.
  543. * For example, 40 windows are closed when the LPAR lost 2 cores
  544. * (dedicated). If 1 core is added, this LPAR can have 20 more
  545. * credits. It means the kernel can reopen 20 windows. So move
  546. * 20 entries in the VAS windows lost and reopen next 20 windows.
  547. * For partition migration, reopen all windows that are closed
  548. * during resume.
  549. */
  550. if ((vcaps->nr_close_wins > creds) && !migrate)
  551. mv_ents = vcaps->nr_close_wins - creds;
  552. list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
  553. if (!mv_ents)
  554. break;
  555. mv_ents--;
  556. }
  557. /*
  558. * Open windows if they are closed only with migration or
  559. * DLPAR (lost credit) before.
  560. */
  561. if (migrate)
  562. flag = VAS_WIN_MIGRATE_CLOSE;
  563. else
  564. flag = VAS_WIN_NO_CRED_CLOSE;
  565. list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
  566. /*
  567. * This window is closed with DLPAR and migration events.
  568. * So reopen the window with the last event.
  569. * The user space is not suspended with the current
  570. * migration notifier. So the user space can issue DLPAR
  571. * CPU hotplug while migration in progress. In this case
  572. * this window will be opened with the last event.
  573. */
  574. if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
  575. (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
  576. win->vas_win.status &= ~flag;
  577. continue;
  578. }
  579. /*
  580. * Nothing to do on this window if it is not closed
  581. * with this flag
  582. */
  583. if (!(win->vas_win.status & flag))
  584. continue;
  585. rc = allocate_setup_window(win, (u64 *)&domain[0],
  586. caps->win_type);
  587. if (rc)
  588. return rc;
  589. rc = h_modify_vas_window(win);
  590. if (rc)
  591. goto out;
  592. mutex_lock(&win->vas_win.task_ref.mmap_mutex);
  593. /*
  594. * Set window status to active
  595. */
  596. win->vas_win.status &= ~flag;
  597. mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
  598. win->win_type = caps->win_type;
  599. if (!--vcaps->nr_close_wins)
  600. break;
  601. }
  602. return 0;
  603. out:
  604. /*
  605. * Window modify HCALL failed. So close the window to the
  606. * hypervisor and return.
  607. */
  608. free_irq_setup(win);
  609. h_deallocate_vas_window(win->vas_win.winid);
  610. return rc;
  611. }
  612. /*
  613. * The hypervisor reduces the available credits if the LPAR lost core. It
  614. * means the excessive windows should not be active and the user space
  615. * should not be using these windows to send compression requests to NX.
  616. * So the kernel closes the excessive windows and unmap the paste address
  617. * such that the user space receives paste instruction failure. Then up to
  618. * the user space to fall back to SW compression and manage with the
  619. * existing windows.
  620. */
  621. static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
  622. bool migrate)
  623. {
  624. struct pseries_vas_window *win, *tmp;
  625. struct vas_user_win_ref *task_ref;
  626. struct vm_area_struct *vma;
  627. int rc = 0, flag;
  628. if (migrate)
  629. flag = VAS_WIN_MIGRATE_CLOSE;
  630. else
  631. flag = VAS_WIN_NO_CRED_CLOSE;
  632. list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
  633. /*
  634. * This window is already closed due to lost credit
  635. * or for migration before. Go for next window.
  636. * For migration, nothing to do since this window
  637. * closed for DLPAR and will be reopened even on
  638. * the destination system with other DLPAR operation.
  639. */
  640. if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
  641. (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
  642. win->vas_win.status |= flag;
  643. continue;
  644. }
  645. task_ref = &win->vas_win.task_ref;
  646. /*
  647. * VAS mmap (coproc_mmap()) and its fault handler
  648. * (vas_mmap_fault()) are called after holding mmap lock.
  649. * So hold mmap mutex after mmap_lock to avoid deadlock.
  650. */
  651. mmap_write_lock(task_ref->mm);
  652. mutex_lock(&task_ref->mmap_mutex);
  653. vma = task_ref->vma;
  654. /*
  655. * Number of available credits are reduced, So select
  656. * and close windows.
  657. */
  658. win->vas_win.status |= flag;
  659. /*
  660. * vma is set in the original mapping. But this mapping
  661. * is done with mmap() after the window is opened with ioctl.
  662. * so we may not see the original mapping if the core remove
  663. * is done before the original mmap() and after the ioctl.
  664. */
  665. if (vma)
  666. zap_page_range(vma, vma->vm_start,
  667. vma->vm_end - vma->vm_start);
  668. mutex_unlock(&task_ref->mmap_mutex);
  669. mmap_write_unlock(task_ref->mm);
  670. /*
  671. * Close VAS window in the hypervisor, but do not
  672. * free vas_window struct since it may be reused
  673. * when the credit is available later (DLPAR with
  674. * adding cores). This struct will be used
  675. * later when the process issued with close(FD).
  676. */
  677. rc = deallocate_free_window(win);
  678. /*
  679. * This failure is from the hypervisor.
  680. * No way to stop migration for these failures.
  681. * So ignore error and continue closing other windows.
  682. */
  683. if (rc && !migrate)
  684. return rc;
  685. vcap->nr_close_wins++;
  686. /*
  687. * For migration, do not depend on lpar_creds in case if
  688. * mismatch with the hypervisor value (should not happen).
  689. * So close all active windows in the list and will be
  690. * reopened windows based on the new lpar_creds on the
  691. * destination system during resume.
  692. */
  693. if (!migrate && !--excess_creds)
  694. break;
  695. }
  696. return 0;
  697. }
  698. /*
  699. * Get new VAS capabilities when the core add/removal configuration
  700. * changes. Reconfig window configurations based on the credits
  701. * availability from this new capabilities.
  702. */
  703. int vas_reconfig_capabilties(u8 type, int new_nr_creds)
  704. {
  705. struct vas_cop_feat_caps *caps;
  706. int old_nr_creds;
  707. struct vas_caps *vcaps;
  708. int rc = 0, nr_active_wins;
  709. if (type >= VAS_MAX_FEAT_TYPE) {
  710. pr_err("Invalid credit type %d\n", type);
  711. return -EINVAL;
  712. }
  713. vcaps = &vascaps[type];
  714. caps = &vcaps->caps;
  715. mutex_lock(&vas_pseries_mutex);
  716. old_nr_creds = atomic_read(&caps->nr_total_credits);
  717. atomic_set(&caps->nr_total_credits, new_nr_creds);
  718. /*
  719. * The total number of available credits may be decreased or
  720. * increased with DLPAR operation. Means some windows have to be
  721. * closed / reopened. Hold the vas_pseries_mutex so that the
  722. * user space can not open new windows.
  723. */
  724. if (old_nr_creds < new_nr_creds) {
  725. /*
  726. * If the existing target credits is less than the new
  727. * target, reopen windows if they are closed due to
  728. * the previous DLPAR (core removal).
  729. */
  730. rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
  731. false);
  732. } else {
  733. /*
  734. * # active windows is more than new LPAR available
  735. * credits. So close the excessive windows.
  736. * On pseries, each window will have 1 credit.
  737. */
  738. nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
  739. if (nr_active_wins > new_nr_creds)
  740. rc = reconfig_close_windows(vcaps,
  741. nr_active_wins - new_nr_creds,
  742. false);
  743. }
  744. mutex_unlock(&vas_pseries_mutex);
  745. return rc;
  746. }
  747. int pseries_vas_dlpar_cpu(void)
  748. {
  749. int new_nr_creds, rc;
  750. /*
  751. * NX-GZIP is not enabled. Nothing to do for DLPAR event
  752. */
  753. if (!copypaste_feat)
  754. return 0;
  755. rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
  756. vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
  757. (u64)virt_to_phys(&hv_cop_caps));
  758. if (!rc) {
  759. new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
  760. rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
  761. }
  762. if (rc)
  763. pr_err("Failed reconfig VAS capabilities with DLPAR\n");
  764. return rc;
  765. }
  766. /*
  767. * Total number of default credits available (target_credits)
  768. * in LPAR depends on number of cores configured. It varies based on
  769. * whether processors are in shared mode or dedicated mode.
  770. * Get the notifier when CPU configuration is changed with DLPAR
  771. * operation so that get the new target_credits (vas default capabilities)
  772. * and then update the existing windows usage if needed.
  773. */
  774. static int pseries_vas_notifier(struct notifier_block *nb,
  775. unsigned long action, void *data)
  776. {
  777. struct of_reconfig_data *rd = data;
  778. struct device_node *dn = rd->dn;
  779. const __be32 *intserv = NULL;
  780. int len;
  781. /*
  782. * For shared CPU partition, the hypervisor assigns total credits
  783. * based on entitled core capacity. So updating VAS windows will
  784. * be called from lparcfg_write().
  785. */
  786. if (is_shared_processor())
  787. return NOTIFY_OK;
  788. if ((action == OF_RECONFIG_ATTACH_NODE) ||
  789. (action == OF_RECONFIG_DETACH_NODE))
  790. intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
  791. &len);
  792. /*
  793. * Processor config is not changed
  794. */
  795. if (!intserv)
  796. return NOTIFY_OK;
  797. return pseries_vas_dlpar_cpu();
  798. }
  799. static struct notifier_block pseries_vas_nb = {
  800. .notifier_call = pseries_vas_notifier,
  801. };
  802. /*
  803. * For LPM, all windows have to be closed on the source partition
  804. * before migration and reopen them on the destination partition
  805. * after migration. So closing windows during suspend and
  806. * reopen them during resume.
  807. */
  808. int vas_migration_handler(int action)
  809. {
  810. struct vas_cop_feat_caps *caps;
  811. int old_nr_creds, new_nr_creds = 0;
  812. struct vas_caps *vcaps;
  813. int i, rc = 0;
  814. /*
  815. * NX-GZIP is not enabled. Nothing to do for migration.
  816. */
  817. if (!copypaste_feat)
  818. return rc;
  819. mutex_lock(&vas_pseries_mutex);
  820. if (action == VAS_SUSPEND)
  821. migration_in_progress = true;
  822. else
  823. migration_in_progress = false;
  824. for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
  825. vcaps = &vascaps[i];
  826. caps = &vcaps->caps;
  827. old_nr_creds = atomic_read(&caps->nr_total_credits);
  828. rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
  829. vcaps->feat,
  830. (u64)virt_to_phys(&hv_cop_caps));
  831. if (!rc) {
  832. new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
  833. /*
  834. * Should not happen. But incase print messages, close
  835. * all windows in the list during suspend and reopen
  836. * windows based on new lpar_creds on the destination
  837. * system.
  838. */
  839. if (old_nr_creds != new_nr_creds) {
  840. pr_err("Target credits mismatch with the hypervisor\n");
  841. pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
  842. action, old_nr_creds, new_nr_creds);
  843. pr_err("Used creds: %d, Active creds: %d\n",
  844. atomic_read(&caps->nr_used_credits),
  845. vcaps->nr_open_windows - vcaps->nr_close_wins);
  846. }
  847. } else {
  848. pr_err("state(%d): Get VAS capabilities failed with %d\n",
  849. action, rc);
  850. /*
  851. * We can not stop migration with the current lpm
  852. * implementation. So continue closing all windows in
  853. * the list (during suspend) and return without
  854. * opening windows (during resume) if VAS capabilities
  855. * HCALL failed.
  856. */
  857. if (action == VAS_RESUME)
  858. goto out;
  859. }
  860. switch (action) {
  861. case VAS_SUSPEND:
  862. rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
  863. true);
  864. break;
  865. case VAS_RESUME:
  866. atomic_set(&caps->nr_total_credits, new_nr_creds);
  867. rc = reconfig_open_windows(vcaps, new_nr_creds, true);
  868. break;
  869. default:
  870. /* should not happen */
  871. pr_err("Invalid migration action %d\n", action);
  872. rc = -EINVAL;
  873. goto out;
  874. }
  875. /*
  876. * Ignore errors during suspend and return for resume.
  877. */
  878. if (rc && (action == VAS_RESUME))
  879. goto out;
  880. }
  881. out:
  882. mutex_unlock(&vas_pseries_mutex);
  883. return rc;
  884. }
  885. static int __init pseries_vas_init(void)
  886. {
  887. struct hv_vas_all_caps *hv_caps;
  888. int rc = 0;
  889. /*
  890. * Linux supports user space COPY/PASTE only with Radix
  891. */
  892. if (!radix_enabled()) {
  893. copypaste_feat = false;
  894. pr_err("API is supported only with radix page tables\n");
  895. return -ENOTSUPP;
  896. }
  897. hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
  898. if (!hv_caps)
  899. return -ENOMEM;
  900. /*
  901. * Get VAS overall capabilities by passing 0 to feature type.
  902. */
  903. rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
  904. (u64)virt_to_phys(hv_caps));
  905. if (rc)
  906. goto out;
  907. caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
  908. caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
  909. sysfs_pseries_vas_init(&caps_all);
  910. /*
  911. * QOS capabilities available
  912. */
  913. if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
  914. rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
  915. VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
  916. if (rc)
  917. goto out;
  918. }
  919. /*
  920. * Default capabilities available
  921. */
  922. if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
  923. rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
  924. VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
  925. if (!rc && copypaste_feat) {
  926. if (firmware_has_feature(FW_FEATURE_LPAR))
  927. of_reconfig_notifier_register(&pseries_vas_nb);
  928. pr_info("GZIP feature is available\n");
  929. } else {
  930. /*
  931. * Should not happen, but only when get default
  932. * capabilities HCALL failed. So disable copy paste
  933. * feature.
  934. */
  935. copypaste_feat = false;
  936. }
  937. out:
  938. kfree(hv_caps);
  939. return rc;
  940. }
  941. machine_device_initcall(pseries, pseries_vas_init);