pv.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Hosting Protected Virtual Machines
  4. *
  5. * Copyright IBM Corp. 2019, 2020
  6. * Author(s): Janosch Frank <frankja@linux.ibm.com>
  7. */
  8. #include <linux/kvm.h>
  9. #include <linux/kvm_host.h>
  10. #include <linux/minmax.h>
  11. #include <linux/pagemap.h>
  12. #include <linux/sched/signal.h>
  13. #include <asm/gmap.h>
  14. #include <asm/uv.h>
  15. #include <asm/mman.h>
  16. #include <linux/pagewalk.h>
  17. #include <linux/sched/mm.h>
  18. #include <linux/mmu_notifier.h>
  19. #include "kvm-s390.h"
  20. static void kvm_s390_clear_pv_state(struct kvm *kvm)
  21. {
  22. kvm->arch.pv.handle = 0;
  23. kvm->arch.pv.guest_len = 0;
  24. kvm->arch.pv.stor_base = 0;
  25. kvm->arch.pv.stor_var = NULL;
  26. }
  27. int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
  28. {
  29. int cc;
  30. if (!kvm_s390_pv_cpu_get_handle(vcpu))
  31. return 0;
  32. cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
  33. KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
  34. vcpu->vcpu_id, *rc, *rrc);
  35. WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
  36. /* Intended memory leak for something that should never happen. */
  37. if (!cc)
  38. free_pages(vcpu->arch.pv.stor_base,
  39. get_order(uv_info.guest_cpu_stor_len));
  40. free_page(sida_origin(vcpu->arch.sie_block));
  41. vcpu->arch.sie_block->pv_handle_cpu = 0;
  42. vcpu->arch.sie_block->pv_handle_config = 0;
  43. memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
  44. vcpu->arch.sie_block->sdf = 0;
  45. /*
  46. * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
  47. * Use the reset value of gbea to avoid leaking the kernel pointer of
  48. * the just freed sida.
  49. */
  50. vcpu->arch.sie_block->gbea = 1;
  51. kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
  52. return cc ? EIO : 0;
  53. }
  54. int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
  55. {
  56. struct uv_cb_csc uvcb = {
  57. .header.cmd = UVC_CMD_CREATE_SEC_CPU,
  58. .header.len = sizeof(uvcb),
  59. };
  60. int cc;
  61. if (kvm_s390_pv_cpu_get_handle(vcpu))
  62. return -EINVAL;
  63. vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
  64. get_order(uv_info.guest_cpu_stor_len));
  65. if (!vcpu->arch.pv.stor_base)
  66. return -ENOMEM;
  67. /* Input */
  68. uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
  69. uvcb.num = vcpu->arch.sie_block->icpua;
  70. uvcb.state_origin = (u64)vcpu->arch.sie_block;
  71. uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
  72. /* Alloc Secure Instruction Data Area Designation */
  73. vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
  74. if (!vcpu->arch.sie_block->sidad) {
  75. free_pages(vcpu->arch.pv.stor_base,
  76. get_order(uv_info.guest_cpu_stor_len));
  77. return -ENOMEM;
  78. }
  79. cc = uv_call(0, (u64)&uvcb);
  80. *rc = uvcb.header.rc;
  81. *rrc = uvcb.header.rrc;
  82. KVM_UV_EVENT(vcpu->kvm, 3,
  83. "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
  84. vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
  85. uvcb.header.rrc);
  86. if (cc) {
  87. u16 dummy;
  88. kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
  89. return -EIO;
  90. }
  91. /* Output */
  92. vcpu->arch.pv.handle = uvcb.cpu_handle;
  93. vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
  94. vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
  95. vcpu->arch.sie_block->sdf = 2;
  96. kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
  97. return 0;
  98. }
  99. /* only free resources when the destroy was successful */
  100. static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
  101. {
  102. vfree(kvm->arch.pv.stor_var);
  103. free_pages(kvm->arch.pv.stor_base,
  104. get_order(uv_info.guest_base_stor_len));
  105. kvm_s390_clear_pv_state(kvm);
  106. }
  107. static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
  108. {
  109. unsigned long base = uv_info.guest_base_stor_len;
  110. unsigned long virt = uv_info.guest_virt_var_stor_len;
  111. unsigned long npages = 0, vlen = 0;
  112. kvm->arch.pv.stor_var = NULL;
  113. kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
  114. if (!kvm->arch.pv.stor_base)
  115. return -ENOMEM;
  116. /*
  117. * Calculate current guest storage for allocation of the
  118. * variable storage, which is based on the length in MB.
  119. *
  120. * Slots are sorted by GFN
  121. */
  122. mutex_lock(&kvm->slots_lock);
  123. npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
  124. mutex_unlock(&kvm->slots_lock);
  125. kvm->arch.pv.guest_len = npages * PAGE_SIZE;
  126. /* Allocate variable storage */
  127. vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
  128. vlen += uv_info.guest_virt_base_stor_len;
  129. kvm->arch.pv.stor_var = vzalloc(vlen);
  130. if (!kvm->arch.pv.stor_var)
  131. goto out_err;
  132. return 0;
  133. out_err:
  134. kvm_s390_pv_dealloc_vm(kvm);
  135. return -ENOMEM;
  136. }
  137. /* this should not fail, but if it does, we must not free the donated memory */
  138. int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
  139. {
  140. int cc;
  141. cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
  142. UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
  143. WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
  144. /*
  145. * if the mm still has a mapping, make all its pages accessible
  146. * before destroying the guest
  147. */
  148. if (mmget_not_zero(kvm->mm)) {
  149. s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
  150. mmput(kvm->mm);
  151. }
  152. if (!cc) {
  153. atomic_dec(&kvm->mm->context.protected_count);
  154. kvm_s390_pv_dealloc_vm(kvm);
  155. } else {
  156. /* Intended memory leak on "impossible" error */
  157. s390_replace_asce(kvm->arch.gmap);
  158. }
  159. KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
  160. WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
  161. return cc ? -EIO : 0;
  162. }
  163. static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
  164. struct mm_struct *mm)
  165. {
  166. struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
  167. u16 dummy;
  168. /*
  169. * No locking is needed since this is the last thread of the last user of this
  170. * struct mm.
  171. * When the struct kvm gets deinitialized, this notifier is also
  172. * unregistered. This means that if this notifier runs, then the
  173. * struct kvm is still valid.
  174. */
  175. kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
  176. }
  177. static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
  178. .release = kvm_s390_pv_mmu_notifier_release,
  179. };
  180. int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
  181. {
  182. struct uv_cb_cgc uvcb = {
  183. .header.cmd = UVC_CMD_CREATE_SEC_CONF,
  184. .header.len = sizeof(uvcb)
  185. };
  186. int cc, ret;
  187. u16 dummy;
  188. ret = kvm_s390_pv_alloc_vm(kvm);
  189. if (ret)
  190. return ret;
  191. /* Inputs */
  192. uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
  193. uvcb.guest_stor_len = kvm->arch.pv.guest_len;
  194. uvcb.guest_asce = kvm->arch.gmap->asce;
  195. uvcb.guest_sca = (unsigned long)kvm->arch.sca;
  196. uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
  197. uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
  198. cc = uv_call_sched(0, (u64)&uvcb);
  199. *rc = uvcb.header.rc;
  200. *rrc = uvcb.header.rrc;
  201. KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
  202. uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
  203. /* Outputs */
  204. kvm->arch.pv.handle = uvcb.guest_handle;
  205. atomic_inc(&kvm->mm->context.protected_count);
  206. if (cc) {
  207. if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
  208. kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
  209. } else {
  210. atomic_dec(&kvm->mm->context.protected_count);
  211. kvm_s390_pv_dealloc_vm(kvm);
  212. }
  213. return -EIO;
  214. }
  215. kvm->arch.gmap->guest_handle = uvcb.guest_handle;
  216. /* Add the notifier only once. No races because we hold kvm->lock */
  217. if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
  218. kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
  219. mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
  220. }
  221. return 0;
  222. }
  223. int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
  224. u16 *rrc)
  225. {
  226. struct uv_cb_ssc uvcb = {
  227. .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
  228. .header.len = sizeof(uvcb),
  229. .sec_header_origin = (u64)hdr,
  230. .sec_header_len = length,
  231. .guest_handle = kvm_s390_pv_get_handle(kvm),
  232. };
  233. int cc = uv_call(0, (u64)&uvcb);
  234. *rc = uvcb.header.rc;
  235. *rrc = uvcb.header.rrc;
  236. KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
  237. *rc, *rrc);
  238. return cc ? -EINVAL : 0;
  239. }
  240. static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
  241. u64 offset, u16 *rc, u16 *rrc)
  242. {
  243. struct uv_cb_unp uvcb = {
  244. .header.cmd = UVC_CMD_UNPACK_IMG,
  245. .header.len = sizeof(uvcb),
  246. .guest_handle = kvm_s390_pv_get_handle(kvm),
  247. .gaddr = addr,
  248. .tweak[0] = tweak,
  249. .tweak[1] = offset,
  250. };
  251. int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
  252. *rc = uvcb.header.rc;
  253. *rrc = uvcb.header.rrc;
  254. if (ret && ret != -EAGAIN)
  255. KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
  256. uvcb.gaddr, *rc, *rrc);
  257. return ret;
  258. }
  259. int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
  260. unsigned long tweak, u16 *rc, u16 *rrc)
  261. {
  262. u64 offset = 0;
  263. int ret = 0;
  264. if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
  265. return -EINVAL;
  266. KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
  267. addr, size);
  268. while (offset < size) {
  269. ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
  270. if (ret == -EAGAIN) {
  271. cond_resched();
  272. if (fatal_signal_pending(current))
  273. break;
  274. continue;
  275. }
  276. if (ret)
  277. break;
  278. addr += PAGE_SIZE;
  279. offset += PAGE_SIZE;
  280. }
  281. if (!ret)
  282. KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
  283. return ret;
  284. }
  285. int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
  286. {
  287. struct uv_cb_cpu_set_state uvcb = {
  288. .header.cmd = UVC_CMD_CPU_SET_STATE,
  289. .header.len = sizeof(uvcb),
  290. .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
  291. .state = state,
  292. };
  293. int cc;
  294. cc = uv_call(0, (u64)&uvcb);
  295. KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
  296. vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
  297. if (cc)
  298. return -EINVAL;
  299. return 0;
  300. }
  301. int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
  302. {
  303. struct uv_cb_dump_cpu uvcb = {
  304. .header.cmd = UVC_CMD_DUMP_CPU,
  305. .header.len = sizeof(uvcb),
  306. .cpu_handle = vcpu->arch.pv.handle,
  307. .dump_area_origin = (u64)buff,
  308. };
  309. int cc;
  310. cc = uv_call_sched(0, (u64)&uvcb);
  311. *rc = uvcb.header.rc;
  312. *rrc = uvcb.header.rrc;
  313. return cc;
  314. }
  315. /* Size of the cache for the storage state dump data. 1MB for now */
  316. #define DUMP_BUFF_LEN HPAGE_SIZE
  317. /**
  318. * kvm_s390_pv_dump_stor_state
  319. *
  320. * @kvm: pointer to the guest's KVM struct
  321. * @buff_user: Userspace pointer where we will write the results to
  322. * @gaddr: Starting absolute guest address for which the storage state
  323. * is requested.
  324. * @buff_user_len: Length of the buff_user buffer
  325. * @rc: Pointer to where the uvcb return code is stored
  326. * @rrc: Pointer to where the uvcb return reason code is stored
  327. *
  328. * Stores buff_len bytes of tweak component values to buff_user
  329. * starting with the 1MB block specified by the absolute guest address
  330. * (gaddr). The gaddr pointer will be updated with the last address
  331. * for which data was written when returning to userspace. buff_user
  332. * might be written to even if an error rc is returned. For instance
  333. * if we encounter a fault after writing the first page of data.
  334. *
  335. * Context: kvm->lock needs to be held
  336. *
  337. * Return:
  338. * 0 on success
  339. * -ENOMEM if allocating the cache fails
  340. * -EINVAL if gaddr is not aligned to 1MB
  341. * -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
  342. * -EINVAL if the UV call fails, rc and rrc will be set in this case
  343. * -EFAULT if copying the result to buff_user failed
  344. */
  345. int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
  346. u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
  347. {
  348. struct uv_cb_dump_stor_state uvcb = {
  349. .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
  350. .header.len = sizeof(uvcb),
  351. .config_handle = kvm->arch.pv.handle,
  352. .gaddr = *gaddr,
  353. .dump_area_origin = 0,
  354. };
  355. const u64 increment_len = uv_info.conf_dump_storage_state_len;
  356. size_t buff_kvm_size;
  357. size_t size_done = 0;
  358. u8 *buff_kvm = NULL;
  359. int cc, ret;
  360. ret = -EINVAL;
  361. /* UV call processes 1MB guest storage chunks at a time */
  362. if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
  363. goto out;
  364. /*
  365. * We provide the storage state for 1MB chunks of guest
  366. * storage. The buffer will need to be aligned to
  367. * conf_dump_storage_state_len so we don't end on a partial
  368. * chunk.
  369. */
  370. if (!buff_user_len ||
  371. !IS_ALIGNED(buff_user_len, increment_len))
  372. goto out;
  373. /*
  374. * Allocate a buffer from which we will later copy to the user
  375. * process. We don't want userspace to dictate our buffer size
  376. * so we limit it to DUMP_BUFF_LEN.
  377. */
  378. ret = -ENOMEM;
  379. buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
  380. buff_kvm = vzalloc(buff_kvm_size);
  381. if (!buff_kvm)
  382. goto out;
  383. ret = 0;
  384. uvcb.dump_area_origin = (u64)buff_kvm;
  385. /* We will loop until the user buffer is filled or an error occurs */
  386. do {
  387. /* Get 1MB worth of guest storage state data */
  388. cc = uv_call_sched(0, (u64)&uvcb);
  389. /* All or nothing */
  390. if (cc) {
  391. ret = -EINVAL;
  392. break;
  393. }
  394. size_done += increment_len;
  395. uvcb.dump_area_origin += increment_len;
  396. buff_user_len -= increment_len;
  397. uvcb.gaddr += HPAGE_SIZE;
  398. /* KVM Buffer full, time to copy to the process */
  399. if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
  400. if (copy_to_user(buff_user, buff_kvm, size_done)) {
  401. ret = -EFAULT;
  402. break;
  403. }
  404. buff_user += size_done;
  405. size_done = 0;
  406. uvcb.dump_area_origin = (u64)buff_kvm;
  407. }
  408. } while (buff_user_len);
  409. /* Report back where we ended dumping */
  410. *gaddr = uvcb.gaddr;
  411. /* Lets only log errors, we don't want to spam */
  412. out:
  413. if (ret)
  414. KVM_UV_EVENT(kvm, 3,
  415. "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
  416. uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
  417. *rc = uvcb.header.rc;
  418. *rrc = uvcb.header.rrc;
  419. vfree(buff_kvm);
  420. return ret;
  421. }
  422. /**
  423. * kvm_s390_pv_dump_complete
  424. *
  425. * @kvm: pointer to the guest's KVM struct
  426. * @buff_user: Userspace pointer where we will write the results to
  427. * @rc: Pointer to where the uvcb return code is stored
  428. * @rrc: Pointer to where the uvcb return reason code is stored
  429. *
  430. * Completes the dumping operation and writes the completion data to
  431. * user space.
  432. *
  433. * Context: kvm->lock needs to be held
  434. *
  435. * Return:
  436. * 0 on success
  437. * -ENOMEM if allocating the completion buffer fails
  438. * -EINVAL if the UV call fails, rc and rrc will be set in this case
  439. * -EFAULT if copying the result to buff_user failed
  440. */
  441. int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
  442. u16 *rc, u16 *rrc)
  443. {
  444. struct uv_cb_dump_complete complete = {
  445. .header.len = sizeof(complete),
  446. .header.cmd = UVC_CMD_DUMP_COMPLETE,
  447. .config_handle = kvm_s390_pv_get_handle(kvm),
  448. };
  449. u64 *compl_data;
  450. int ret;
  451. /* Allocate dump area */
  452. compl_data = vzalloc(uv_info.conf_dump_finalize_len);
  453. if (!compl_data)
  454. return -ENOMEM;
  455. complete.dump_area_origin = (u64)compl_data;
  456. ret = uv_call_sched(0, (u64)&complete);
  457. *rc = complete.header.rc;
  458. *rrc = complete.header.rrc;
  459. KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
  460. complete.header.rc, complete.header.rrc);
  461. if (!ret) {
  462. /*
  463. * kvm_s390_pv_dealloc_vm() will also (mem)set
  464. * this to false on a reboot or other destroy
  465. * operation for this vm.
  466. */
  467. kvm->arch.pv.dumping = false;
  468. kvm_s390_vcpu_unblock_all(kvm);
  469. ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
  470. if (ret)
  471. ret = -EFAULT;
  472. }
  473. vfree(compl_data);
  474. /* If the UVC returned an error, translate it to -EINVAL */
  475. if (ret > 0)
  476. ret = -EINVAL;
  477. return ret;
  478. }