xen.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
  4. * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  5. *
  6. * KVM Xen emulation
  7. */
  8. #include "x86.h"
  9. #include "xen.h"
  10. #include "hyperv.h"
  11. #include "lapic.h"
  12. #include <linux/eventfd.h>
  13. #include <linux/kvm_host.h>
  14. #include <linux/sched/stat.h>
  15. #include <trace/events/kvm.h>
  16. #include <xen/interface/xen.h>
  17. #include <xen/interface/vcpu.h>
  18. #include <xen/interface/version.h>
  19. #include <xen/interface/event_channel.h>
  20. #include <xen/interface/sched.h>
  21. #include "trace.h"
  22. static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm);
  23. static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
  24. static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r);
  25. DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
  26. static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
  27. {
  28. struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
  29. struct pvclock_wall_clock *wc;
  30. gpa_t gpa = gfn_to_gpa(gfn);
  31. u32 *wc_sec_hi;
  32. u32 wc_version;
  33. u64 wall_nsec;
  34. int ret = 0;
  35. int idx = srcu_read_lock(&kvm->srcu);
  36. if (gfn == GPA_INVALID) {
  37. kvm_gpc_deactivate(kvm, gpc);
  38. goto out;
  39. }
  40. do {
  41. ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
  42. PAGE_SIZE);
  43. if (ret)
  44. goto out;
  45. /*
  46. * This code mirrors kvm_write_wall_clock() except that it writes
  47. * directly through the pfn cache and doesn't mark the page dirty.
  48. */
  49. wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
  50. /* It could be invalid again already, so we need to check */
  51. read_lock_irq(&gpc->lock);
  52. if (gpc->valid)
  53. break;
  54. read_unlock_irq(&gpc->lock);
  55. } while (1);
  56. /* Paranoia checks on the 32-bit struct layout */
  57. BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
  58. BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
  59. BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
  60. #ifdef CONFIG_X86_64
  61. /* Paranoia checks on the 64-bit struct layout */
  62. BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
  63. BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);
  64. if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
  65. struct shared_info *shinfo = gpc->khva;
  66. wc_sec_hi = &shinfo->wc_sec_hi;
  67. wc = &shinfo->wc;
  68. } else
  69. #endif
  70. {
  71. struct compat_shared_info *shinfo = gpc->khva;
  72. wc_sec_hi = &shinfo->arch.wc_sec_hi;
  73. wc = &shinfo->wc;
  74. }
  75. /* Increment and ensure an odd value */
  76. wc_version = wc->version = (wc->version + 1) | 1;
  77. smp_wmb();
  78. wc->nsec = do_div(wall_nsec, 1000000000);
  79. wc->sec = (u32)wall_nsec;
  80. *wc_sec_hi = wall_nsec >> 32;
  81. smp_wmb();
  82. wc->version = wc_version + 1;
  83. read_unlock_irq(&gpc->lock);
  84. kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
  85. out:
  86. srcu_read_unlock(&kvm->srcu, idx);
  87. return ret;
  88. }
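/*
 * Deliver the Xen timer VIRQ if xen_timer_callback() has flagged it as
 * pending: raise the configured timer_virq port as a 2-level event channel
 * on this vCPU, then clear timer_expires and the pending counter.
 */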
  89. void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu)
  90. {
  91. if (atomic_read(&vcpu->arch.xen.timer_pending) > 0) {
  92. struct kvm_xen_evtchn e;
  93. e.vcpu_id = vcpu->vcpu_id;
  94. e.vcpu_idx = vcpu->vcpu_idx;
  95. e.port = vcpu->arch.xen.timer_virq;
  96. e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
  97. kvm_xen_set_evtchn(&e, vcpu->kvm);
  98. vcpu->arch.xen.timer_expires = 0;
  99. atomic_set(&vcpu->arch.xen.timer_pending, 0);
  100. }
  101. }
  102. static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
  103. {
  104. struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
  105. arch.xen.timer);
  106. if (atomic_read(&vcpu->arch.xen.timer_pending))
  107. return HRTIMER_NORESTART;
  108. atomic_inc(&vcpu->arch.xen.timer_pending);
  109. kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
  110. kvm_vcpu_kick(vcpu);
  111. return HRTIMER_NORESTART;
  112. }
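/*
 * Arm the per-vCPU Xen timer. 'guest_abs' is the absolute expiry time in
 * guest (kvmclock) nanoseconds and 'delta_ns' its distance from now; a
 * non-positive delta means the deadline has already passed, so the callback
 * is invoked synchronously instead of arming the hrtimer.
 */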
  113. static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
  114. {
  115. atomic_set(&vcpu->arch.xen.timer_pending, 0);
  116. vcpu->arch.xen.timer_expires = guest_abs;
  117. if (delta_ns <= 0) {
  118. xen_timer_callback(&vcpu->arch.xen.timer);
  119. } else {
  120. ktime_t ktime_now = ktime_get();
  121. hrtimer_start(&vcpu->arch.xen.timer,
  122. ktime_add_ns(ktime_now, delta_ns),
  123. HRTIMER_MODE_ABS_HARD);
  124. }
  125. }
  126. static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu)
  127. {
  128. hrtimer_cancel(&vcpu->arch.xen.timer);
  129. vcpu->arch.xen.timer_expires = 0;
  130. atomic_set(&vcpu->arch.xen.timer_pending, 0);
  131. }
  132. static void kvm_xen_init_timer(struct kvm_vcpu *vcpu)
  133. {
  134. hrtimer_init(&vcpu->arch.xen.timer, CLOCK_MONOTONIC,
  135. HRTIMER_MODE_ABS_HARD);
  136. vcpu->arch.xen.timer.function = xen_timer_callback;
  137. }
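/*
 * Runstate accounting: charge the time elapsed since the last transition to
 * the state being left. While the vCPU was RUNSTATE_running, any growth in
 * the task's scheduler run_delay is carved out of that interval and
 * accounted as RUNSTATE_runnable (i.e. steal time) instead.
 */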
  138. static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
  139. {
  140. struct kvm_vcpu_xen *vx = &v->arch.xen;
  141. u64 now = get_kvmclock_ns(v->kvm);
  142. u64 delta_ns = now - vx->runstate_entry_time;
  143. u64 run_delay = current->sched_info.run_delay;
  144. if (unlikely(!vx->runstate_entry_time))
  145. vx->current_runstate = RUNSTATE_offline;
  146. /*
  147. * Time waiting for the scheduler isn't "stolen" if the
  148. * vCPU wasn't running anyway.
  149. */
  150. if (vx->current_runstate == RUNSTATE_running) {
  151. u64 steal_ns = run_delay - vx->last_steal;
  152. delta_ns -= steal_ns;
  153. vx->runstate_times[RUNSTATE_runnable] += steal_ns;
  154. }
  155. vx->last_steal = run_delay;
  156. vx->runstate_times[vx->current_runstate] += delta_ns;
  157. vx->current_runstate = state;
  158. vx->runstate_entry_time = now;
  159. }
  160. void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
  161. {
  162. struct kvm_vcpu_xen *vx = &v->arch.xen;
  163. struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
  164. uint64_t *user_times;
  165. unsigned long flags;
  166. size_t user_len;
  167. int *user_state;
  168. kvm_xen_update_runstate(v, state);
  169. if (!vx->runstate_cache.active)
  170. return;
  171. if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
  172. user_len = sizeof(struct vcpu_runstate_info);
  173. else
  174. user_len = sizeof(struct compat_vcpu_runstate_info);
  175. read_lock_irqsave(&gpc->lock, flags);
  176. while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
  177. user_len)) {
  178. read_unlock_irqrestore(&gpc->lock, flags);
  179. /* When invoked from kvm_sched_out() we cannot sleep */
  180. if (state == RUNSTATE_runnable)
  181. return;
  182. if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
  183. return;
  184. read_lock_irqsave(&gpc->lock, flags);
  185. }
  186. /*
  187. * The only difference between 32-bit and 64-bit versions of the
  188. * runstate struct is the alignment of uint64_t in 32-bit, which
  189. * means that the 64-bit version has an additional 4 bytes of
  190. * padding after the first field 'state'.
  191. *
  192. * So we use 'int __user *user_state' to point to the state field,
  193. * and 'uint64_t __user *user_times' for runstate_entry_time. So
  194. * the actual array of time[] in each state starts at user_times[1].
  195. */
  196. BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
  197. BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
  198. BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
  199. #ifdef CONFIG_X86_64
  200. BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
  201. offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
  202. BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
  203. offsetof(struct compat_vcpu_runstate_info, time) + 4);
  204. #endif
  205. user_state = gpc->khva;
  206. if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
  207. user_times = gpc->khva + offsetof(struct vcpu_runstate_info,
  208. state_entry_time);
  209. else
  210. user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info,
  211. state_entry_time);
  212. /*
  213. * First write the updated state_entry_time at the appropriate
  214. * location, user_times[0], as selected for the guest ABI above.
  215. */
  216. BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
  217. sizeof(user_times[0]));
  218. BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
  219. sizeof(user_times[0]));
  220. user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
  221. smp_wmb();
  222. /*
  223. * Next, write the new runstate. This is in the *same* place
  224. * for 32-bit and 64-bit guests, asserted here for paranoia.
  225. */
  226. BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
  227. offsetof(struct compat_vcpu_runstate_info, state));
  228. BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
  229. sizeof(vx->current_runstate));
  230. BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
  231. sizeof(vx->current_runstate));
  232. *user_state = vx->current_runstate;
  233. /*
  234. * Write the actual runstate times immediately after the
  235. * runstate_entry_time.
  236. */
  237. BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
  238. offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
  239. BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
  240. offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
  241. BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
  242. sizeof_field(struct compat_vcpu_runstate_info, time));
  243. BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
  244. sizeof(vx->runstate_times));
  245. memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
  246. smp_wmb();
  247. /*
  248. * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
  249. * runstate_entry_time field.
  250. */
  251. user_times[0] &= ~XEN_RUNSTATE_UPDATE;
  252. smp_wmb();
  253. read_unlock_irqrestore(&gpc->lock, flags);
  254. mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
  255. }
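/*
 * Deliver the per-vCPU upcall vector (KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR)
 * as a fixed, physically-addressed interrupt to this vCPU's local APIC,
 * using the fast delivery path which cannot fail for physical unicast.
 */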
  256. static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
  257. {
  258. struct kvm_lapic_irq irq = { };
  259. int r;
  260. irq.dest_id = v->vcpu_id;
  261. irq.vector = v->arch.xen.upcall_vector;
  262. irq.dest_mode = APIC_DEST_PHYSICAL;
  263. irq.shorthand = APIC_DEST_NOSHORT;
  264. irq.delivery_mode = APIC_DM_FIXED;
  265. irq.level = 1;
  266. /* The fast version will always work for physical unicast */
  267. WARN_ON_ONCE(!kvm_irq_delivery_to_apic_fast(v->kvm, NULL, &irq, &r, NULL));
  268. }
  269. /*
  270. * On event channel delivery, the vcpu_info may not have been accessible.
  271. * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which
  272. * need to be marked into the vcpu_info (and evtchn_upcall_pending set).
  273. * Do so now that we can sleep in the context of the vCPU to bring the
  274. * page in, and refresh the pfn cache for it.
  275. */
  276. void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
  277. {
  278. unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
  279. struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
  280. unsigned long flags;
  281. if (!evtchn_pending_sel)
  282. return;
  283. /*
  284. * Yes, this is an open-coded loop. But that's just what put_user()
  285. * does anyway. Page it in and retry the instruction. We're just a
  286. * little more honest about it.
  287. */
  288. read_lock_irqsave(&gpc->lock, flags);
  289. while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
  290. sizeof(struct vcpu_info))) {
  291. read_unlock_irqrestore(&gpc->lock, flags);
  292. if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
  293. sizeof(struct vcpu_info)))
  294. return;
  295. read_lock_irqsave(&gpc->lock, flags);
  296. }
  297. /* Now gpc->khva is a valid kernel address for the vcpu_info */
  298. if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
  299. struct vcpu_info *vi = gpc->khva;
  300. asm volatile(LOCK_PREFIX "orq %0, %1\n"
  301. "notq %0\n"
  302. LOCK_PREFIX "andq %0, %2\n"
  303. : "=r" (evtchn_pending_sel),
  304. "+m" (vi->evtchn_pending_sel),
  305. "+m" (v->arch.xen.evtchn_pending_sel)
  306. : "0" (evtchn_pending_sel));
  307. WRITE_ONCE(vi->evtchn_upcall_pending, 1);
  308. } else {
  309. u32 evtchn_pending_sel32 = evtchn_pending_sel;
  310. struct compat_vcpu_info *vi = gpc->khva;
  311. asm volatile(LOCK_PREFIX "orl %0, %1\n"
  312. "notl %0\n"
  313. LOCK_PREFIX "andl %0, %2\n"
  314. : "=r" (evtchn_pending_sel32),
  315. "+m" (vi->evtchn_pending_sel),
  316. "+m" (v->arch.xen.evtchn_pending_sel)
  317. : "0" (evtchn_pending_sel32));
  318. WRITE_ONCE(vi->evtchn_upcall_pending, 1);
  319. }
  320. read_unlock_irqrestore(&gpc->lock, flags);
  321. /* For the per-vCPU lapic vector, deliver it as MSI. */
  322. if (v->arch.xen.upcall_vector)
  323. kvm_xen_inject_vcpu_vector(v);
  324. mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
  325. }
  326. int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
  327. {
  328. struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
  329. unsigned long flags;
  330. u8 rc = 0;
  331. /*
  332. * If the global upcall vector (HVMIRQ_callback_vector) is set and
  333. * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
  334. */
  335. /* No need for compat handling here */
  336. BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
  337. offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
  338. BUILD_BUG_ON(sizeof(rc) !=
  339. sizeof_field(struct vcpu_info, evtchn_upcall_pending));
  340. BUILD_BUG_ON(sizeof(rc) !=
  341. sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
  342. read_lock_irqsave(&gpc->lock, flags);
  343. while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
  344. sizeof(struct vcpu_info))) {
  345. read_unlock_irqrestore(&gpc->lock, flags);
  346. /*
  347. * This function gets called from kvm_vcpu_block() after setting the
  348. * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
  349. * from a HLT. So we really mustn't sleep. If the page ended up absent
  350. * at that point, just return 1 in order to trigger an immediate wake,
  351. * and we'll end up getting called again from a context where we *can*
  352. * fault in the page and wait for it.
  353. */
  354. if (in_atomic() || !task_is_running(current))
  355. return 1;
  356. if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
  357. sizeof(struct vcpu_info))) {
  358. /*
  359. * If this failed, userspace has screwed up the
  360. * vcpu_info mapping. No interrupts for you.
  361. */
  362. return 0;
  363. }
  364. read_lock_irqsave(&gpc->lock, flags);
  365. }
  366. rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
  367. read_unlock_irqrestore(&gpc->lock, flags);
  368. return rc;
  369. }
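/*
 * VM-wide Xen attributes, set via the KVM_XEN_HVM_SET_ATTR ioctl on the VM
 * file descriptor. A minimal userspace sketch (illustrative only, assuming a
 * VM with KVM_CAP_XEN_HVM already enabled and 'vm_fd' as its descriptor):
 *
 *	struct kvm_xen_hvm_attr ha = {
 *		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
 *		.u.long_mode = 1,
 *	};
 *	ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &ha);
 */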
  370. int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
  371. {
  372. int r = -ENOENT;
  373. switch (data->type) {
  374. case KVM_XEN_ATTR_TYPE_LONG_MODE:
  375. if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
  376. r = -EINVAL;
  377. } else {
  378. mutex_lock(&kvm->lock);
  379. kvm->arch.xen.long_mode = !!data->u.long_mode;
  380. mutex_unlock(&kvm->lock);
  381. r = 0;
  382. }
  383. break;
  384. case KVM_XEN_ATTR_TYPE_SHARED_INFO:
  385. mutex_lock(&kvm->lock);
  386. r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
  387. mutex_unlock(&kvm->lock);
  388. break;
  389. case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
  390. if (data->u.vector && data->u.vector < 0x10)
  391. r = -EINVAL;
  392. else {
  393. mutex_lock(&kvm->lock);
  394. kvm->arch.xen.upcall_vector = data->u.vector;
  395. mutex_unlock(&kvm->lock);
  396. r = 0;
  397. }
  398. break;
  399. case KVM_XEN_ATTR_TYPE_EVTCHN:
  400. r = kvm_xen_setattr_evtchn(kvm, data);
  401. break;
  402. case KVM_XEN_ATTR_TYPE_XEN_VERSION:
  403. mutex_lock(&kvm->lock);
  404. kvm->arch.xen.xen_version = data->u.xen_version;
  405. mutex_unlock(&kvm->lock);
  406. r = 0;
  407. break;
  408. default:
  409. break;
  410. }
  411. return r;
  412. }
  413. int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
  414. {
  415. int r = -ENOENT;
  416. mutex_lock(&kvm->lock);
  417. switch (data->type) {
  418. case KVM_XEN_ATTR_TYPE_LONG_MODE:
  419. data->u.long_mode = kvm->arch.xen.long_mode;
  420. r = 0;
  421. break;
  422. case KVM_XEN_ATTR_TYPE_SHARED_INFO:
  423. if (kvm->arch.xen.shinfo_cache.active)
  424. data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
  425. else
  426. data->u.shared_info.gfn = GPA_INVALID;
  427. r = 0;
  428. break;
  429. case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
  430. data->u.vector = kvm->arch.xen.upcall_vector;
  431. r = 0;
  432. break;
  433. case KVM_XEN_ATTR_TYPE_XEN_VERSION:
  434. data->u.xen_version = kvm->arch.xen.xen_version;
  435. r = 0;
  436. break;
  437. default:
  438. break;
  439. }
  440. mutex_unlock(&kvm->lock);
  441. return r;
  442. }
  443. int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
  444. {
  445. int idx, r = -ENOENT;
  446. mutex_lock(&vcpu->kvm->lock);
  447. idx = srcu_read_lock(&vcpu->kvm->srcu);
  448. switch (data->type) {
  449. case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
  450. /* No compat necessary here. */
  451. BUILD_BUG_ON(sizeof(struct vcpu_info) !=
  452. sizeof(struct compat_vcpu_info));
  453. BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
  454. offsetof(struct compat_vcpu_info, time));
  455. if (data->u.gpa == GPA_INVALID) {
  456. kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
  457. r = 0;
  458. break;
  459. }
  460. r = kvm_gpc_activate(vcpu->kvm,
  461. &vcpu->arch.xen.vcpu_info_cache, NULL,
  462. KVM_HOST_USES_PFN, data->u.gpa,
  463. sizeof(struct vcpu_info));
  464. if (!r)
  465. kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
  466. break;
  467. case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
  468. if (data->u.gpa == GPA_INVALID) {
  469. kvm_gpc_deactivate(vcpu->kvm,
  470. &vcpu->arch.xen.vcpu_time_info_cache);
  471. r = 0;
  472. break;
  473. }
  474. r = kvm_gpc_activate(vcpu->kvm,
  475. &vcpu->arch.xen.vcpu_time_info_cache,
  476. NULL, KVM_HOST_USES_PFN, data->u.gpa,
  477. sizeof(struct pvclock_vcpu_time_info));
  478. if (!r)
  479. kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
  480. break;
  481. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
  482. if (!sched_info_on()) {
  483. r = -EOPNOTSUPP;
  484. break;
  485. }
  486. if (data->u.gpa == GPA_INVALID) {
  487. kvm_gpc_deactivate(vcpu->kvm,
  488. &vcpu->arch.xen.runstate_cache);
  489. r = 0;
  490. break;
  491. }
  492. r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
  493. NULL, KVM_HOST_USES_PFN, data->u.gpa,
  494. sizeof(struct vcpu_runstate_info));
  495. break;
  496. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
  497. if (!sched_info_on()) {
  498. r = -EOPNOTSUPP;
  499. break;
  500. }
  501. if (data->u.runstate.state > RUNSTATE_offline) {
  502. r = -EINVAL;
  503. break;
  504. }
  505. kvm_xen_update_runstate(vcpu, data->u.runstate.state);
  506. r = 0;
  507. break;
  508. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
  509. if (!sched_info_on()) {
  510. r = -EOPNOTSUPP;
  511. break;
  512. }
  513. if (data->u.runstate.state > RUNSTATE_offline) {
  514. r = -EINVAL;
  515. break;
  516. }
  517. if (data->u.runstate.state_entry_time !=
  518. (data->u.runstate.time_running +
  519. data->u.runstate.time_runnable +
  520. data->u.runstate.time_blocked +
  521. data->u.runstate.time_offline)) {
  522. r = -EINVAL;
  523. break;
  524. }
  525. if (get_kvmclock_ns(vcpu->kvm) <
  526. data->u.runstate.state_entry_time) {
  527. r = -EINVAL;
  528. break;
  529. }
  530. vcpu->arch.xen.current_runstate = data->u.runstate.state;
  531. vcpu->arch.xen.runstate_entry_time =
  532. data->u.runstate.state_entry_time;
  533. vcpu->arch.xen.runstate_times[RUNSTATE_running] =
  534. data->u.runstate.time_running;
  535. vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
  536. data->u.runstate.time_runnable;
  537. vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
  538. data->u.runstate.time_blocked;
  539. vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
  540. data->u.runstate.time_offline;
  541. vcpu->arch.xen.last_steal = current->sched_info.run_delay;
  542. r = 0;
  543. break;
  544. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
  545. if (!sched_info_on()) {
  546. r = -EOPNOTSUPP;
  547. break;
  548. }
  549. if (data->u.runstate.state > RUNSTATE_offline &&
  550. data->u.runstate.state != (u64)-1) {
  551. r = -EINVAL;
  552. break;
  553. }
  554. /* The adjustment must add up */
  555. if (data->u.runstate.state_entry_time !=
  556. (data->u.runstate.time_running +
  557. data->u.runstate.time_runnable +
  558. data->u.runstate.time_blocked +
  559. data->u.runstate.time_offline)) {
  560. r = -EINVAL;
  561. break;
  562. }
  563. if (get_kvmclock_ns(vcpu->kvm) <
  564. (vcpu->arch.xen.runstate_entry_time +
  565. data->u.runstate.state_entry_time)) {
  566. r = -EINVAL;
  567. break;
  568. }
  569. vcpu->arch.xen.runstate_entry_time +=
  570. data->u.runstate.state_entry_time;
  571. vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
  572. data->u.runstate.time_running;
  573. vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
  574. data->u.runstate.time_runnable;
  575. vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
  576. data->u.runstate.time_blocked;
  577. vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
  578. data->u.runstate.time_offline;
  579. if (data->u.runstate.state <= RUNSTATE_offline)
  580. kvm_xen_update_runstate(vcpu, data->u.runstate.state);
  581. r = 0;
  582. break;
  583. case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID:
  584. if (data->u.vcpu_id >= KVM_MAX_VCPUS)
  585. r = -EINVAL;
  586. else {
  587. vcpu->arch.xen.vcpu_id = data->u.vcpu_id;
  588. r = 0;
  589. }
  590. break;
  591. case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
  592. if (data->u.timer.port &&
  593. data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
  594. r = -EINVAL;
  595. break;
  596. }
  597. if (!vcpu->arch.xen.timer.function)
  598. kvm_xen_init_timer(vcpu);
  599. /* Stop the timer (if it's running) before changing the vector */
  600. kvm_xen_stop_timer(vcpu);
  601. vcpu->arch.xen.timer_virq = data->u.timer.port;
  602. /* Start the timer if the new value has a valid vector+expiry. */
  603. if (data->u.timer.port && data->u.timer.expires_ns)
  604. kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
  605. data->u.timer.expires_ns -
  606. get_kvmclock_ns(vcpu->kvm));
  607. r = 0;
  608. break;
  609. case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
  610. if (data->u.vector && data->u.vector < 0x10)
  611. r = -EINVAL;
  612. else {
  613. vcpu->arch.xen.upcall_vector = data->u.vector;
  614. r = 0;
  615. }
  616. break;
  617. default:
  618. break;
  619. }
  620. srcu_read_unlock(&vcpu->kvm->srcu, idx);
  621. mutex_unlock(&vcpu->kvm->lock);
  622. return r;
  623. }
  624. int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
  625. {
  626. int r = -ENOENT;
  627. mutex_lock(&vcpu->kvm->lock);
  628. switch (data->type) {
  629. case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
  630. if (vcpu->arch.xen.vcpu_info_cache.active)
  631. data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
  632. else
  633. data->u.gpa = GPA_INVALID;
  634. r = 0;
  635. break;
  636. case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
  637. if (vcpu->arch.xen.vcpu_time_info_cache.active)
  638. data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
  639. else
  640. data->u.gpa = GPA_INVALID;
  641. r = 0;
  642. break;
  643. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
  644. if (!sched_info_on()) {
  645. r = -EOPNOTSUPP;
  646. break;
  647. }
  648. if (vcpu->arch.xen.runstate_cache.active) {
  649. data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
  650. r = 0;
  651. }
  652. break;
  653. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
  654. if (!sched_info_on()) {
  655. r = -EOPNOTSUPP;
  656. break;
  657. }
  658. data->u.runstate.state = vcpu->arch.xen.current_runstate;
  659. r = 0;
  660. break;
  661. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
  662. if (!sched_info_on()) {
  663. r = -EOPNOTSUPP;
  664. break;
  665. }
  666. data->u.runstate.state = vcpu->arch.xen.current_runstate;
  667. data->u.runstate.state_entry_time =
  668. vcpu->arch.xen.runstate_entry_time;
  669. data->u.runstate.time_running =
  670. vcpu->arch.xen.runstate_times[RUNSTATE_running];
  671. data->u.runstate.time_runnable =
  672. vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
  673. data->u.runstate.time_blocked =
  674. vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
  675. data->u.runstate.time_offline =
  676. vcpu->arch.xen.runstate_times[RUNSTATE_offline];
  677. r = 0;
  678. break;
  679. case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
  680. r = -EINVAL;
  681. break;
  682. case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID:
  683. data->u.vcpu_id = vcpu->arch.xen.vcpu_id;
  684. r = 0;
  685. break;
  686. case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
  687. data->u.timer.port = vcpu->arch.xen.timer_virq;
  688. data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
  689. data->u.timer.expires_ns = vcpu->arch.xen.timer_expires;
  690. r = 0;
  691. break;
  692. case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
  693. data->u.vector = vcpu->arch.xen.upcall_vector;
  694. r = 0;
  695. break;
  696. default:
  697. break;
  698. }
  699. mutex_unlock(&vcpu->kvm->lock);
  700. return r;
  701. }
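/*
 * Populate the guest's hypercall page when the guest writes its address to
 * the configured Xen hypercall MSR. With hypercall interception enabled,
 * each of the 128 32-byte stubs is generated as:
 *
 *	b8 NN NN NN NN		mov $<hypercall#>, %eax
 *	<3-byte vmcall/vmmcall>	patched for the host vendor (bytes 5-7)
 *	c3			ret
 *	cc ... cc		int3 padding out to 32 bytes
 *
 * Otherwise the page is copied from the userspace-provided blob.
 */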
  702. int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
  703. {
  704. struct kvm *kvm = vcpu->kvm;
  705. u32 page_num = data & ~PAGE_MASK;
  706. u64 page_addr = data & PAGE_MASK;
  707. bool lm = is_long_mode(vcpu);
  708. /* Latch long_mode for shared_info pages etc. */
  709. vcpu->kvm->arch.xen.long_mode = lm;
  710. /*
  711. * If Xen hypercall intercept is enabled, fill the hypercall
  712. * page with VMCALL/VMMCALL instructions since that's what
  713. * we catch. Else the VMM has provided the hypercall pages
  714. * with instructions of its own choosing, so use those.
  715. */
  716. if (kvm_xen_hypercall_enabled(kvm)) {
  717. u8 instructions[32];
  718. int i;
  719. if (page_num)
  720. return 1;
  721. /* mov imm32, %eax */
  722. instructions[0] = 0xb8;
  723. /* vmcall / vmmcall */
  724. static_call(kvm_x86_patch_hypercall)(vcpu, instructions + 5);
  725. /* ret */
  726. instructions[8] = 0xc3;
  727. /* int3 to pad */
  728. memset(instructions + 9, 0xcc, sizeof(instructions) - 9);
  729. for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
  730. *(u32 *)&instructions[1] = i;
  731. if (kvm_vcpu_write_guest(vcpu,
  732. page_addr + (i * sizeof(instructions)),
  733. instructions, sizeof(instructions)))
  734. return 1;
  735. }
  736. } else {
  737. /*
  738. * Note, truncation is a non-issue as 'lm' is guaranteed to be
  739. * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
  740. */
  741. hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
  742. : kvm->arch.xen_hvm_config.blob_addr_32;
  743. u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
  744. : kvm->arch.xen_hvm_config.blob_size_32;
  745. u8 *page;
  746. if (page_num >= blob_size)
  747. return 1;
  748. blob_addr += page_num * PAGE_SIZE;
  749. page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
  750. if (IS_ERR(page))
  751. return PTR_ERR(page);
  752. if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
  753. kfree(page);
  754. return 1;
  755. }
  756. }
  757. return 0;
  758. }
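/*
 * Handle KVM_XEN_HVM_CONFIG from userspace. Validates the feature flags and
 * maintains the kvm_xen_enabled static key: configuring a non-zero MSR when
 * none was set increments it, and clearing a previously set MSR performs a
 * deferred decrement (with the HZ timeout from DEFINE_STATIC_KEY_DEFERRED_FALSE).
 */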
  759. int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
  760. {
  761. /* Only some feature flags need to be *enabled* by userspace */
  762. u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
  763. KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
  764. if (xhc->flags & ~permitted_flags)
  765. return -EINVAL;
  766. /*
  767. * With hypercall interception the kernel generates its own
  768. * hypercall page so it must not be provided.
  769. */
  770. if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
  771. (xhc->blob_addr_32 || xhc->blob_addr_64 ||
  772. xhc->blob_size_32 || xhc->blob_size_64))
  773. return -EINVAL;
  774. mutex_lock(&kvm->lock);
  775. if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
  776. static_branch_inc(&kvm_xen_enabled.key);
  777. else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
  778. static_branch_slow_dec_deferred(&kvm_xen_enabled);
  779. memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
  780. mutex_unlock(&kvm->lock);
  781. return 0;
  782. }
  783. static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
  784. {
  785. kvm_rax_write(vcpu, result);
  786. return kvm_skip_emulated_instruction(vcpu);
  787. }
  788. static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
  789. {
  790. struct kvm_run *run = vcpu->run;
  791. if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
  792. return 1;
  793. return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
  794. }
  795. static inline int max_evtchn_port(struct kvm *kvm)
  796. {
  797. if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
  798. return EVTCHN_2L_NR_CHANNELS;
  799. else
  800. return COMPAT_EVTCHN_2L_NR_CHANNELS;
  801. }
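/*
 * For SCHEDOP_poll: returns true if any of the given ports is already set in
 * the shared_info evtchn_pending bitmap (or if the shared_info cache is not
 * currently valid, in which case we err on the side of not blocking), so the
 * caller knows whether the vCPU needs to be halted at all.
 */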
  802. static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
  803. evtchn_port_t *ports)
  804. {
  805. struct kvm *kvm = vcpu->kvm;
  806. struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
  807. unsigned long *pending_bits;
  808. unsigned long flags;
  809. bool ret = true;
  810. int idx, i;
  811. read_lock_irqsave(&gpc->lock, flags);
  812. idx = srcu_read_lock(&kvm->srcu);
  813. if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
  814. goto out_rcu;
  815. ret = false;
  816. if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
  817. struct shared_info *shinfo = gpc->khva;
  818. pending_bits = (unsigned long *)&shinfo->evtchn_pending;
  819. } else {
  820. struct compat_shared_info *shinfo = gpc->khva;
  821. pending_bits = (unsigned long *)&shinfo->evtchn_pending;
  822. }
  823. for (i = 0; i < nr_ports; i++) {
  824. if (test_bit(ports[i], pending_bits)) {
  825. ret = true;
  826. break;
  827. }
  828. }
  829. out_rcu:
  830. srcu_read_unlock(&kvm->srcu, idx);
  831. read_unlock_irqrestore(&gpc->lock, flags);
  832. return ret;
  833. }
  834. static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
  835. u64 param, u64 *r)
  836. {
  837. int idx, i;
  838. struct sched_poll sched_poll;
  839. evtchn_port_t port, *ports;
  840. gpa_t gpa;
  841. if (!longmode || !lapic_in_kernel(vcpu) ||
  842. !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
  843. return false;
  844. idx = srcu_read_lock(&vcpu->kvm->srcu);
  845. gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
  846. srcu_read_unlock(&vcpu->kvm->srcu, idx);
  847. if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
  848. sizeof(sched_poll))) {
  849. *r = -EFAULT;
  850. return true;
  851. }
  852. if (unlikely(sched_poll.nr_ports > 1)) {
  853. /* Xen (unofficially) limits number of pollers to 128 */
  854. if (sched_poll.nr_ports > 128) {
  855. *r = -EINVAL;
  856. return true;
  857. }
  858. ports = kmalloc_array(sched_poll.nr_ports,
  859. sizeof(*ports), GFP_KERNEL);
  860. if (!ports) {
  861. *r = -ENOMEM;
  862. return true;
  863. }
  864. } else
  865. ports = &port;
  866. for (i = 0; i < sched_poll.nr_ports; i++) {
  867. idx = srcu_read_lock(&vcpu->kvm->srcu);
  868. gpa = kvm_mmu_gva_to_gpa_system(vcpu,
  869. (gva_t)(sched_poll.ports + i),
  870. NULL);
  871. srcu_read_unlock(&vcpu->kvm->srcu, idx);
  872. if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
  873. &ports[i], sizeof(port))) {
  874. *r = -EFAULT;
  875. goto out;
  876. }
  877. if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
  878. *r = -EINVAL;
  879. goto out;
  880. }
  881. }
  882. if (sched_poll.nr_ports == 1)
  883. vcpu->arch.xen.poll_evtchn = port;
  884. else
  885. vcpu->arch.xen.poll_evtchn = -1;
  886. set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);
  887. if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
  888. vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
  889. if (sched_poll.timeout)
  890. mod_timer(&vcpu->arch.xen.poll_timer,
  891. jiffies + nsecs_to_jiffies(sched_poll.timeout));
  892. kvm_vcpu_halt(vcpu);
  893. if (sched_poll.timeout)
  894. del_timer(&vcpu->arch.xen.poll_timer);
  895. vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
  896. }
  897. vcpu->arch.xen.poll_evtchn = 0;
  898. *r = 0;
  899. out:
  900. /* Really, this is only needed in case of timeout */
  901. clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);
  902. if (unlikely(sched_poll.nr_ports > 1))
  903. kfree(ports);
  904. return true;
  905. }
  906. static void cancel_evtchn_poll(struct timer_list *t)
  907. {
  908. struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer);
  909. kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
  910. kvm_vcpu_kick(vcpu);
  911. }
  912. static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
  913. int cmd, u64 param, u64 *r)
  914. {
  915. switch (cmd) {
  916. case SCHEDOP_poll:
  917. if (kvm_xen_schedop_poll(vcpu, longmode, param, r))
  918. return true;
  919. fallthrough;
  920. case SCHEDOP_yield:
  921. kvm_vcpu_on_spin(vcpu, true);
  922. *r = 0;
  923. return true;
  924. default:
  925. break;
  926. }
  927. return false;
  928. }
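/*
 * 32-bit ABI layout of vcpu_set_singleshot_timer: the fields are identical,
 * but being packed it lacks the 4 bytes of trailing padding that the 64-bit
 * struct gains from uint64_t alignment, so it is 12 bytes long.
 */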
  929. struct compat_vcpu_set_singleshot_timer {
  930. uint64_t timeout_abs_ns;
  931. uint32_t flags;
  932. } __attribute__((packed));
  933. static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
  934. int vcpu_id, u64 param, u64 *r)
  935. {
  936. struct vcpu_set_singleshot_timer oneshot;
  937. s64 delta;
  938. gpa_t gpa;
  939. int idx;
  940. if (!kvm_xen_timer_enabled(vcpu))
  941. return false;
  942. switch (cmd) {
  943. case VCPUOP_set_singleshot_timer:
  944. if (vcpu->arch.xen.vcpu_id != vcpu_id) {
  945. *r = -EINVAL;
  946. return true;
  947. }
  948. idx = srcu_read_lock(&vcpu->kvm->srcu);
  949. gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
  950. srcu_read_unlock(&vcpu->kvm->srcu, idx);
  951. /*
  952. * The only difference for 32-bit compat is the 4 bytes of
  953. * padding after the interesting part of the structure. So
  954. * for a faithful emulation of Xen we have to *try* to copy
  955. * the padding and return -EFAULT if we can't. Otherwise we
  956. * might as well just have copied the 12-byte 32-bit struct.
  957. */
  958. BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) !=
  959. offsetof(struct vcpu_set_singleshot_timer, timeout_abs_ns));
  960. BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) !=
  961. sizeof_field(struct vcpu_set_singleshot_timer, timeout_abs_ns));
  962. BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, flags) !=
  963. offsetof(struct vcpu_set_singleshot_timer, flags));
  964. BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) !=
  965. sizeof_field(struct vcpu_set_singleshot_timer, flags));
  966. if (!gpa ||
  967. kvm_vcpu_read_guest(vcpu, gpa, &oneshot, longmode ? sizeof(oneshot) :
  968. sizeof(struct compat_vcpu_set_singleshot_timer))) {
  969. *r = -EFAULT;
  970. return true;
  971. }
  972. delta = oneshot.timeout_abs_ns - get_kvmclock_ns(vcpu->kvm);
  973. if ((oneshot.flags & VCPU_SSHOTTMR_future) && delta < 0) {
  974. *r = -ETIME;
  975. return true;
  976. }
  977. kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, delta);
  978. *r = 0;
  979. return true;
  980. case VCPUOP_stop_singleshot_timer:
  981. if (vcpu->arch.xen.vcpu_id != vcpu_id) {
  982. *r = -EINVAL;
  983. return true;
  984. }
  985. kvm_xen_stop_timer(vcpu);
  986. *r = 0;
  987. return true;
  988. }
  989. return false;
  990. }
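/*
 * set_timer_op takes a single absolute timeout in guest (kvmclock)
 * nanoseconds; a timeout of zero cancels any pending singleshot timer.
 */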
  991. static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout,
  992. u64 *r)
  993. {
  994. if (!kvm_xen_timer_enabled(vcpu))
  995. return false;
  996. if (timeout) {
  997. uint64_t guest_now = get_kvmclock_ns(vcpu->kvm);
  998. int64_t delta = timeout - guest_now;
  999. /* Xen has a 'Linux workaround' in do_set_timer_op() which
  1000. * checks for negative absolute timeout values (caused by
  1001. * integer overflow), and for values about 13 days in the
  1002. * future (2^50ns) which would be caused by jiffies
  1003. * overflow. For those cases, it sets the timeout 100ms in
  1004. * the future (not *too* soon, since if a guest really did
  1005. * set a long timeout on purpose we don't want to keep
  1006. * churning CPU time by waking it up).
  1007. */
  1008. if (unlikely((int64_t)timeout < 0 ||
  1009. (delta > 0 && (uint32_t) (delta >> 50) != 0))) {
  1010. delta = 100 * NSEC_PER_MSEC;
  1011. timeout = guest_now + delta;
  1012. }
  1013. kvm_xen_start_timer(vcpu, timeout, delta);
  1014. } else {
  1015. kvm_xen_stop_timer(vcpu);
  1016. }
  1017. *r = 0;
  1018. return true;
  1019. }
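/*
 * Main Xen hypercall dispatcher. The hypercall number arrives in RAX and the
 * arguments in RDI/RSI/RDX/R10/R8/R9 for 64-bit guests, or in
 * EBX/ECX/EDX/ESI/EDI/EBP for 32-bit guests. Anything not accelerated in the
 * kernel is bounced to userspace as a KVM_EXIT_XEN / KVM_EXIT_XEN_HCALL exit,
 * with completion handled by kvm_xen_hypercall_complete_userspace().
 */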
  1020. int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
  1021. {
  1022. bool longmode;
  1023. u64 input, params[6], r = -ENOSYS;
  1024. bool handled = false;
  1025. u8 cpl;
  1026. input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
  1027. /* Hyper-V hypercalls get bit 31 set in EAX */
  1028. if ((input & 0x80000000) &&
  1029. kvm_hv_hypercall_enabled(vcpu))
  1030. return kvm_hv_hypercall(vcpu);
  1031. longmode = is_64_bit_hypercall(vcpu);
  1032. if (!longmode) {
  1033. params[0] = (u32)kvm_rbx_read(vcpu);
  1034. params[1] = (u32)kvm_rcx_read(vcpu);
  1035. params[2] = (u32)kvm_rdx_read(vcpu);
  1036. params[3] = (u32)kvm_rsi_read(vcpu);
  1037. params[4] = (u32)kvm_rdi_read(vcpu);
  1038. params[5] = (u32)kvm_rbp_read(vcpu);
  1039. }
  1040. #ifdef CONFIG_X86_64
  1041. else {
  1042. params[0] = (u64)kvm_rdi_read(vcpu);
  1043. params[1] = (u64)kvm_rsi_read(vcpu);
  1044. params[2] = (u64)kvm_rdx_read(vcpu);
  1045. params[3] = (u64)kvm_r10_read(vcpu);
  1046. params[4] = (u64)kvm_r8_read(vcpu);
  1047. params[5] = (u64)kvm_r9_read(vcpu);
  1048. }
  1049. #endif
  1050. cpl = static_call(kvm_x86_get_cpl)(vcpu);
  1051. trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
  1052. params[3], params[4], params[5]);
  1053. /*
  1054. * Only allow hypercall acceleration for CPL0. The rare hypercalls that
  1055. * are permitted in guest userspace can be handled by the VMM.
  1056. */
  1057. if (unlikely(cpl > 0))
  1058. goto handle_in_userspace;
  1059. switch (input) {
  1060. case __HYPERVISOR_xen_version:
  1061. if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) {
  1062. r = vcpu->kvm->arch.xen.xen_version;
  1063. handled = true;
  1064. }
  1065. break;
  1066. case __HYPERVISOR_event_channel_op:
  1067. if (params[0] == EVTCHNOP_send)
  1068. handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
  1069. break;
  1070. case __HYPERVISOR_sched_op:
  1071. handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0],
  1072. params[1], &r);
  1073. break;
  1074. case __HYPERVISOR_vcpu_op:
  1075. handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1],
  1076. params[2], &r);
  1077. break;
  1078. case __HYPERVISOR_set_timer_op: {
  1079. u64 timeout = params[0];
  1080. /* In 32-bit mode, the 64-bit timeout is in two 32-bit params. */
  1081. if (!longmode)
  1082. timeout |= params[1] << 32;
  1083. handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r);
  1084. break;
  1085. }
  1086. default:
  1087. break;
  1088. }
  1089. if (handled)
  1090. return kvm_xen_hypercall_set_result(vcpu, r);
  1091. handle_in_userspace:
  1092. vcpu->run->exit_reason = KVM_EXIT_XEN;
  1093. vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
  1094. vcpu->run->xen.u.hcall.longmode = longmode;
  1095. vcpu->run->xen.u.hcall.cpl = cpl;
  1096. vcpu->run->xen.u.hcall.input = input;
  1097. vcpu->run->xen.u.hcall.params[0] = params[0];
  1098. vcpu->run->xen.u.hcall.params[1] = params[1];
  1099. vcpu->run->xen.u.hcall.params[2] = params[2];
  1100. vcpu->run->xen.u.hcall.params[3] = params[3];
  1101. vcpu->run->xen.u.hcall.params[4] = params[4];
  1102. vcpu->run->xen.u.hcall.params[5] = params[5];
  1103. vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
  1104. vcpu->arch.complete_userspace_io =
  1105. kvm_xen_hypercall_complete_userspace;
  1106. return 0;
  1107. }
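/*
 * If the target vCPU is blocked in SCHEDOP_poll waiting for this port (or
 * polling with the -1 wildcard used for multi-port polls), clear its
 * poll_mask bit and kick it awake.
 */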
  1108. static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
  1109. {
  1110. int poll_evtchn = vcpu->arch.xen.poll_evtchn;
  1111. if ((poll_evtchn == port || poll_evtchn == -1) &&
  1112. test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) {
  1113. kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
  1114. kvm_vcpu_kick(vcpu);
  1115. }
  1116. }
  1117. /*
  1118. * The return value from this function is propagated to kvm_set_irq() API,
  1119. * so it returns:
  1120. * < 0 Interrupt was ignored (masked or not delivered for other reasons)
  1121. * = 0 Interrupt was coalesced (previous irq is still pending)
  1122. * > 0 Number of CPUs interrupt was delivered to
  1123. *
  1124. * It is also called directly from kvm_arch_set_irq_inatomic(), where the
  1125. * only check on its return value is a comparison with -EWOULDBLOCK.
  1126. */
  1127. int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
  1128. {
  1129. struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
  1130. struct kvm_vcpu *vcpu;
  1131. unsigned long *pending_bits, *mask_bits;
  1132. unsigned long flags;
  1133. int port_word_bit;
  1134. bool kick_vcpu = false;
  1135. int vcpu_idx, idx, rc;
  1136. vcpu_idx = READ_ONCE(xe->vcpu_idx);
  1137. if (vcpu_idx >= 0)
  1138. vcpu = kvm_get_vcpu(kvm, vcpu_idx);
  1139. else {
  1140. vcpu = kvm_get_vcpu_by_id(kvm, xe->vcpu_id);
  1141. if (!vcpu)
  1142. return -EINVAL;
  1143. WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx);
  1144. }
  1145. if (!vcpu->arch.xen.vcpu_info_cache.active)
  1146. return -EINVAL;
  1147. if (xe->port >= max_evtchn_port(kvm))
  1148. return -EINVAL;
  1149. rc = -EWOULDBLOCK;
  1150. idx = srcu_read_lock(&kvm->srcu);
  1151. read_lock_irqsave(&gpc->lock, flags);
  1152. if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
  1153. goto out_rcu;
  1154. if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
  1155. struct shared_info *shinfo = gpc->khva;
  1156. pending_bits = (unsigned long *)&shinfo->evtchn_pending;
  1157. mask_bits = (unsigned long *)&shinfo->evtchn_mask;
  1158. port_word_bit = xe->port / 64;
  1159. } else {
  1160. struct compat_shared_info *shinfo = gpc->khva;
  1161. pending_bits = (unsigned long *)&shinfo->evtchn_pending;
  1162. mask_bits = (unsigned long *)&shinfo->evtchn_mask;
  1163. port_word_bit = xe->port / 32;
  1164. }
  1165. /*
  1166. * If this port wasn't already set, and if it isn't masked, then
  1167. * we try to set the corresponding bit in the in-kernel shadow of
  1168. * evtchn_pending_sel for the target vCPU. And if *that* wasn't
  1169. * already set, then we kick the vCPU in question to write to the
  1170. * *real* evtchn_pending_sel in its own guest vcpu_info struct.
  1171. */
  1172. if (test_and_set_bit(xe->port, pending_bits)) {
  1173. rc = 0; /* It was already raised */
  1174. } else if (test_bit(xe->port, mask_bits)) {
  1175. rc = -ENOTCONN; /* Masked */
  1176. kvm_xen_check_poller(vcpu, xe->port);
  1177. } else {
  1178. rc = 1; /* Delivered to the bitmap in shared_info. */
  1179. /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
  1180. read_unlock_irqrestore(&gpc->lock, flags);
  1181. gpc = &vcpu->arch.xen.vcpu_info_cache;
  1182. read_lock_irqsave(&gpc->lock, flags);
  1183. if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
  1184. /*
  1185. * Could not access the vcpu_info. Set the bit in-kernel
  1186. * and prod the vCPU to deliver it for itself.
  1187. */
  1188. if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
  1189. kick_vcpu = true;
  1190. goto out_rcu;
  1191. }
  1192. if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
  1193. struct vcpu_info *vcpu_info = gpc->khva;
  1194. if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
  1195. WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
  1196. kick_vcpu = true;
  1197. }
  1198. } else {
  1199. struct compat_vcpu_info *vcpu_info = gpc->khva;
  1200. if (!test_and_set_bit(port_word_bit,
  1201. (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
  1202. WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
  1203. kick_vcpu = true;
  1204. }
  1205. }
  1206. /* For the per-vCPU lapic vector, deliver it as MSI. */
  1207. if (kick_vcpu && vcpu->arch.xen.upcall_vector) {
  1208. kvm_xen_inject_vcpu_vector(vcpu);
  1209. kick_vcpu = false;
  1210. }
  1211. }
  1212. out_rcu:
  1213. read_unlock_irqrestore(&gpc->lock, flags);
  1214. srcu_read_unlock(&kvm->srcu, idx);
  1215. if (kick_vcpu) {
  1216. kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
  1217. kvm_vcpu_kick(vcpu);
  1218. }
  1219. return rc;
  1220. }
  1221. static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
  1222. {
  1223. bool mm_borrowed = false;
  1224. int rc;
  1225. rc = kvm_xen_set_evtchn_fast(xe, kvm);
  1226. if (rc != -EWOULDBLOCK)
  1227. return rc;
  1228. if (current->mm != kvm->mm) {
  1229. /*
  1230. * If not on a thread which already belongs to this KVM,
  1231. * we'd better be in the irqfd workqueue.
  1232. */
  1233. if (WARN_ON_ONCE(current->mm))
  1234. return -EINVAL;
  1235. kthread_use_mm(kvm->mm);
  1236. mm_borrowed = true;
  1237. }
  1238. /*
  1239. * For the irqfd workqueue, using the main kvm->lock mutex is
  1240. * fine since this function is invoked from kvm_set_irq() with
  1241. * no other lock held, no srcu. In future if it will be called
  1242. * directly from a vCPU thread (e.g. on hypercall for an IPI)
  1243. * then it may need to switch to using a leaf-node mutex for
  1244. * serializing the shared_info mapping.
  1245. */
  1246. mutex_lock(&kvm->lock);
  1247. /*
  1248. * It is theoretically possible for the page to be unmapped
  1249. * and the MMU notifier to invalidate the shared_info before
  1250. * we even get to use it. In that case, this looks like an
  1251. * infinite loop. It was tempting to do it via the userspace
  1252. * HVA instead... but that just *hides* the fact that it's
  1253. * an infinite loop, because if a fault occurs and it waits
  1254. * for the page to come back, it can *still* immediately
  1255. * fault and have to wait again, repeatedly.
  1256. *
  1257. * Conversely, the page could also have been reinstated by
  1258. * another thread before we even obtain the mutex above, so
  1259. * check again *first* before remapping it.
  1260. */
  1261. do {
  1262. struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
  1263. int idx;
  1264. rc = kvm_xen_set_evtchn_fast(xe, kvm);
  1265. if (rc != -EWOULDBLOCK)
  1266. break;
  1267. idx = srcu_read_lock(&kvm->srcu);
  1268. rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
  1269. srcu_read_unlock(&kvm->srcu, idx);
  1270. } while(!rc);
  1271. mutex_unlock(&kvm->lock);
  1272. if (mm_borrowed)
  1273. kthread_unuse_mm(kvm->mm);
  1274. return rc;
  1275. }
  1276. /* This is the version called from kvm_set_irq() as the .set function */
  1277. static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
  1278. int irq_source_id, int level, bool line_status)
  1279. {
  1280. if (!level)
  1281. return -EINVAL;
  1282. return kvm_xen_set_evtchn(&e->xen_evtchn, kvm);
  1283. }
  1284. /*
  1285. * Set up an event channel interrupt from the KVM IRQ routing table.
  1286. * Used for e.g. PIRQ from passed through physical devices.
  1287. */
  1288. int kvm_xen_setup_evtchn(struct kvm *kvm,
  1289. struct kvm_kernel_irq_routing_entry *e,
  1290. const struct kvm_irq_routing_entry *ue)
  1291. {
  1292. struct kvm_vcpu *vcpu;
  1293. if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
  1294. return -EINVAL;
  1295. /* We only support 2 level event channels for now */
  1296. if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
  1297. return -EINVAL;
  1298. /*
  1299. * Xen gives us interesting mappings from vCPU index to APIC ID,
  1300. * which means kvm_get_vcpu_by_id() has to iterate over all vCPUs
  1301. * to find it. Do that once at setup time, instead of every time.
  1302. * But beware that on live update / live migration, the routing
  1303. * table might be reinstated before the vCPU threads have finished
  1304. * recreating their vCPUs.
  1305. */
  1306. vcpu = kvm_get_vcpu_by_id(kvm, ue->u.xen_evtchn.vcpu);
  1307. if (vcpu)
  1308. e->xen_evtchn.vcpu_idx = vcpu->vcpu_idx;
  1309. else
  1310. e->xen_evtchn.vcpu_idx = -1;
  1311. e->xen_evtchn.port = ue->u.xen_evtchn.port;
  1312. e->xen_evtchn.vcpu_id = ue->u.xen_evtchn.vcpu;
  1313. e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
  1314. e->set = evtchn_set_fn;
  1315. return 0;
  1316. }

/*
 * Explicit event sending from userspace with KVM_XEN_HVM_EVTCHN_SEND ioctl.
 */
int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *uxe)
{
	struct kvm_xen_evtchn e;
	int ret;

	if (!uxe->port || uxe->port >= max_evtchn_port(kvm))
		return -EINVAL;

	/* We only support 2 level event channels for now */
	if (uxe->priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	e.port = uxe->port;
	e.vcpu_id = uxe->vcpu;
	e.vcpu_idx = -1;
	e.priority = uxe->priority;

	ret = kvm_xen_set_evtchn(&e, kvm);

	/*
	 * None of that 'return 1 if it actually got delivered' nonsense.
	 * We don't care if it was masked (-ENOTCONN) either.
	 */
	if (ret > 0 || ret == -ENOTCONN)
		ret = 0;

	return ret;
}

/*
 * Support for *outbound* event channel events via the EVTCHNOP_send hypercall.
 */
struct evtchnfd {
	u32 send_port;
	u32 type;
	union {
		struct kvm_xen_evtchn port;
		struct {
			u32 port; /* zero */
			struct eventfd_ctx *ctx;
		} eventfd;
	} deliver;
};

/*
 * Update target vCPU or priority for a registered sending channel.
 */
static int kvm_xen_eventfd_update(struct kvm *kvm,
				  struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;
	struct evtchnfd *evtchnfd;

	if (!port || port >= max_evtchn_port(kvm))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
	mutex_unlock(&kvm->lock);

	if (!evtchnfd)
		return -ENOENT;

	/* For an UPDATE, nothing may change except the priority/vcpu */
	if (evtchnfd->type != data->u.evtchn.type)
		return -EINVAL;

	/*
	 * Port cannot change, and if it's zero that was an eventfd
	 * which can't be changed either.
	 */
	if (!evtchnfd->deliver.port.port ||
	    evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port)
		return -EINVAL;

	/* We only support 2 level event channels for now */
	if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
	if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) {
		evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
		evtchnfd->deliver.port.vcpu_idx = -1;
	}
	mutex_unlock(&kvm->lock);

	return 0;
}

/*
 * Configure the target (eventfd or local port delivery) for sending on
 * a given event channel.
 */
static int kvm_xen_eventfd_assign(struct kvm *kvm,
				  struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;
	struct eventfd_ctx *eventfd = NULL;
	struct evtchnfd *evtchnfd = NULL;
	int ret = -EINVAL;

	if (!port || port >= max_evtchn_port(kvm))
		return -EINVAL;

	evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL);
	if (!evtchnfd)
		return -ENOMEM;

	switch(data->u.evtchn.type) {
	case EVTCHNSTAT_ipi:
		/* IPI must map back to the same port# */
		if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
			goto out_noeventfd; /* -EINVAL */
		break;

	case EVTCHNSTAT_interdomain:
		if (data->u.evtchn.deliver.port.port) {
			if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
				goto out_noeventfd; /* -EINVAL */
		} else {
			eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
			if (IS_ERR(eventfd)) {
				ret = PTR_ERR(eventfd);
				goto out_noeventfd;
			}
		}
		break;

	case EVTCHNSTAT_virq:
	case EVTCHNSTAT_closed:
	case EVTCHNSTAT_unbound:
	case EVTCHNSTAT_pirq:
	default: /* Unknown event channel type */
		goto out; /* -EINVAL */
	}

	evtchnfd->send_port = data->u.evtchn.send_port;
	evtchnfd->type = data->u.evtchn.type;
	if (eventfd) {
		evtchnfd->deliver.eventfd.ctx = eventfd;
	} else {
		/* We only support 2 level event channels for now */
		if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
			goto out; /* -EINVAL */
		evtchnfd->deliver.port.port = data->u.evtchn.deliver.port.port;
		evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
		evtchnfd->deliver.port.vcpu_idx = -1;
		evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
	}

	mutex_lock(&kvm->lock);
	ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1,
			GFP_KERNEL);
	mutex_unlock(&kvm->lock);
	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
out:
	if (eventfd)
		eventfd_ctx_put(eventfd);
out_noeventfd:
	kfree(evtchnfd);
	return ret;
}
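
/*
 * Remove a previously assigned sending port: drop it from the
 * evtchn_ports idr, wait under kvm->srcu for any in-flight
 * EVTCHNOP_send lookups to finish, then release the eventfd (if that
 * was the delivery target) and free the evtchnfd.
 */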
static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
{
	struct evtchnfd *evtchnfd;

	mutex_lock(&kvm->lock);
	evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port);
	mutex_unlock(&kvm->lock);

	if (!evtchnfd)
		return -ENOENT;

	if (kvm)
		synchronize_srcu(&kvm->srcu);

	if (!evtchnfd->deliver.port.port)
		eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
	kfree(evtchnfd);
	return 0;
}
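
/*
 * KVM_XEN_EVTCHN_RESET: remove all registered sending ports in one go,
 * deferring the synchronize_srcu() and the frees until after kvm->lock
 * has been dropped.
 */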
static int kvm_xen_eventfd_reset(struct kvm *kvm)
{
	struct evtchnfd *evtchnfd, **all_evtchnfds;
	int i;
	int n = 0;

	mutex_lock(&kvm->lock);

	/*
	 * Because synchronize_srcu() cannot be called inside the
	 * critical section, first collect all the evtchnfd objects
	 * in an array as they are removed from evtchn_ports.
	 */
	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i)
		n++;

	all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
	if (!all_evtchnfds) {
		mutex_unlock(&kvm->lock);
		return -ENOMEM;
	}

	n = 0;
	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
		all_evtchnfds[n++] = evtchnfd;
		idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
	}
	mutex_unlock(&kvm->lock);

	synchronize_srcu(&kvm->srcu);

	while (n--) {
		evtchnfd = all_evtchnfds[n];
		if (!evtchnfd->deliver.port.port)
			eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
		kfree(evtchnfd);
	}
	kfree(all_evtchnfds);

	return 0;
}
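
/*
 * Dispatch the event channel attribute operations according to
 * data->u.evtchn.flags: RESET clears every port, DEASSIGN and UPDATE
 * act on an existing port, and no flags at all means a new assignment.
 */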
static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;

	if (data->u.evtchn.flags == KVM_XEN_EVTCHN_RESET)
		return kvm_xen_eventfd_reset(kvm);

	if (!port || port >= max_evtchn_port(kvm))
		return -EINVAL;

	if (data->u.evtchn.flags == KVM_XEN_EVTCHN_DEASSIGN)
		return kvm_xen_eventfd_deassign(kvm, port);
	if (data->u.evtchn.flags == KVM_XEN_EVTCHN_UPDATE)
		return kvm_xen_eventfd_update(kvm, data);
	if (data->u.evtchn.flags)
		return -EINVAL;

	return kvm_xen_eventfd_assign(kvm, data);
}
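
/*
 * In-kernel handling of the EVTCHNOP_send hypercall: read the
 * struct evtchn_send argument from the guest, look the port up in
 * evtchn_ports and either raise the event locally or signal the
 * registered eventfd. Returning false means the send was not handled
 * here, leaving it to the caller's fallback path.
 */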
static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
{
	struct evtchnfd *evtchnfd;
	struct evtchn_send send;
	gpa_t gpa;
	int idx;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &send, sizeof(send))) {
		*r = -EFAULT;
		return true;
	}

	/* The evtchn_ports idr is protected by vcpu->kvm->srcu */
	evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port);
	if (!evtchnfd)
		return false;

	if (evtchnfd->deliver.port.port) {
		int ret = kvm_xen_set_evtchn(&evtchnfd->deliver.port, vcpu->kvm);
		if (ret < 0 && ret != -ENOTCONN)
			return false;
	} else {
		eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1);
	}

	*r = 0;
	return true;
}
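
/*
 * Per-vCPU init: default the Xen vcpu_id to the KVM vcpu_idx, reset the
 * poll state, set up the poll timer and initialise the per-vCPU
 * gfn_to_pfn caches (runstate, vcpu_info, vcpu_time_info).
 */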
void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
	vcpu->arch.xen.poll_evtchn = 0;

	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);

	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
}
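
/*
 * Per-vCPU teardown: stop the Xen timer if it is running, deactivate
 * the per-vCPU gfn_to_pfn caches and make sure the poll timer cannot
 * fire again.
 */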
void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
{
	if (kvm_xen_timer_enabled(vcpu))
		kvm_xen_stop_timer(vcpu);

	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);

	del_timer_sync(&vcpu->arch.xen.poll_timer);
}
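
/*
 * VM-wide init: the idr tracks sending ports for EVTCHNOP_send /
 * KVM_XEN_HVM_EVTCHN_SEND, and the cache holds the shared_info mapping.
 */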
void kvm_xen_init_vm(struct kvm *kvm)
{
	idr_init(&kvm->arch.xen.evtchn_ports);
	kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
}
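
/*
 * VM-wide teardown: drop the shared_info mapping, free every registered
 * sending port, and, if the guest's hypercall MSR was configured, drop
 * the corresponding kvm_xen_enabled static key reference.
 */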
void kvm_xen_destroy_vm(struct kvm *kvm)
{
	struct evtchnfd *evtchnfd;
	int i;

	kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);

	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
		if (!evtchnfd->deliver.port.port)
			eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
		kfree(evtchnfd);
	}
	idr_destroy(&kvm->arch.xen.evtchn_ports);

	if (kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);
}