hyperv.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * KVM Microsoft Hyper-V emulation
  4. *
  5. * derived from arch/x86/kvm/x86.c
  6. *
  7. * Copyright (C) 2006 Qumranet, Inc.
  8. * Copyright (C) 2008 Qumranet, Inc.
  9. * Copyright IBM Corporation, 2008
  10. * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  11. * Copyright (C) 2015 Andrey Smetanin <[email protected]>
  12. *
  13. * Authors:
  14. * Avi Kivity <[email protected]>
  15. * Yaniv Kamay <[email protected]>
  16. * Amit Shah <[email protected]>
  17. * Ben-Ami Yassour <[email protected]>
  18. * Andrey Smetanin <[email protected]>
  19. */
  20. #include "x86.h"
  21. #include "lapic.h"
  22. #include "ioapic.h"
  23. #include "cpuid.h"
  24. #include "hyperv.h"
  25. #include "xen.h"
  26. #include <linux/cpu.h>
  27. #include <linux/kvm_host.h>
  28. #include <linux/highmem.h>
  29. #include <linux/sched/cputime.h>
  30. #include <linux/eventfd.h>
  31. #include <asm/apicdef.h>
  32. #include <trace/events/kvm.h>
  33. #include "trace.h"
  34. #include "irq.h"
  35. #include "fpu.h"
  36. #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
  37. static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
  38. bool vcpu_kick);
  39. static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
  40. {
  41. return atomic64_read(&synic->sint[sint]);
  42. }
  43. static inline int synic_get_sint_vector(u64 sint_value)
  44. {
  45. if (sint_value & HV_SYNIC_SINT_MASKED)
  46. return -1;
  47. return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
  48. }
  49. static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
  50. int vector)
  51. {
  52. int i;
  53. for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  54. if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
  55. return true;
  56. }
  57. return false;
  58. }
  59. static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
  60. int vector)
  61. {
  62. int i;
  63. u64 sint_value;
  64. for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  65. sint_value = synic_read_sint(synic, i);
  66. if (synic_get_sint_vector(sint_value) == vector &&
  67. sint_value & HV_SYNIC_SINT_AUTO_EOI)
  68. return true;
  69. }
  70. return false;
  71. }
  72. static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
  73. int vector)
  74. {
  75. struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  76. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  77. bool auto_eoi_old, auto_eoi_new;
  78. if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
  79. return;
  80. if (synic_has_vector_connected(synic, vector))
  81. __set_bit(vector, synic->vec_bitmap);
  82. else
  83. __clear_bit(vector, synic->vec_bitmap);
  84. auto_eoi_old = !bitmap_empty(synic->auto_eoi_bitmap, 256);
  85. if (synic_has_vector_auto_eoi(synic, vector))
  86. __set_bit(vector, synic->auto_eoi_bitmap);
  87. else
  88. __clear_bit(vector, synic->auto_eoi_bitmap);
  89. auto_eoi_new = !bitmap_empty(synic->auto_eoi_bitmap, 256);
  90. if (auto_eoi_old == auto_eoi_new)
  91. return;
  92. if (!enable_apicv)
  93. return;
  94. down_write(&vcpu->kvm->arch.apicv_update_lock);
  95. if (auto_eoi_new)
  96. hv->synic_auto_eoi_used++;
  97. else
  98. hv->synic_auto_eoi_used--;
  99. /*
  100. * Inhibit APICv if any vCPU is using SynIC's AutoEOI, which relies on
  101. * the hypervisor to manually inject IRQs.
  102. */
  103. __kvm_set_or_clear_apicv_inhibit(vcpu->kvm,
  104. APICV_INHIBIT_REASON_HYPERV,
  105. !!hv->synic_auto_eoi_used);
  106. up_write(&vcpu->kvm->arch.apicv_update_lock);
  107. }
  108. static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
  109. u64 data, bool host)
  110. {
  111. int vector, old_vector;
  112. bool masked;
  113. vector = data & HV_SYNIC_SINT_VECTOR_MASK;
  114. masked = data & HV_SYNIC_SINT_MASKED;
  115. /*
  116. * Valid vectors are 16-255, however, nested Hyper-V attempts to write
  117. * default '0x10000' value on boot and this should not #GP. We need to
  118. * allow zero-initing the register from host as well.
  119. */
  120. if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
  121. return 1;
  122. /*
  123. * Guest may configure multiple SINTs to use the same vector, so
  124. * we maintain a bitmap of vectors handled by synic, and a
  125. * bitmap of vectors with auto-eoi behavior. The bitmaps are
  126. * updated here, and atomically queried on fast paths.
  127. */
  128. old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
  129. atomic64_set(&synic->sint[sint], data);
  130. synic_update_vector(synic, old_vector);
  131. synic_update_vector(synic, vector);
  132. /* Load SynIC vectors into EOI exit bitmap */
  133. kvm_make_request(KVM_REQ_SCAN_IOAPIC, hv_synic_to_vcpu(synic));
  134. return 0;
  135. }
  136. static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
  137. {
  138. struct kvm_vcpu *vcpu = NULL;
  139. unsigned long i;
  140. if (vpidx >= KVM_MAX_VCPUS)
  141. return NULL;
  142. vcpu = kvm_get_vcpu(kvm, vpidx);
  143. if (vcpu && kvm_hv_get_vpindex(vcpu) == vpidx)
  144. return vcpu;
  145. kvm_for_each_vcpu(i, vcpu, kvm)
  146. if (kvm_hv_get_vpindex(vcpu) == vpidx)
  147. return vcpu;
  148. return NULL;
  149. }
  150. static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
  151. {
  152. struct kvm_vcpu *vcpu;
  153. struct kvm_vcpu_hv_synic *synic;
  154. vcpu = get_vcpu_by_vpidx(kvm, vpidx);
  155. if (!vcpu || !to_hv_vcpu(vcpu))
  156. return NULL;
  157. synic = to_hv_synic(vcpu);
  158. return (synic->active) ? synic : NULL;
  159. }
  160. static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
  161. {
  162. struct kvm *kvm = vcpu->kvm;
  163. struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
  164. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  165. struct kvm_vcpu_hv_stimer *stimer;
  166. int gsi, idx;
  167. trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);
  168. /* Try to deliver pending Hyper-V SynIC timer messages */
  169. for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
  170. stimer = &hv_vcpu->stimer[idx];
  171. if (stimer->msg_pending && stimer->config.enable &&
  172. !stimer->config.direct_mode &&
  173. stimer->config.sintx == sint)
  174. stimer_mark_pending(stimer, false);
  175. }
  176. idx = srcu_read_lock(&kvm->irq_srcu);
  177. gsi = atomic_read(&synic->sint_to_gsi[sint]);
  178. if (gsi != -1)
  179. kvm_notify_acked_gsi(kvm, gsi);
  180. srcu_read_unlock(&kvm->irq_srcu, idx);
  181. }
  182. static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
  183. {
  184. struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  185. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  186. hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
  187. hv_vcpu->exit.u.synic.msr = msr;
  188. hv_vcpu->exit.u.synic.control = synic->control;
  189. hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
  190. hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
  191. kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
  192. }
  193. static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
  194. u32 msr, u64 data, bool host)
  195. {
  196. struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  197. int ret;
  198. if (!synic->active && (!host || data))
  199. return 1;
  200. trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
  201. ret = 0;
  202. switch (msr) {
  203. case HV_X64_MSR_SCONTROL:
  204. synic->control = data;
  205. if (!host)
  206. synic_exit(synic, msr);
  207. break;
  208. case HV_X64_MSR_SVERSION:
  209. if (!host) {
  210. ret = 1;
  211. break;
  212. }
  213. synic->version = data;
  214. break;
  215. case HV_X64_MSR_SIEFP:
  216. if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
  217. !synic->dont_zero_synic_pages)
  218. if (kvm_clear_guest(vcpu->kvm,
  219. data & PAGE_MASK, PAGE_SIZE)) {
  220. ret = 1;
  221. break;
  222. }
  223. synic->evt_page = data;
  224. if (!host)
  225. synic_exit(synic, msr);
  226. break;
  227. case HV_X64_MSR_SIMP:
  228. if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
  229. !synic->dont_zero_synic_pages)
  230. if (kvm_clear_guest(vcpu->kvm,
  231. data & PAGE_MASK, PAGE_SIZE)) {
  232. ret = 1;
  233. break;
  234. }
  235. synic->msg_page = data;
  236. if (!host)
  237. synic_exit(synic, msr);
  238. break;
  239. case HV_X64_MSR_EOM: {
  240. int i;
  241. if (!synic->active)
  242. break;
  243. for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
  244. kvm_hv_notify_acked_sint(vcpu, i);
  245. break;
  246. }
  247. case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
  248. ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
  249. break;
  250. default:
  251. ret = 1;
  252. break;
  253. }
  254. return ret;
  255. }
  256. static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
  257. {
  258. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  259. return hv_vcpu->cpuid_cache.syndbg_cap_eax &
  260. HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
  261. }
  262. static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
  263. {
  264. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  265. if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
  266. hv->hv_syndbg.control.status =
  267. vcpu->run->hyperv.u.syndbg.status;
  268. return 1;
  269. }
  270. static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
  271. {
  272. struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
  273. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  274. hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
  275. hv_vcpu->exit.u.syndbg.msr = msr;
  276. hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
  277. hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
  278. hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
  279. hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
  280. vcpu->arch.complete_userspace_io =
  281. kvm_hv_syndbg_complete_userspace;
  282. kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
  283. }
  284. static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
  285. {
  286. struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
  287. if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
  288. return 1;
  289. trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
  290. to_hv_vcpu(vcpu)->vp_index, msr, data);
  291. switch (msr) {
  292. case HV_X64_MSR_SYNDBG_CONTROL:
  293. syndbg->control.control = data;
  294. if (!host)
  295. syndbg_exit(vcpu, msr);
  296. break;
  297. case HV_X64_MSR_SYNDBG_STATUS:
  298. syndbg->control.status = data;
  299. break;
  300. case HV_X64_MSR_SYNDBG_SEND_BUFFER:
  301. syndbg->control.send_page = data;
  302. break;
  303. case HV_X64_MSR_SYNDBG_RECV_BUFFER:
  304. syndbg->control.recv_page = data;
  305. break;
  306. case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
  307. syndbg->control.pending_page = data;
  308. if (!host)
  309. syndbg_exit(vcpu, msr);
  310. break;
  311. case HV_X64_MSR_SYNDBG_OPTIONS:
  312. syndbg->options = data;
  313. break;
  314. default:
  315. break;
  316. }
  317. return 0;
  318. }
  319. static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
  320. {
  321. struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
  322. if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
  323. return 1;
  324. switch (msr) {
  325. case HV_X64_MSR_SYNDBG_CONTROL:
  326. *pdata = syndbg->control.control;
  327. break;
  328. case HV_X64_MSR_SYNDBG_STATUS:
  329. *pdata = syndbg->control.status;
  330. break;
  331. case HV_X64_MSR_SYNDBG_SEND_BUFFER:
  332. *pdata = syndbg->control.send_page;
  333. break;
  334. case HV_X64_MSR_SYNDBG_RECV_BUFFER:
  335. *pdata = syndbg->control.recv_page;
  336. break;
  337. case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
  338. *pdata = syndbg->control.pending_page;
  339. break;
  340. case HV_X64_MSR_SYNDBG_OPTIONS:
  341. *pdata = syndbg->options;
  342. break;
  343. default:
  344. break;
  345. }
  346. trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, kvm_hv_get_vpindex(vcpu), msr, *pdata);
  347. return 0;
  348. }
  349. static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
  350. bool host)
  351. {
  352. int ret;
  353. if (!synic->active && !host)
  354. return 1;
  355. ret = 0;
  356. switch (msr) {
  357. case HV_X64_MSR_SCONTROL:
  358. *pdata = synic->control;
  359. break;
  360. case HV_X64_MSR_SVERSION:
  361. *pdata = synic->version;
  362. break;
  363. case HV_X64_MSR_SIEFP:
  364. *pdata = synic->evt_page;
  365. break;
  366. case HV_X64_MSR_SIMP:
  367. *pdata = synic->msg_page;
  368. break;
  369. case HV_X64_MSR_EOM:
  370. *pdata = 0;
  371. break;
  372. case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
  373. *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
  374. break;
  375. default:
  376. ret = 1;
  377. break;
  378. }
  379. return ret;
  380. }
  381. static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
  382. {
  383. struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  384. struct kvm_lapic_irq irq;
  385. int ret, vector;
  386. if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
  387. return -EINVAL;
  388. if (sint >= ARRAY_SIZE(synic->sint))
  389. return -EINVAL;
  390. vector = synic_get_sint_vector(synic_read_sint(synic, sint));
  391. if (vector < 0)
  392. return -ENOENT;
  393. memset(&irq, 0, sizeof(irq));
  394. irq.shorthand = APIC_DEST_SELF;
  395. irq.dest_mode = APIC_DEST_PHYSICAL;
  396. irq.delivery_mode = APIC_DM_FIXED;
  397. irq.vector = vector;
  398. irq.level = 1;
  399. ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
  400. trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
  401. return ret;
  402. }
  403. int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
  404. {
  405. struct kvm_vcpu_hv_synic *synic;
  406. synic = synic_get(kvm, vpidx);
  407. if (!synic)
  408. return -EINVAL;
  409. return synic_set_irq(synic, sint);
  410. }
  411. void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
  412. {
  413. struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
  414. int i;
  415. trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
  416. for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
  417. if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
  418. kvm_hv_notify_acked_sint(vcpu, i);
  419. }
  420. static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
  421. {
  422. struct kvm_vcpu_hv_synic *synic;
  423. synic = synic_get(kvm, vpidx);
  424. if (!synic)
  425. return -EINVAL;
  426. if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
  427. return -EINVAL;
  428. atomic_set(&synic->sint_to_gsi[sint], gsi);
  429. return 0;
  430. }
  431. void kvm_hv_irq_routing_update(struct kvm *kvm)
  432. {
  433. struct kvm_irq_routing_table *irq_rt;
  434. struct kvm_kernel_irq_routing_entry *e;
  435. u32 gsi;
  436. irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
  437. lockdep_is_held(&kvm->irq_lock));
  438. for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
  439. hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
  440. if (e->type == KVM_IRQ_ROUTING_HV_SINT)
  441. kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
  442. e->hv_sint.sint, gsi);
  443. }
  444. }
  445. }
  446. static void synic_init(struct kvm_vcpu_hv_synic *synic)
  447. {
  448. int i;
  449. memset(synic, 0, sizeof(*synic));
  450. synic->version = HV_SYNIC_VERSION_1;
  451. for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  452. atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
  453. atomic_set(&synic->sint_to_gsi[i], -1);
  454. }
  455. }
  456. static u64 get_time_ref_counter(struct kvm *kvm)
  457. {
  458. struct kvm_hv *hv = to_kvm_hv(kvm);
  459. struct kvm_vcpu *vcpu;
  460. u64 tsc;
  461. /*
  462. * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
  463. * is broken, disabled or being updated.
  464. */
  465. if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
  466. return div_u64(get_kvmclock_ns(kvm), 100);
  467. vcpu = kvm_get_vcpu(kvm, 0);
  468. tsc = kvm_read_l1_tsc(vcpu, rdtsc());
  469. return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
  470. + hv->tsc_ref.tsc_offset;
  471. }
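/*
 * A quick sanity check of the fixed-point math above, using a 1 GHz
 * guest TSC purely as an example: one TSC tick is then 1 ns, the scale
 * computed by compute_tsc_page_parameters() works out to 2^64 / 100, and
 *
 *	mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) == tsc / 100
 *
 * i.e. 10,000,000 reference-time units (100 ns each) per second of TSC,
 * with tsc_ref.tsc_offset then re-basing the result onto the partition's
 * reference time, in the same 100 ns unit used by the
 * div_u64(get_kvmclock_ns(kvm), 100) fallback.
 */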
  472. static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
  473. bool vcpu_kick)
  474. {
  475. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  476. set_bit(stimer->index,
  477. to_hv_vcpu(vcpu)->stimer_pending_bitmap);
  478. kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
  479. if (vcpu_kick)
  480. kvm_vcpu_kick(vcpu);
  481. }
  482. static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
  483. {
  484. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  485. trace_kvm_hv_stimer_cleanup(hv_stimer_to_vcpu(stimer)->vcpu_id,
  486. stimer->index);
  487. hrtimer_cancel(&stimer->timer);
  488. clear_bit(stimer->index,
  489. to_hv_vcpu(vcpu)->stimer_pending_bitmap);
  490. stimer->msg_pending = false;
  491. stimer->exp_time = 0;
  492. }
  493. static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
  494. {
  495. struct kvm_vcpu_hv_stimer *stimer;
  496. stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
  497. trace_kvm_hv_stimer_callback(hv_stimer_to_vcpu(stimer)->vcpu_id,
  498. stimer->index);
  499. stimer_mark_pending(stimer, true);
  500. return HRTIMER_NORESTART;
  501. }
  502. /*
  503. * stimer_start() assumptions:
  504. * a) stimer->count is not equal to 0
  505. * b) stimer->config has HV_STIMER_ENABLE flag
  506. */
  507. static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
  508. {
  509. u64 time_now;
  510. ktime_t ktime_now;
  511. time_now = get_time_ref_counter(hv_stimer_to_vcpu(stimer)->kvm);
  512. ktime_now = ktime_get();
  513. if (stimer->config.periodic) {
  514. if (stimer->exp_time) {
  515. if (time_now >= stimer->exp_time) {
  516. u64 remainder;
  517. div64_u64_rem(time_now - stimer->exp_time,
  518. stimer->count, &remainder);
  519. stimer->exp_time =
  520. time_now + (stimer->count - remainder);
  521. }
  522. } else
  523. stimer->exp_time = time_now + stimer->count;
  524. trace_kvm_hv_stimer_start_periodic(
  525. hv_stimer_to_vcpu(stimer)->vcpu_id,
  526. stimer->index,
  527. time_now, stimer->exp_time);
  528. hrtimer_start(&stimer->timer,
  529. ktime_add_ns(ktime_now,
  530. 100 * (stimer->exp_time - time_now)),
  531. HRTIMER_MODE_ABS);
  532. return 0;
  533. }
  534. stimer->exp_time = stimer->count;
  535. if (time_now >= stimer->count) {
  536. /*
  537. * Expire timer according to Hypervisor Top-Level Functional
  538. * specification v4(15.3.1):
  539. * "If a one shot is enabled and the specified count is in
  540. * the past, it will expire immediately."
  541. */
  542. stimer_mark_pending(stimer, false);
  543. return 0;
  544. }
  545. trace_kvm_hv_stimer_start_one_shot(hv_stimer_to_vcpu(stimer)->vcpu_id,
  546. stimer->index,
  547. time_now, stimer->count);
  548. hrtimer_start(&stimer->timer,
  549. ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
  550. HRTIMER_MODE_ABS);
  551. return 0;
  552. }
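/*
 * Unit note for the arithmetic above (the numbers are illustrative only):
 * time_now, stimer->exp_time and stimer->count are all in the Hyper-V
 * reference time base of 100 ns per tick, while hrtimer_start() wants
 * nanoseconds, hence the "* 100". For example, a one-shot whose count
 * lies 10,000,000 reference ticks in the future is armed
 * 10,000,000 * 100 ns = 1 s after ktime_now.
 */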
  553. static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
  554. bool host)
  555. {
  556. union hv_stimer_config new_config = {.as_uint64 = config},
  557. old_config = {.as_uint64 = stimer->config.as_uint64};
  558. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  559. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  560. struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
  561. if (!synic->active && (!host || config))
  562. return 1;
  563. if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode &&
  564. !(hv_vcpu->cpuid_cache.features_edx &
  565. HV_STIMER_DIRECT_MODE_AVAILABLE)))
  566. return 1;
  567. trace_kvm_hv_stimer_set_config(hv_stimer_to_vcpu(stimer)->vcpu_id,
  568. stimer->index, config, host);
  569. stimer_cleanup(stimer);
  570. if (old_config.enable &&
  571. !new_config.direct_mode && new_config.sintx == 0)
  572. new_config.enable = 0;
  573. stimer->config.as_uint64 = new_config.as_uint64;
  574. if (stimer->config.enable)
  575. stimer_mark_pending(stimer, false);
  576. return 0;
  577. }
  578. static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
  579. bool host)
  580. {
  581. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  582. struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
  583. if (!synic->active && (!host || count))
  584. return 1;
  585. trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id,
  586. stimer->index, count, host);
  587. stimer_cleanup(stimer);
  588. stimer->count = count;
  589. if (!host) {
  590. if (stimer->count == 0)
  591. stimer->config.enable = 0;
  592. else if (stimer->config.auto_enable)
  593. stimer->config.enable = 1;
  594. }
  595. if (stimer->config.enable)
  596. stimer_mark_pending(stimer, false);
  597. return 0;
  598. }
  599. static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
  600. {
  601. *pconfig = stimer->config.as_uint64;
  602. return 0;
  603. }
  604. static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
  605. {
  606. *pcount = stimer->count;
  607. return 0;
  608. }
  609. static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
  610. struct hv_message *src_msg, bool no_retry)
  611. {
  612. struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  613. int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
  614. gfn_t msg_page_gfn;
  615. struct hv_message_header hv_hdr;
  616. int r;
  617. if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
  618. return -ENOENT;
  619. msg_page_gfn = synic->msg_page >> PAGE_SHIFT;
  620. /*
  621. * Strictly following the spec-mandated ordering would assume setting
  622. * .msg_pending before checking .message_type. However, this function
  623. * is only called in vcpu context so the entire update is atomic from
  624. * guest POV and thus the exact order here doesn't matter.
  625. */
  626. r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
  627. msg_off + offsetof(struct hv_message,
  628. header.message_type),
  629. sizeof(hv_hdr.message_type));
  630. if (r < 0)
  631. return r;
  632. if (hv_hdr.message_type != HVMSG_NONE) {
  633. if (no_retry)
  634. return 0;
  635. hv_hdr.message_flags.msg_pending = 1;
  636. r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
  637. &hv_hdr.message_flags,
  638. msg_off +
  639. offsetof(struct hv_message,
  640. header.message_flags),
  641. sizeof(hv_hdr.message_flags));
  642. if (r < 0)
  643. return r;
  644. return -EAGAIN;
  645. }
  646. r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
  647. sizeof(src_msg->header) +
  648. src_msg->header.payload_size);
  649. if (r < 0)
  650. return r;
  651. r = synic_set_irq(synic, sint);
  652. if (r < 0)
  653. return r;
  654. if (r == 0)
  655. return -EFAULT;
  656. return 0;
  657. }
  658. static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
  659. {
  660. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  661. struct hv_message *msg = &stimer->msg;
  662. struct hv_timer_message_payload *payload =
  663. (struct hv_timer_message_payload *)&msg->u.payload;
  664. /*
  665. * To avoid piling up periodic ticks, don't retry message
  666. * delivery for them (within "lazy" lost ticks policy).
  667. */
  668. bool no_retry = stimer->config.periodic;
  669. payload->expiration_time = stimer->exp_time;
  670. payload->delivery_time = get_time_ref_counter(vcpu->kvm);
  671. return synic_deliver_msg(to_hv_synic(vcpu),
  672. stimer->config.sintx, msg,
  673. no_retry);
  674. }
  675. static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
  676. {
  677. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  678. struct kvm_lapic_irq irq = {
  679. .delivery_mode = APIC_DM_FIXED,
  680. .vector = stimer->config.apic_vector
  681. };
  682. if (lapic_in_kernel(vcpu))
  683. return !kvm_apic_set_irq(vcpu, &irq, NULL);
  684. return 0;
  685. }
  686. static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
  687. {
  688. int r, direct = stimer->config.direct_mode;
  689. stimer->msg_pending = true;
  690. if (!direct)
  691. r = stimer_send_msg(stimer);
  692. else
  693. r = stimer_notify_direct(stimer);
  694. trace_kvm_hv_stimer_expiration(hv_stimer_to_vcpu(stimer)->vcpu_id,
  695. stimer->index, direct, r);
  696. if (!r) {
  697. stimer->msg_pending = false;
  698. if (!(stimer->config.periodic))
  699. stimer->config.enable = 0;
  700. }
  701. }
  702. void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
  703. {
  704. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  705. struct kvm_vcpu_hv_stimer *stimer;
  706. u64 time_now, exp_time;
  707. int i;
  708. if (!hv_vcpu)
  709. return;
  710. for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
  711. if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
  712. stimer = &hv_vcpu->stimer[i];
  713. if (stimer->config.enable) {
  714. exp_time = stimer->exp_time;
  715. if (exp_time) {
  716. time_now =
  717. get_time_ref_counter(vcpu->kvm);
  718. if (time_now >= exp_time)
  719. stimer_expiration(stimer);
  720. }
  721. if ((stimer->config.enable) &&
  722. stimer->count) {
  723. if (!stimer->msg_pending)
  724. stimer_start(stimer);
  725. } else
  726. stimer_cleanup(stimer);
  727. }
  728. }
  729. }
  730. void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
  731. {
  732. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  733. int i;
  734. if (!hv_vcpu)
  735. return;
  736. for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
  737. stimer_cleanup(&hv_vcpu->stimer[i]);
  738. kfree(hv_vcpu);
  739. vcpu->arch.hyperv = NULL;
  740. }
  741. bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
  742. {
  743. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  744. if (!hv_vcpu)
  745. return false;
  746. if (!(hv_vcpu->hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
  747. return false;
  748. return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
  749. }
  750. EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
  751. bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
  752. struct hv_vp_assist_page *assist_page)
  753. {
  754. if (!kvm_hv_assist_page_enabled(vcpu))
  755. return false;
  756. return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
  757. assist_page, sizeof(*assist_page));
  758. }
  759. EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
  760. static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
  761. {
  762. struct hv_message *msg = &stimer->msg;
  763. struct hv_timer_message_payload *payload =
  764. (struct hv_timer_message_payload *)&msg->u.payload;
  765. memset(&msg->header, 0, sizeof(msg->header));
  766. msg->header.message_type = HVMSG_TIMER_EXPIRED;
  767. msg->header.payload_size = sizeof(*payload);
  768. payload->timer_index = stimer->index;
  769. payload->expiration_time = 0;
  770. payload->delivery_time = 0;
  771. }
  772. static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
  773. {
  774. memset(stimer, 0, sizeof(*stimer));
  775. stimer->index = timer_index;
  776. hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  777. stimer->timer.function = stimer_timer_callback;
  778. stimer_prepare_msg(stimer);
  779. }
  780. int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
  781. {
  782. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  783. int i;
  784. if (hv_vcpu)
  785. return 0;
  786. hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
  787. if (!hv_vcpu)
  788. return -ENOMEM;
  789. vcpu->arch.hyperv = hv_vcpu;
  790. hv_vcpu->vcpu = vcpu;
  791. synic_init(&hv_vcpu->synic);
  792. bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
  793. for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
  794. stimer_init(&hv_vcpu->stimer[i], i);
  795. hv_vcpu->vp_index = vcpu->vcpu_idx;
  796. return 0;
  797. }
  798. int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
  799. {
  800. struct kvm_vcpu_hv_synic *synic;
  801. int r;
  802. r = kvm_hv_vcpu_init(vcpu);
  803. if (r)
  804. return r;
  805. synic = to_hv_synic(vcpu);
  806. synic->active = true;
  807. synic->dont_zero_synic_pages = dont_zero_synic_pages;
  808. synic->control = HV_SYNIC_CONTROL_ENABLE;
  809. return 0;
  810. }
  811. static bool kvm_hv_msr_partition_wide(u32 msr)
  812. {
  813. bool r = false;
  814. switch (msr) {
  815. case HV_X64_MSR_GUEST_OS_ID:
  816. case HV_X64_MSR_HYPERCALL:
  817. case HV_X64_MSR_REFERENCE_TSC:
  818. case HV_X64_MSR_TIME_REF_COUNT:
  819. case HV_X64_MSR_CRASH_CTL:
  820. case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
  821. case HV_X64_MSR_RESET:
  822. case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
  823. case HV_X64_MSR_TSC_EMULATION_CONTROL:
  824. case HV_X64_MSR_TSC_EMULATION_STATUS:
  825. case HV_X64_MSR_SYNDBG_OPTIONS:
  826. case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
  827. r = true;
  828. break;
  829. }
  830. return r;
  831. }
  832. static int kvm_hv_msr_get_crash_data(struct kvm *kvm, u32 index, u64 *pdata)
  833. {
  834. struct kvm_hv *hv = to_kvm_hv(kvm);
  835. size_t size = ARRAY_SIZE(hv->hv_crash_param);
  836. if (WARN_ON_ONCE(index >= size))
  837. return -EINVAL;
  838. *pdata = hv->hv_crash_param[array_index_nospec(index, size)];
  839. return 0;
  840. }
  841. static int kvm_hv_msr_get_crash_ctl(struct kvm *kvm, u64 *pdata)
  842. {
  843. struct kvm_hv *hv = to_kvm_hv(kvm);
  844. *pdata = hv->hv_crash_ctl;
  845. return 0;
  846. }
  847. static int kvm_hv_msr_set_crash_ctl(struct kvm *kvm, u64 data)
  848. {
  849. struct kvm_hv *hv = to_kvm_hv(kvm);
  850. hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
  851. return 0;
  852. }
  853. static int kvm_hv_msr_set_crash_data(struct kvm *kvm, u32 index, u64 data)
  854. {
  855. struct kvm_hv *hv = to_kvm_hv(kvm);
  856. size_t size = ARRAY_SIZE(hv->hv_crash_param);
  857. if (WARN_ON_ONCE(index >= size))
  858. return -EINVAL;
  859. hv->hv_crash_param[array_index_nospec(index, size)] = data;
  860. return 0;
  861. }
  862. /*
  863. * The kvmclock and Hyper-V TSC page use similar formulas, and converting
  864. * between them is possible:
  865. *
  866. * kvmclock formula:
  867. * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
  868. * + system_time
  869. *
  870. * Hyper-V formula:
  871. * nsec/100 = ticks * scale / 2^64 + offset
  872. *
  873. * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
  874. * By dividing the kvmclock formula by 100 and equating what's left we get:
  875. * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
  876. * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
  877. * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
  878. *
  879. * Now expand the kvmclock formula and divide by 100:
  880. * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
  881. * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
  882. * + system_time
  883. * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
  884. * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
  885. * + system_time / 100
  886. *
  887. * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
  888. * nsec/100 = ticks * scale / 2^64
  889. * - tsc_timestamp * scale / 2^64
  890. * + system_time / 100
  891. *
  892. * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
  893. * offset = system_time / 100 - tsc_timestamp * scale / 2^64
  894. *
  895. * These two equivalencies are implemented in this function.
  896. */
  897. static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
  898. struct ms_hyperv_tsc_page *tsc_ref)
  899. {
  900. u64 max_mul;
  901. if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
  902. return false;
  903. /*
  904. * check if scale would overflow, if so we use the time ref counter
  905. * tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
  906. * tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
  907. * tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
  908. */
  909. max_mul = 100ull << (32 - hv_clock->tsc_shift);
  910. if (hv_clock->tsc_to_system_mul >= max_mul)
  911. return false;
  912. /*
  913. * Otherwise compute the scale and offset according to the formulas
  914. * derived above.
  915. */
  916. tsc_ref->tsc_scale =
  917. mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
  918. hv_clock->tsc_to_system_mul,
  919. 100);
  920. tsc_ref->tsc_offset = hv_clock->system_time;
  921. do_div(tsc_ref->tsc_offset, 100);
  922. tsc_ref->tsc_offset -=
  923. mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
  924. return true;
  925. }
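/*
 * Worked example of the equivalence derived above, assuming a 1 GHz
 * guest TSC (so tsc_scale ends up as 2^64 / 100) and a kvmclock with
 * system_time = 5,000,000,000 ns taken at tsc_timestamp = T0:
 *
 *	tsc_offset = 5,000,000,000 / 100 - T0 * tsc_scale / 2^64
 *	           = 50,000,000 - T0 / 100
 *
 * Reading the page one second later, at tsc = T0 + 1,000,000,000:
 *
 *	ref = tsc * tsc_scale / 2^64 + tsc_offset
 *	    = (T0 + 1,000,000,000) / 100 + 50,000,000 - T0 / 100
 *	    = 60,000,000
 *
 * i.e. 6 s in 100 ns units, exactly what the kvmclock formula gives
 * (5 s of system_time plus 1 s of elapsed TSC).
 */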
  926. /*
  927. * Don't touch TSC page values if the guest has opted for TSC emulation after
  928. * migration. KVM doesn't fully support reenlightenment notifications and TSC
  929. * access emulation and Hyper-V is known to expect the values in TSC page to
  930. * stay constant before TSC access emulation is disabled from guest side
  931. * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
  932. * frequency and guest visible TSC value across migration (and prevent it when
  933. * TSC scaling is unsupported).
  934. */
  935. static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
  936. {
  937. return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
  938. hv->hv_tsc_emulation_control;
  939. }
  940. void kvm_hv_setup_tsc_page(struct kvm *kvm,
  941. struct pvclock_vcpu_time_info *hv_clock)
  942. {
  943. struct kvm_hv *hv = to_kvm_hv(kvm);
  944. u32 tsc_seq;
  945. u64 gfn;
  946. BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
  947. BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
  948. mutex_lock(&hv->hv_lock);
  949. if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
  950. hv->hv_tsc_page_status == HV_TSC_PAGE_SET ||
  951. hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
  952. goto out_unlock;
  953. if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
  954. goto out_unlock;
  955. gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
  956. /*
  957. * Because the TSC parameters only vary when there is a
  958. * change in the master clock, do not bother with caching.
  959. */
  960. if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
  961. &tsc_seq, sizeof(tsc_seq))))
  962. goto out_err;
  963. if (tsc_seq && tsc_page_update_unsafe(hv)) {
  964. if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
  965. goto out_err;
  966. hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
  967. goto out_unlock;
  968. }
  969. /*
  970. * While we're computing and writing the parameters, force the
  971. * guest to use the time reference count MSR.
  972. */
  973. hv->tsc_ref.tsc_sequence = 0;
  974. if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
  975. &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
  976. goto out_err;
  977. if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
  978. goto out_err;
  979. /* Ensure sequence is zero before writing the rest of the struct. */
  980. smp_wmb();
  981. if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
  982. goto out_err;
  983. /*
  984. * Now switch to the TSC page mechanism by writing the sequence.
  985. */
  986. tsc_seq++;
  987. if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
  988. tsc_seq = 1;
  989. /* Write the struct entirely before the non-zero sequence. */
  990. smp_wmb();
  991. hv->tsc_ref.tsc_sequence = tsc_seq;
  992. if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
  993. &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
  994. goto out_err;
  995. hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
  996. goto out_unlock;
  997. out_err:
  998. hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
  999. out_unlock:
  1000. mutex_unlock(&hv->hv_lock);
  1001. }
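/*
 * The sequence dance above mirrors the reader the guest runs against this
 * page (a sketch of the consumer side for reference, not KVM code; field
 * names are those of struct ms_hyperv_tsc_page):
 *
 *	do {
 *		seq = tsc_page->tsc_sequence;
 *		if (seq == 0)
 *			return <value of HV_X64_MSR_TIME_REF_COUNT>;
 *		scale  = tsc_page->tsc_scale;
 *		offset = tsc_page->tsc_offset;
 *		tsc    = rdtsc();
 *	} while (tsc_page->tsc_sequence != seq);
 *	return mul_u64_u64_shr(tsc, scale, 64) + offset;
 *
 * Zeroing tsc_sequence before touching scale/offset and only publishing a
 * new non-zero sequence afterwards is what keeps such a reader from ever
 * computing a reference time from a half-updated page.
 */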
  1002. void kvm_hv_request_tsc_page_update(struct kvm *kvm)
  1003. {
  1004. struct kvm_hv *hv = to_kvm_hv(kvm);
  1005. mutex_lock(&hv->hv_lock);
  1006. if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET &&
  1007. !tsc_page_update_unsafe(hv))
  1008. hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
  1009. mutex_unlock(&hv->hv_lock);
  1010. }
  1011. static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
  1012. {
  1013. if (!hv_vcpu->enforce_cpuid)
  1014. return true;
  1015. switch (msr) {
  1016. case HV_X64_MSR_GUEST_OS_ID:
  1017. case HV_X64_MSR_HYPERCALL:
  1018. return hv_vcpu->cpuid_cache.features_eax &
  1019. HV_MSR_HYPERCALL_AVAILABLE;
  1020. case HV_X64_MSR_VP_RUNTIME:
  1021. return hv_vcpu->cpuid_cache.features_eax &
  1022. HV_MSR_VP_RUNTIME_AVAILABLE;
  1023. case HV_X64_MSR_TIME_REF_COUNT:
  1024. return hv_vcpu->cpuid_cache.features_eax &
  1025. HV_MSR_TIME_REF_COUNT_AVAILABLE;
  1026. case HV_X64_MSR_VP_INDEX:
  1027. return hv_vcpu->cpuid_cache.features_eax &
  1028. HV_MSR_VP_INDEX_AVAILABLE;
  1029. case HV_X64_MSR_RESET:
  1030. return hv_vcpu->cpuid_cache.features_eax &
  1031. HV_MSR_RESET_AVAILABLE;
  1032. case HV_X64_MSR_REFERENCE_TSC:
  1033. return hv_vcpu->cpuid_cache.features_eax &
  1034. HV_MSR_REFERENCE_TSC_AVAILABLE;
  1035. case HV_X64_MSR_SCONTROL:
  1036. case HV_X64_MSR_SVERSION:
  1037. case HV_X64_MSR_SIEFP:
  1038. case HV_X64_MSR_SIMP:
  1039. case HV_X64_MSR_EOM:
  1040. case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
  1041. return hv_vcpu->cpuid_cache.features_eax &
  1042. HV_MSR_SYNIC_AVAILABLE;
  1043. case HV_X64_MSR_STIMER0_CONFIG:
  1044. case HV_X64_MSR_STIMER1_CONFIG:
  1045. case HV_X64_MSR_STIMER2_CONFIG:
  1046. case HV_X64_MSR_STIMER3_CONFIG:
  1047. case HV_X64_MSR_STIMER0_COUNT:
  1048. case HV_X64_MSR_STIMER1_COUNT:
  1049. case HV_X64_MSR_STIMER2_COUNT:
  1050. case HV_X64_MSR_STIMER3_COUNT:
  1051. return hv_vcpu->cpuid_cache.features_eax &
  1052. HV_MSR_SYNTIMER_AVAILABLE;
  1053. case HV_X64_MSR_EOI:
  1054. case HV_X64_MSR_ICR:
  1055. case HV_X64_MSR_TPR:
  1056. case HV_X64_MSR_VP_ASSIST_PAGE:
  1057. return hv_vcpu->cpuid_cache.features_eax &
  1058. HV_MSR_APIC_ACCESS_AVAILABLE;
  1059. break;
  1060. case HV_X64_MSR_TSC_FREQUENCY:
  1061. case HV_X64_MSR_APIC_FREQUENCY:
  1062. return hv_vcpu->cpuid_cache.features_eax &
  1063. HV_ACCESS_FREQUENCY_MSRS;
  1064. case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
  1065. case HV_X64_MSR_TSC_EMULATION_CONTROL:
  1066. case HV_X64_MSR_TSC_EMULATION_STATUS:
  1067. return hv_vcpu->cpuid_cache.features_eax &
  1068. HV_ACCESS_REENLIGHTENMENT;
  1069. case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
  1070. case HV_X64_MSR_CRASH_CTL:
  1071. return hv_vcpu->cpuid_cache.features_edx &
  1072. HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
  1073. case HV_X64_MSR_SYNDBG_OPTIONS:
  1074. case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
  1075. return hv_vcpu->cpuid_cache.features_edx &
  1076. HV_FEATURE_DEBUG_MSRS_AVAILABLE;
  1077. default:
  1078. break;
  1079. }
  1080. return false;
  1081. }
  1082. static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
  1083. bool host)
  1084. {
  1085. struct kvm *kvm = vcpu->kvm;
  1086. struct kvm_hv *hv = to_kvm_hv(kvm);
  1087. if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
  1088. return 1;
  1089. switch (msr) {
  1090. case HV_X64_MSR_GUEST_OS_ID:
  1091. hv->hv_guest_os_id = data;
  1092. /* setting guest os id to zero disables hypercall page */
  1093. if (!hv->hv_guest_os_id)
  1094. hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
  1095. break;
  1096. case HV_X64_MSR_HYPERCALL: {
  1097. u8 instructions[9];
  1098. int i = 0;
  1099. u64 addr;
  1100. /* if guest os id is not set hypercall should remain disabled */
  1101. if (!hv->hv_guest_os_id)
  1102. break;
  1103. if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
  1104. hv->hv_hypercall = data;
  1105. break;
  1106. }
  1107. /*
  1108. * If Xen and Hyper-V hypercalls are both enabled, disambiguate
  1109. * the same way Xen itself does, by setting the bit 31 of EAX
  1110. * which is RsvdZ in the 32-bit Hyper-V hypercall ABI and just
  1111. * going to be clobbered on 64-bit.
  1112. */
  1113. if (kvm_xen_hypercall_enabled(kvm)) {
  1114. /* orl $0x80000000, %eax */
  1115. instructions[i++] = 0x0d;
  1116. instructions[i++] = 0x00;
  1117. instructions[i++] = 0x00;
  1118. instructions[i++] = 0x00;
  1119. instructions[i++] = 0x80;
  1120. }
  1121. /* vmcall/vmmcall */
  1122. static_call(kvm_x86_patch_hypercall)(vcpu, instructions + i);
  1123. i += 3;
  1124. /* ret */
  1125. ((unsigned char *)instructions)[i++] = 0xc3;
  1126. addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
  1127. if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
  1128. return 1;
  1129. hv->hv_hypercall = data;
  1130. break;
  1131. }
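/*
 * For reference, the bytes written to the hypercall page above are,
 * assuming VMX (on SVM, kvm_x86_patch_hypercall() emits vmmcall,
 * 0f 01 d9, instead of vmcall, 0f 01 c1):
 *
 *	Xen hypercalls also enabled:  0d 00 00 00 80  0f 01 c1  c3
 *	                              orl $0x80000000, %eax; vmcall; ret
 *	Hyper-V only:                 0f 01 c1  c3
 *	                              vmcall; ret
 *
 * which is why instructions[] is sized for at most 9 bytes.
 */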
  1132. case HV_X64_MSR_REFERENCE_TSC:
  1133. hv->hv_tsc_page = data;
  1134. if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
  1135. if (!host)
  1136. hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
  1137. else
  1138. hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
  1139. kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
  1140. } else {
  1141. hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
  1142. }
  1143. break;
  1144. case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
  1145. return kvm_hv_msr_set_crash_data(kvm,
  1146. msr - HV_X64_MSR_CRASH_P0,
  1147. data);
  1148. case HV_X64_MSR_CRASH_CTL:
  1149. if (host)
  1150. return kvm_hv_msr_set_crash_ctl(kvm, data);
  1151. if (data & HV_CRASH_CTL_CRASH_NOTIFY) {
  1152. vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
  1153. hv->hv_crash_param[0],
  1154. hv->hv_crash_param[1],
  1155. hv->hv_crash_param[2],
  1156. hv->hv_crash_param[3],
  1157. hv->hv_crash_param[4]);
  1158. /* Send notification about crash to user space */
  1159. kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
  1160. }
  1161. break;
  1162. case HV_X64_MSR_RESET:
  1163. if (data == 1) {
  1164. vcpu_debug(vcpu, "hyper-v reset requested\n");
  1165. kvm_make_request(KVM_REQ_HV_RESET, vcpu);
  1166. }
  1167. break;
  1168. case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
  1169. hv->hv_reenlightenment_control = data;
  1170. break;
  1171. case HV_X64_MSR_TSC_EMULATION_CONTROL:
  1172. hv->hv_tsc_emulation_control = data;
  1173. break;
  1174. case HV_X64_MSR_TSC_EMULATION_STATUS:
  1175. if (data && !host)
  1176. return 1;
  1177. hv->hv_tsc_emulation_status = data;
  1178. break;
  1179. case HV_X64_MSR_TIME_REF_COUNT:
  1180. /* read-only, but still ignore it if host-initiated */
  1181. if (!host)
  1182. return 1;
  1183. break;
  1184. case HV_X64_MSR_SYNDBG_OPTIONS:
  1185. case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
  1186. return syndbg_set_msr(vcpu, msr, data, host);
  1187. default:
  1188. vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
  1189. msr, data);
  1190. return 1;
  1191. }
  1192. return 0;
  1193. }
  1194. /* Calculate cpu time spent by current task in 100ns units */
  1195. static u64 current_task_runtime_100ns(void)
  1196. {
  1197. u64 utime, stime;
  1198. task_cputime_adjusted(current, &utime, &stime);
  1199. return div_u64(utime + stime, 100);
  1200. }
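/*
 * Example of how the two HV_X64_MSR_VP_RUNTIME paths below fit together
 * (numbers are illustrative): if the task has consumed
 * utime + stime = 2,500,000,000 ns, this returns 25,000,000 (100 ns
 * units). A host write of the MSR stores
 * runtime_offset = data - current_task_runtime_100ns(), so subsequent
 * guest reads return current_task_runtime_100ns() + runtime_offset,
 * i.e. they continue counting up from the restored value.
 */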
  1201. static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
  1202. {
  1203. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  1204. if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
  1205. return 1;
  1206. switch (msr) {
  1207. case HV_X64_MSR_VP_INDEX: {
  1208. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  1209. u32 new_vp_index = (u32)data;
  1210. if (!host || new_vp_index >= KVM_MAX_VCPUS)
  1211. return 1;
  1212. if (new_vp_index == hv_vcpu->vp_index)
  1213. return 0;
  1214. /*
  1215. * The VP index is initialized to vcpu_index by
  1216. * kvm_hv_vcpu_postcreate so they initially match. Now the
  1217. * VP index is changing, adjust num_mismatched_vp_indexes if
  1218. * it now matches or no longer matches vcpu_idx.
  1219. */
  1220. if (hv_vcpu->vp_index == vcpu->vcpu_idx)
  1221. atomic_inc(&hv->num_mismatched_vp_indexes);
  1222. else if (new_vp_index == vcpu->vcpu_idx)
  1223. atomic_dec(&hv->num_mismatched_vp_indexes);
  1224. hv_vcpu->vp_index = new_vp_index;
  1225. break;
  1226. }
  1227. case HV_X64_MSR_VP_ASSIST_PAGE: {
  1228. u64 gfn;
  1229. unsigned long addr;
  1230. if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
  1231. hv_vcpu->hv_vapic = data;
  1232. if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
  1233. return 1;
  1234. break;
  1235. }
  1236. gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
  1237. addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
  1238. if (kvm_is_error_hva(addr))
  1239. return 1;
  1240. /*
  1241. * Clear apic_assist portion of struct hv_vp_assist_page
  1242. * only, there can be valuable data in the rest which needs
  1243. * to be preserved e.g. on migration.
  1244. */
  1245. if (__put_user(0, (u32 __user *)addr))
  1246. return 1;
  1247. hv_vcpu->hv_vapic = data;
  1248. kvm_vcpu_mark_page_dirty(vcpu, gfn);
  1249. if (kvm_lapic_set_pv_eoi(vcpu,
  1250. gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
  1251. sizeof(struct hv_vp_assist_page)))
  1252. return 1;
  1253. break;
  1254. }
  1255. case HV_X64_MSR_EOI:
  1256. return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
  1257. case HV_X64_MSR_ICR:
  1258. return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
  1259. case HV_X64_MSR_TPR:
  1260. return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
  1261. case HV_X64_MSR_VP_RUNTIME:
  1262. if (!host)
  1263. return 1;
  1264. hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
  1265. break;
  1266. case HV_X64_MSR_SCONTROL:
  1267. case HV_X64_MSR_SVERSION:
  1268. case HV_X64_MSR_SIEFP:
  1269. case HV_X64_MSR_SIMP:
  1270. case HV_X64_MSR_EOM:
  1271. case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
  1272. return synic_set_msr(to_hv_synic(vcpu), msr, data, host);
  1273. case HV_X64_MSR_STIMER0_CONFIG:
  1274. case HV_X64_MSR_STIMER1_CONFIG:
  1275. case HV_X64_MSR_STIMER2_CONFIG:
  1276. case HV_X64_MSR_STIMER3_CONFIG: {
  1277. int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;
  1278. return stimer_set_config(to_hv_stimer(vcpu, timer_index),
  1279. data, host);
  1280. }
  1281. case HV_X64_MSR_STIMER0_COUNT:
  1282. case HV_X64_MSR_STIMER1_COUNT:
  1283. case HV_X64_MSR_STIMER2_COUNT:
  1284. case HV_X64_MSR_STIMER3_COUNT: {
  1285. int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;
  1286. return stimer_set_count(to_hv_stimer(vcpu, timer_index),
  1287. data, host);
  1288. }
  1289. case HV_X64_MSR_TSC_FREQUENCY:
  1290. case HV_X64_MSR_APIC_FREQUENCY:
  1291. /* read-only, but still ignore it if host-initiated */
  1292. if (!host)
  1293. return 1;
  1294. break;
  1295. default:
  1296. vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
  1297. msr, data);
  1298. return 1;
  1299. }
  1300. return 0;
  1301. }
static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
                             bool host)
{
        u64 data = 0;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_hv *hv = to_kvm_hv(kvm);

        if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
                return 1;

        switch (msr) {
        case HV_X64_MSR_GUEST_OS_ID:
                data = hv->hv_guest_os_id;
                break;
        case HV_X64_MSR_HYPERCALL:
                data = hv->hv_hypercall;
                break;
        case HV_X64_MSR_TIME_REF_COUNT:
                data = get_time_ref_counter(kvm);
                break;
        case HV_X64_MSR_REFERENCE_TSC:
                data = hv->hv_tsc_page;
                break;
        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
                return kvm_hv_msr_get_crash_data(kvm,
                                                 msr - HV_X64_MSR_CRASH_P0,
                                                 pdata);
        case HV_X64_MSR_CRASH_CTL:
                return kvm_hv_msr_get_crash_ctl(kvm, pdata);
        case HV_X64_MSR_RESET:
                data = 0;
                break;
        case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
                data = hv->hv_reenlightenment_control;
                break;
        case HV_X64_MSR_TSC_EMULATION_CONTROL:
                data = hv->hv_tsc_emulation_control;
                break;
        case HV_X64_MSR_TSC_EMULATION_STATUS:
                data = hv->hv_tsc_emulation_status;
                break;
        case HV_X64_MSR_SYNDBG_OPTIONS:
        case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
                return syndbg_get_msr(vcpu, msr, pdata, host);
        default:
                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                return 1;
        }

        *pdata = data;
        return 0;
}
static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
                          bool host)
{
        u64 data = 0;
        struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

        if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
                return 1;

        switch (msr) {
        case HV_X64_MSR_VP_INDEX:
                data = hv_vcpu->vp_index;
                break;
        case HV_X64_MSR_EOI:
                return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
        case HV_X64_MSR_ICR:
                return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
        case HV_X64_MSR_TPR:
                return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
        case HV_X64_MSR_VP_ASSIST_PAGE:
                data = hv_vcpu->hv_vapic;
                break;
        case HV_X64_MSR_VP_RUNTIME:
                data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
                break;
        case HV_X64_MSR_SCONTROL:
        case HV_X64_MSR_SVERSION:
        case HV_X64_MSR_SIEFP:
        case HV_X64_MSR_SIMP:
        case HV_X64_MSR_EOM:
        case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
                return synic_get_msr(to_hv_synic(vcpu), msr, pdata, host);
        case HV_X64_MSR_STIMER0_CONFIG:
        case HV_X64_MSR_STIMER1_CONFIG:
        case HV_X64_MSR_STIMER2_CONFIG:
        case HV_X64_MSR_STIMER3_CONFIG: {
                int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG) / 2;

                return stimer_get_config(to_hv_stimer(vcpu, timer_index),
                                         pdata);
        }
        case HV_X64_MSR_STIMER0_COUNT:
        case HV_X64_MSR_STIMER1_COUNT:
        case HV_X64_MSR_STIMER2_COUNT:
        case HV_X64_MSR_STIMER3_COUNT: {
                int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT) / 2;

                return stimer_get_count(to_hv_stimer(vcpu, timer_index),
                                        pdata);
        }
        case HV_X64_MSR_TSC_FREQUENCY:
                data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
                break;
        case HV_X64_MSR_APIC_FREQUENCY:
                data = APIC_BUS_FREQUENCY;
                break;
        default:
                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                return 1;
        }

        *pdata = data;
        return 0;
}
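/*
 * Common MSR write entry point: partition-wide MSRs are serialized with
 * hv->hv_lock, per-vCPU MSRs go straight to kvm_hv_set_msr().
 */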
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
        struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);

        if (!host && !vcpu->arch.hyperv_enabled)
                return 1;

        if (kvm_hv_vcpu_init(vcpu))
                return 1;

        if (kvm_hv_msr_partition_wide(msr)) {
                int r;

                mutex_lock(&hv->hv_lock);
                r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
                mutex_unlock(&hv->hv_lock);
                return r;
        } else
                return kvm_hv_set_msr(vcpu, msr, data, host);
}

int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
        struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);

        if (!host && !vcpu->arch.hyperv_enabled)
                return 1;

        if (kvm_hv_vcpu_init(vcpu))
                return 1;

        if (kvm_hv_msr_partition_wide(msr)) {
                int r;

                mutex_lock(&hv->hv_lock);
                r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
                mutex_unlock(&hv->hv_lock);
                return r;
        } else
                return kvm_hv_get_msr(vcpu, msr, pdata, host);
}
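/*
 * Convert a Hyper-V "sparse VP set" (valid_bank_mask plus one 64-bit bank
 * per set bit) into a bitmap of KVM vcpu indices.  When every vCPU's VP
 * index equals its vcpu_idx the banks can be copied into vcpu_mask directly;
 * otherwise each vCPU's VP index is looked up in a temporary bitmap.
 */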
static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
                                    u64 valid_bank_mask, unsigned long *vcpu_mask)
{
        struct kvm_hv *hv = to_kvm_hv(kvm);
        bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
        u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
        struct kvm_vcpu *vcpu;
        int bank, sbank = 0;
        unsigned long i;
        u64 *bitmap;

        BUILD_BUG_ON(sizeof(vp_bitmap) >
                     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));

        /*
         * If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly, else
         * fill a temporary buffer and manually test each vCPU's VP index.
         */
        if (likely(!has_mismatch))
                bitmap = (u64 *)vcpu_mask;
        else
                bitmap = vp_bitmap;

        /*
         * Each set of 64 VPs is packed into sparse_banks, with valid_bank_mask
         * having a '1' for each bank that exists in sparse_banks.  Sets must
         * be in ascending order, i.e. bank0..bankN.
         */
        memset(bitmap, 0, sizeof(vp_bitmap));
        for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
                         KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
                bitmap[bank] = sparse_banks[sbank++];

        if (likely(!has_mismatch))
                return;

        bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
                        __set_bit(i, vcpu_mask);
        }
}
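/*
 * Decoded hypercall input: the raw input value ('param'), the input/output
 * GPAs (or immediate arguments for fast calls), and the unpacked call code,
 * variable header size, rep count/index and fast flag, plus any XMM
 * arguments read for XMM fast hypercalls.
 */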
struct kvm_hv_hcall {
        u64 param;
        u64 ingpa;
        u64 outgpa;
        u16 code;
        u16 var_cnt;
        u16 rep_cnt;
        u16 rep_idx;
        bool fast;
        bool rep;
        sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
};
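/*
 * Fetch the variable-size sparse bank array of a VP-set hypercall.  For XMM
 * fast hypercalls the banks live in the XMM registers (two banks per
 * register, minus any halves already consumed by fixed parameters); for
 * regular hypercalls they are read from guest memory at ingpa + offset.
 */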
static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
                                 int consumed_xmm_halves,
                                 u64 *sparse_banks, gpa_t offset)
{
        u16 var_cnt;
        int i;

        if (hc->var_cnt > 64)
                return -EINVAL;

        /* Ignore banks that cannot possibly contain a legal VP index. */
        var_cnt = min_t(u16, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS);

        if (hc->fast) {
                /*
                 * Each XMM holds two sparse banks, but do not count halves that
                 * have already been consumed for hypercall parameters.
                 */
                if (hc->var_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;

                for (i = 0; i < var_cnt; i++) {
                        int j = i + consumed_xmm_halves;

                        if (j % 2)
                                sparse_banks[i] = sse128_hi(hc->xmm[j / 2]);
                        else
                                sparse_banks[i] = sse128_lo(hc->xmm[j / 2]);
                }
                return 0;
        }

        return kvm_read_guest(kvm, hc->ingpa + offset, sparse_banks,
                              var_cnt * sizeof(*sparse_banks));
}
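/*
 * Handle HVCALL_FLUSH_VIRTUAL_ADDRESS_{SPACE,LIST}{,_EX}.  Every variant is
 * implemented as a full guest TLB flush on the targeted vCPUs (or all vCPUs
 * for the "all processors" forms); the requested address space and VA list
 * are not consulted, see the comments below.
 */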
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
        struct kvm *kvm = vcpu->kvm;
        struct hv_tlb_flush_ex flush_ex;
        struct hv_tlb_flush flush;
        DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
        u64 valid_bank_mask;
        u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
        bool all_cpus;

        /*
         * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the
         * valid mask is a u64.  Fail the build if KVM's max allowed number of
         * vCPUs (>4096) would exceed this limit; KVM will need additional
         * changes for Hyper-V support to avoid setting the guest up to fail.
         */
        BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64);

        if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
            hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) {
                if (hc->fast) {
                        flush.address_space = hc->ingpa;
                        flush.flags = hc->outgpa;
                        flush.processor_mask = sse128_lo(hc->xmm[0]);
                } else {
                        if (unlikely(kvm_read_guest(kvm, hc->ingpa,
                                                    &flush, sizeof(flush))))
                                return HV_STATUS_INVALID_HYPERCALL_INPUT;
                }

                trace_kvm_hv_flush_tlb(flush.processor_mask,
                                       flush.address_space, flush.flags);

                valid_bank_mask = BIT_ULL(0);
                sparse_banks[0] = flush.processor_mask;

                /*
                 * Work around possible WS2012 bug: it sends hypercalls
                 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
                 * while also expecting us to flush something and crashing if
                 * we don't.  Let's treat processor_mask == 0 same as
                 * HV_FLUSH_ALL_PROCESSORS.
                 */
                all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
                        flush.processor_mask == 0;
        } else {
                if (hc->fast) {
                        flush_ex.address_space = hc->ingpa;
                        flush_ex.flags = hc->outgpa;
                        memcpy(&flush_ex.hv_vp_set,
                               &hc->xmm[0], sizeof(hc->xmm[0]));
                } else {
                        if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
                                                    sizeof(flush_ex))))
                                return HV_STATUS_INVALID_HYPERCALL_INPUT;
                }

                trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
                                          flush_ex.hv_vp_set.format,
                                          flush_ex.address_space,
                                          flush_ex.flags);

                valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
                all_cpus = flush_ex.hv_vp_set.format !=
                        HV_GENERIC_SET_SPARSE_4K;

                if (hc->var_cnt != hweight64(valid_bank_mask))
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;

                if (all_cpus)
                        goto do_flush;

                if (!hc->var_cnt)
                        goto ret_success;

                if (kvm_get_sparse_vp_set(kvm, hc, 2, sparse_banks,
                                          offsetof(struct hv_tlb_flush_ex,
                                                   hv_vp_set.bank_contents)))
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
        }

do_flush:
        /*
         * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
         * analyze it here, flush TLB regardless of the specified address space.
         */
        if (all_cpus) {
                kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
        } else {
                sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);

                kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask);
        }

ret_success:
        /* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
        return (u64)HV_STATUS_SUCCESS |
                ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}
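/*
 * Deliver a fixed-mode IPI with the given vector to every vCPU whose bit is
 * set in vcpu_bitmap, or to all vCPUs when vcpu_bitmap is NULL.
 */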
static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
                                 unsigned long *vcpu_bitmap)
{
        struct kvm_lapic_irq irq = {
                .delivery_mode = APIC_DM_FIXED,
                .vector = vector
        };
        struct kvm_vcpu *vcpu;
        unsigned long i;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
                        continue;

                /* We fail only when APIC is disabled */
                kvm_apic_set_irq(vcpu, &irq, NULL);
        }
}
static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
        struct kvm *kvm = vcpu->kvm;
        struct hv_send_ipi_ex send_ipi_ex;
        struct hv_send_ipi send_ipi;
        DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
        u64 valid_bank_mask;
        u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
        u32 vector;
        bool all_cpus;

        if (hc->code == HVCALL_SEND_IPI) {
                if (!hc->fast) {
                        if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
                                                    sizeof(send_ipi))))
                                return HV_STATUS_INVALID_HYPERCALL_INPUT;
                        sparse_banks[0] = send_ipi.cpu_mask;
                        vector = send_ipi.vector;
                } else {
                        /* 'reserved' part of hv_send_ipi should be 0 */
                        if (unlikely(hc->ingpa >> 32 != 0))
                                return HV_STATUS_INVALID_HYPERCALL_INPUT;
                        sparse_banks[0] = hc->outgpa;
                        vector = (u32)hc->ingpa;
                }
                all_cpus = false;
                valid_bank_mask = BIT_ULL(0);

                trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
        } else {
                if (!hc->fast) {
                        if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
                                                    sizeof(send_ipi_ex))))
                                return HV_STATUS_INVALID_HYPERCALL_INPUT;
                } else {
                        send_ipi_ex.vector = (u32)hc->ingpa;
                        send_ipi_ex.vp_set.format = hc->outgpa;
                        send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]);
                }

                trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
                                         send_ipi_ex.vp_set.format,
                                         send_ipi_ex.vp_set.valid_bank_mask);

                vector = send_ipi_ex.vector;
                valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
                all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

                if (hc->var_cnt != hweight64(valid_bank_mask))
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;

                if (all_cpus)
                        goto check_and_send_ipi;

                if (!hc->var_cnt)
                        goto ret_success;

                if (kvm_get_sparse_vp_set(kvm, hc, 1, sparse_banks,
                                          offsetof(struct hv_send_ipi_ex,
                                                   vp_set.bank_contents)))
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
        }

check_and_send_ipi:
        if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
                return HV_STATUS_INVALID_HYPERCALL_INPUT;

        if (all_cpus) {
                kvm_send_ipi_to_many(kvm, vector, NULL);
        } else {
                sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);

                kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
        }

ret_success:
        return HV_STATUS_SUCCESS;
}
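/*
 * Cache the guest-visible Hyper-V CPUID leaves so that MSR and hypercall
 * permission checks don't have to walk the CPUID array on every access.
 */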
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled)
{
        struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
        struct kvm_cpuid_entry2 *entry;

        vcpu->arch.hyperv_enabled = hyperv_enabled;

        if (!hv_vcpu) {
                /*
                 * KVM should have already allocated kvm_vcpu_hv if Hyper-V is
                 * enabled in CPUID.
                 */
                WARN_ON_ONCE(vcpu->arch.hyperv_enabled);
                return;
        }

        memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));

        if (!vcpu->arch.hyperv_enabled)
                return;

        entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
        if (entry) {
                hv_vcpu->cpuid_cache.features_eax = entry->eax;
                hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
                hv_vcpu->cpuid_cache.features_edx = entry->edx;
        }

        entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
        if (entry) {
                hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
                hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
        }

        entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
        if (entry)
                hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;

        entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_NESTED_FEATURES);
        if (entry) {
                hv_vcpu->cpuid_cache.nested_eax = entry->eax;
                hv_vcpu->cpuid_cache.nested_ebx = entry->ebx;
        }
}
int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
{
        struct kvm_vcpu_hv *hv_vcpu;
        int ret = 0;

        if (!to_hv_vcpu(vcpu)) {
                if (enforce) {
                        ret = kvm_hv_vcpu_init(vcpu);
                        if (ret)
                                return ret;
                } else {
                        return 0;
                }
        }

        hv_vcpu = to_hv_vcpu(vcpu);
        hv_vcpu->enforce_cpuid = enforce;

        return ret;
}
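/*
 * Return the hypercall result to the guest: in RAX for 64-bit callers, split
 * across EDX:EAX for 32-bit callers.
 */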
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
        bool longmode;

        longmode = is_64_bit_hypercall(vcpu);
        if (longmode)
                kvm_rax_write(vcpu, result);
        else {
                kvm_rdx_write(vcpu, result >> 32);
                kvm_rax_write(vcpu, result & 0xffffffff);
        }
}

static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
        trace_kvm_hv_hypercall_done(result);
        kvm_hv_hypercall_set_result(vcpu, result);
        ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
        return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}
static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
        struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
        struct eventfd_ctx *eventfd;

        if (unlikely(!hc->fast)) {
                int ret;
                gpa_t gpa = hc->ingpa;

                if ((gpa & (__alignof__(hc->ingpa) - 1)) ||
                    offset_in_page(gpa) + sizeof(hc->ingpa) > PAGE_SIZE)
                        return HV_STATUS_INVALID_ALIGNMENT;

                ret = kvm_vcpu_read_guest(vcpu, gpa,
                                          &hc->ingpa, sizeof(hc->ingpa));
                if (ret < 0)
                        return HV_STATUS_INVALID_ALIGNMENT;
        }

        /*
         * Per spec, bits 32-47 contain the extra "flag number".  However, we
         * have no use for it, and in all known use cases it is zero, so just
         * report lookup failure if it isn't.
         */
        if (hc->ingpa & 0xffff00000000ULL)
                return HV_STATUS_INVALID_PORT_ID;
        /* remaining bits are reserved-zero */
        if (hc->ingpa & ~KVM_HYPERV_CONN_ID_MASK)
                return HV_STATUS_INVALID_HYPERCALL_INPUT;

        /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
        rcu_read_lock();
        eventfd = idr_find(&hv->conn_to_evt, hc->ingpa);
        rcu_read_unlock();
        if (!eventfd)
                return HV_STATUS_INVALID_PORT_ID;

        eventfd_signal(eventfd, 1);
        return HV_STATUS_SUCCESS;
}
static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
{
        switch (hc->code) {
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
        case HVCALL_SEND_IPI_EX:
                return true;
        }

        return false;
}
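/*
 * Snapshot the guest's XMM registers that may carry fast hypercall input.
 * The guest FPU state must be loaded around the reads, hence the
 * kvm_fpu_get()/kvm_fpu_put() pair.
 */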
static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
{
        int reg;

        kvm_fpu_get();
        for (reg = 0; reg < HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
                _kvm_read_sse_reg(reg, &hc->xmm[reg]);
        kvm_fpu_put();
}
static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
{
        if (!hv_vcpu->enforce_cpuid)
                return true;

        switch (code) {
        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
                return hv_vcpu->cpuid_cache.enlightenments_ebx &&
                        hv_vcpu->cpuid_cache.enlightenments_ebx != U32_MAX;
        case HVCALL_POST_MESSAGE:
                return hv_vcpu->cpuid_cache.features_ebx & HV_POST_MESSAGES;
        case HVCALL_SIGNAL_EVENT:
                return hv_vcpu->cpuid_cache.features_ebx & HV_SIGNAL_EVENTS;
        case HVCALL_POST_DEBUG_DATA:
        case HVCALL_RETRIEVE_DEBUG_DATA:
        case HVCALL_RESET_DEBUG_SESSION:
                /*
                 * Return 'true' when SynDBG is disabled so the resulting code
                 * will be HV_STATUS_INVALID_HYPERCALL_CODE.
                 */
                return !kvm_hv_is_syndbg_enabled(hv_vcpu->vcpu) ||
                        hv_vcpu->cpuid_cache.features_ebx & HV_DEBUGGING;
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
                if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
                      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                        return false;
                fallthrough;
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
                return hv_vcpu->cpuid_cache.enlightenments_eax &
                        HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
        case HVCALL_SEND_IPI_EX:
                if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
                      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                        return false;
                fallthrough;
        case HVCALL_SEND_IPI:
                return hv_vcpu->cpuid_cache.enlightenments_eax &
                        HV_X64_CLUSTER_IPI_RECOMMENDED;
        default:
                break;
        }

        return true;
}
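/*
 * Main hypercall dispatcher.  The raw input value is taken from RCX (64-bit)
 * or EDX:EAX (32-bit) and unpacked into a struct kvm_hv_hcall: the call code
 * in bits 0-15, the fast-input flag, the variable header size and the rep
 * count/start index, with the input/output parameters coming from RDX/R8
 * (64-bit) or EBX:ECX/EDI:ESI (32-bit).
 */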
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
        struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
        struct kvm_hv_hcall hc;
        u64 ret = HV_STATUS_SUCCESS;

        /*
         * A hypercall generates #UD from non-zero CPL and from real mode,
         * per the Hyper-V spec.
         */
        if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || !is_protmode(vcpu)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }

#ifdef CONFIG_X86_64
        if (is_64_bit_hypercall(vcpu)) {
                hc.param = kvm_rcx_read(vcpu);
                hc.ingpa = kvm_rdx_read(vcpu);
                hc.outgpa = kvm_r8_read(vcpu);
        } else
#endif
        {
                hc.param = ((u64)kvm_rdx_read(vcpu) << 32) |
                            (kvm_rax_read(vcpu) & 0xffffffff);
                hc.ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
                            (kvm_rcx_read(vcpu) & 0xffffffff);
                hc.outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
                             (kvm_rsi_read(vcpu) & 0xffffffff);
        }

        hc.code = hc.param & 0xffff;
        hc.var_cnt = (hc.param & HV_HYPERCALL_VARHEAD_MASK) >> HV_HYPERCALL_VARHEAD_OFFSET;
        hc.fast = !!(hc.param & HV_HYPERCALL_FAST_BIT);
        hc.rep_cnt = (hc.param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
        hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
        hc.rep = !!(hc.rep_cnt || hc.rep_idx);

        trace_kvm_hv_hypercall(hc.code, hc.fast, hc.var_cnt, hc.rep_cnt,
                               hc.rep_idx, hc.ingpa, hc.outgpa);

        if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
                ret = HV_STATUS_ACCESS_DENIED;
                goto hypercall_complete;
        }

        if (unlikely(hc.param & HV_HYPERCALL_RSVD_MASK)) {
                ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                goto hypercall_complete;
        }

        if (hc.fast && is_xmm_fast_hypercall(&hc)) {
                if (unlikely(hv_vcpu->enforce_cpuid &&
                             !(hv_vcpu->cpuid_cache.features_edx &
                               HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
                        kvm_queue_exception(vcpu, UD_VECTOR);
                        return 1;
                }

                kvm_hv_hypercall_read_xmm(&hc);
        }

        switch (hc.code) {
        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
                if (unlikely(hc.rep || hc.var_cnt)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                kvm_vcpu_on_spin(vcpu, true);
                break;
        case HVCALL_SIGNAL_EVENT:
                if (unlikely(hc.rep || hc.var_cnt)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                ret = kvm_hvcall_signal_event(vcpu, &hc);
                if (ret != HV_STATUS_INVALID_PORT_ID)
                        break;
                fallthrough;    /* maybe userspace knows this conn_id */
        case HVCALL_POST_MESSAGE:
                /* don't bother userspace if it has no way to handle it */
                if (unlikely(hc.rep || hc.var_cnt || !to_hv_synic(vcpu)->active)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                vcpu->run->exit_reason = KVM_EXIT_HYPERV;
                vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
                vcpu->run->hyperv.u.hcall.input = hc.param;
                vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
                vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
                vcpu->arch.complete_userspace_io =
                                kvm_hv_hypercall_complete_userspace;
                return 0;
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
                if (unlikely(hc.var_cnt)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                fallthrough;
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
                if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                ret = kvm_hv_flush_tlb(vcpu, &hc);
                break;
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
                if (unlikely(hc.var_cnt)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                fallthrough;
        case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
                if (unlikely(hc.rep)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                ret = kvm_hv_flush_tlb(vcpu, &hc);
                break;
        case HVCALL_SEND_IPI:
                if (unlikely(hc.var_cnt)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                fallthrough;
        case HVCALL_SEND_IPI_EX:
                if (unlikely(hc.rep)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                ret = kvm_hv_send_ipi(vcpu, &hc);
                break;
        case HVCALL_POST_DEBUG_DATA:
        case HVCALL_RETRIEVE_DEBUG_DATA:
                if (unlikely(hc.fast)) {
                        ret = HV_STATUS_INVALID_PARAMETER;
                        break;
                }
                fallthrough;
        case HVCALL_RESET_DEBUG_SESSION: {
                struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

                if (!kvm_hv_is_syndbg_enabled(vcpu)) {
                        ret = HV_STATUS_INVALID_HYPERCALL_CODE;
                        break;
                }

                if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
                        ret = HV_STATUS_OPERATION_DENIED;
                        break;
                }
                vcpu->run->exit_reason = KVM_EXIT_HYPERV;
                vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
                vcpu->run->hyperv.u.hcall.input = hc.param;
                vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
                vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
                vcpu->arch.complete_userspace_io =
                                kvm_hv_hypercall_complete_userspace;
                return 0;
        }
        default:
                ret = HV_STATUS_INVALID_HYPERCALL_CODE;
                break;
        }

hypercall_complete:
        return kvm_hv_hypercall_complete(vcpu, ret);
}
void kvm_hv_init_vm(struct kvm *kvm)
{
        struct kvm_hv *hv = to_kvm_hv(kvm);

        mutex_init(&hv->hv_lock);
        idr_init(&hv->conn_to_evt);
}

void kvm_hv_destroy_vm(struct kvm *kvm)
{
        struct kvm_hv *hv = to_kvm_hv(kvm);
        struct eventfd_ctx *eventfd;
        int i;

        idr_for_each_entry(&hv->conn_to_evt, eventfd, i)
                eventfd_ctx_put(eventfd);
        idr_destroy(&hv->conn_to_evt);
}
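/*
 * Bind a connection ID to an eventfd for HVCALL_SIGNAL_EVENT.  idr_alloc()
 * is asked for exactly [conn_id, conn_id + 1), so a "full" range means the
 * connection ID is already taken and -ENOSPC is reported as -EEXIST.
 */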
static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
{
        struct kvm_hv *hv = to_kvm_hv(kvm);
        struct eventfd_ctx *eventfd;
        int ret;

        eventfd = eventfd_ctx_fdget(fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        mutex_lock(&hv->hv_lock);
        ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
                        GFP_KERNEL_ACCOUNT);
        mutex_unlock(&hv->hv_lock);

        if (ret >= 0)
                return 0;

        if (ret == -ENOSPC)
                ret = -EEXIST;
        eventfd_ctx_put(eventfd);
        return ret;
}

static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
{
        struct kvm_hv *hv = to_kvm_hv(kvm);
        struct eventfd_ctx *eventfd;

        mutex_lock(&hv->hv_lock);
        eventfd = idr_remove(&hv->conn_to_evt, conn_id);
        mutex_unlock(&hv->hv_lock);

        if (!eventfd)
                return -ENOENT;

        synchronize_srcu(&kvm->srcu);
        eventfd_ctx_put(eventfd);
        return 0;
}

int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
{
        if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
            (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
                return -EINVAL;

        if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
                return kvm_hv_eventfd_deassign(kvm, args->conn_id);
        return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
}
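/*
 * Fill in the Hyper-V CPUID leaves KVM supports and copy them to userspace
 * (the KVM_GET_SUPPORTED_HV_CPUID ioctl).  'vcpu' may be NULL for the
 * system-wide variant of the ioctl.
 */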
int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
                     struct kvm_cpuid_entry2 __user *entries)
{
        uint16_t evmcs_ver = 0;
        struct kvm_cpuid_entry2 cpuid_entries[] = {
                { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
                { .function = HYPERV_CPUID_INTERFACE },
                { .function = HYPERV_CPUID_VERSION },
                { .function = HYPERV_CPUID_FEATURES },
                { .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
                { .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
                { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
                { .function = HYPERV_CPUID_SYNDBG_INTERFACE },
                { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES },
                { .function = HYPERV_CPUID_NESTED_FEATURES },
        };
        int i, nent = ARRAY_SIZE(cpuid_entries);

        if (kvm_x86_ops.nested_ops->get_evmcs_version)
                evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);

        if (cpuid->nent < nent)
                return -E2BIG;

        if (cpuid->nent > nent)
                cpuid->nent = nent;

        for (i = 0; i < nent; i++) {
                struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
                u32 signature[3];

                switch (ent->function) {
                case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
                        memcpy(signature, "Linux KVM Hv", 12);

                        ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
                        ent->ebx = signature[0];
                        ent->ecx = signature[1];
                        ent->edx = signature[2];
                        break;

                case HYPERV_CPUID_INTERFACE:
                        ent->eax = HYPERV_CPUID_SIGNATURE_EAX;
                        break;

                case HYPERV_CPUID_VERSION:
                        /*
                         * We implement some Hyper-V 2016 functions so let's use
                         * this version.
                         */
                        ent->eax = 0x00003839;
                        ent->ebx = 0x000A0000;
                        break;

                case HYPERV_CPUID_FEATURES:
                        ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
                        ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
                        ent->eax |= HV_MSR_SYNIC_AVAILABLE;
                        ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
                        ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
                        ent->eax |= HV_MSR_HYPERCALL_AVAILABLE;
                        ent->eax |= HV_MSR_VP_INDEX_AVAILABLE;
                        ent->eax |= HV_MSR_RESET_AVAILABLE;
                        ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
                        ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
                        ent->eax |= HV_ACCESS_REENLIGHTENMENT;

                        ent->ebx |= HV_POST_MESSAGES;
                        ent->ebx |= HV_SIGNAL_EVENTS;

                        ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
                        ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
                        ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;

                        ent->ebx |= HV_DEBUGGING;
                        ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
                        ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;

                        /*
                         * Direct Synthetic timers only make sense with in-kernel
                         * LAPIC
                         */
                        if (!vcpu || lapic_in_kernel(vcpu))
                                ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;

                        break;

                case HYPERV_CPUID_ENLIGHTMENT_INFO:
                        ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
                        ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
                        ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
                        ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
                        ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
                        if (evmcs_ver)
                                ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
                        if (!cpu_smt_possible())
                                ent->eax |= HV_X64_NO_NONARCH_CORESHARING;

                        ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED;
                        /*
                         * Default number of spinlock retry attempts, matches
                         * HyperV 2016.
                         */
                        ent->ebx = 0x00000FFF;

                        break;

                case HYPERV_CPUID_IMPLEMENT_LIMITS:
                        /* Maximum number of virtual processors */
                        ent->eax = KVM_MAX_VCPUS;
                        /*
                         * Maximum number of logical processors, matches
                         * HyperV 2016.
                         */
                        ent->ebx = 64;

                        break;

                case HYPERV_CPUID_NESTED_FEATURES:
                        ent->eax = evmcs_ver;
                        ent->eax |= HV_X64_NESTED_MSR_BITMAP;
                        ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL;
                        break;

                case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
                        memcpy(signature, "Linux KVM Hv", 12);

                        ent->eax = 0;
                        ent->ebx = signature[0];
                        ent->ecx = signature[1];
                        ent->edx = signature[2];
                        break;

                case HYPERV_CPUID_SYNDBG_INTERFACE:
                        memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
                        ent->eax = signature[0];
                        break;

                case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES:
                        ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
                        break;

                default:
                        break;
                }
        }

        if (copy_to_user(entries, cpuid_entries,
                         nent * sizeof(struct kvm_cpuid_entry2)))
                return -EFAULT;

        return 0;
}