vmbus_drv.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Haiyang Zhang <[email protected]>
  7. * Hank Janssen <[email protected]>
  8. * K. Y. Srinivasan <[email protected]>
  9. */
  10. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11. #include <linux/init.h>
  12. #include <linux/module.h>
  13. #include <linux/device.h>
  14. #include <linux/interrupt.h>
  15. #include <linux/sysctl.h>
  16. #include <linux/slab.h>
  17. #include <linux/acpi.h>
  18. #include <linux/completion.h>
  19. #include <linux/hyperv.h>
  20. #include <linux/kernel_stat.h>
  21. #include <linux/clockchips.h>
  22. #include <linux/cpu.h>
  23. #include <linux/sched/isolation.h>
  24. #include <linux/sched/task_stack.h>
  25. #include <linux/delay.h>
  26. #include <linux/notifier.h>
  27. #include <linux/panic_notifier.h>
  28. #include <linux/ptrace.h>
  29. #include <linux/screen_info.h>
  30. #include <linux/kdebug.h>
  31. #include <linux/efi.h>
  32. #include <linux/random.h>
  33. #include <linux/kernel.h>
  34. #include <linux/syscore_ops.h>
  35. #include <linux/dma-map-ops.h>
  36. #include <linux/pci.h>
  37. #include <clocksource/hyperv_timer.h>
  38. #include "hyperv_vmbus.h"
  39. struct vmbus_dynid {
  40. struct list_head node;
  41. struct hv_vmbus_device_id id;
  42. };
  43. static struct acpi_device *hv_acpi_dev;
  44. static int hyperv_cpuhp_online;
  45. static void *hv_panic_page;
  46. static long __percpu *vmbus_evt;
  47. /* Values parsed from ACPI DSDT */
  48. int vmbus_irq;
  49. int vmbus_interrupt;
  50. /*
  51. * Boolean to control whether to report panic messages over Hyper-V.
  52. *
  53. * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
  54. */
  55. static int sysctl_record_panic_msg = 1;
  56. static int hyperv_report_reg(void)
  57. {
  58. return !sysctl_record_panic_msg || !hv_panic_page;
  59. }
  60. static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
  61. void *args)
  62. {
  63. struct pt_regs *regs;
  64. vmbus_initiate_unload(true);
  65. /*
  66. * Hyper-V should be notified only once about a panic. If we will be
  67. * doing hv_kmsg_dump() with kmsg data later, don't do the notification
  68. * here.
  69. */
  70. if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE
  71. && hyperv_report_reg()) {
  72. regs = current_pt_regs();
  73. hyperv_report_panic(regs, val, false);
  74. }
  75. return NOTIFY_DONE;
  76. }
  77. static int hyperv_die_event(struct notifier_block *nb, unsigned long val,
  78. void *args)
  79. {
  80. struct die_args *die = args;
  81. struct pt_regs *regs = die->regs;
  82. /* Don't notify Hyper-V if the die event is other than oops */
  83. if (val != DIE_OOPS)
  84. return NOTIFY_DONE;
  85. /*
  86. * Hyper-V should be notified only once about a panic. If we will be
  87. * doing hv_kmsg_dump() with kmsg data later, don't do the notification
  88. * here.
  89. */
  90. if (hyperv_report_reg())
  91. hyperv_report_panic(regs, val, true);
  92. return NOTIFY_DONE;
  93. }
  94. static struct notifier_block hyperv_die_block = {
  95. .notifier_call = hyperv_die_event,
  96. };
  97. static struct notifier_block hyperv_panic_block = {
  98. .notifier_call = hyperv_panic_event,
  99. };
  100. static const char *fb_mmio_name = "fb_range";
  101. static struct resource *fb_mmio;
  102. static struct resource *hyperv_mmio;
  103. static DEFINE_MUTEX(hyperv_mmio_lock);
  104. static int vmbus_exists(void)
  105. {
  106. if (hv_acpi_dev == NULL)
  107. return -ENODEV;
  108. return 0;
  109. }
  110. static u8 channel_monitor_group(const struct vmbus_channel *channel)
  111. {
  112. return (u8)channel->offermsg.monitorid / 32;
  113. }
  114. static u8 channel_monitor_offset(const struct vmbus_channel *channel)
  115. {
  116. return (u8)channel->offermsg.monitorid % 32;
  117. }
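/*
 * Worked example for the two helpers above (illustrative numbers, not from
 * this file): a channel whose offer carries monitorid 69 lands in trigger
 * group 69 / 32 = 2, at bit offset 69 % 32 = 5 within that group's 32-bit
 * pending mask.
 */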
  118. static u32 channel_pending(const struct vmbus_channel *channel,
  119. const struct hv_monitor_page *monitor_page)
  120. {
  121. u8 monitor_group = channel_monitor_group(channel);
  122. return monitor_page->trigger_group[monitor_group].pending;
  123. }
  124. static u32 channel_latency(const struct vmbus_channel *channel,
  125. const struct hv_monitor_page *monitor_page)
  126. {
  127. u8 monitor_group = channel_monitor_group(channel);
  128. u8 monitor_offset = channel_monitor_offset(channel);
  129. return monitor_page->latency[monitor_group][monitor_offset];
  130. }
  131. static u32 channel_conn_id(struct vmbus_channel *channel,
  132. struct hv_monitor_page *monitor_page)
  133. {
  134. u8 monitor_group = channel_monitor_group(channel);
  135. u8 monitor_offset = channel_monitor_offset(channel);
  136. return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
  137. }
  138. static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
  139. char *buf)
  140. {
  141. struct hv_device *hv_dev = device_to_hv_device(dev);
  142. if (!hv_dev->channel)
  143. return -ENODEV;
  144. return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
  145. }
  146. static DEVICE_ATTR_RO(id);
  147. static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
  148. char *buf)
  149. {
  150. struct hv_device *hv_dev = device_to_hv_device(dev);
  151. if (!hv_dev->channel)
  152. return -ENODEV;
  153. return sprintf(buf, "%d\n", hv_dev->channel->state);
  154. }
  155. static DEVICE_ATTR_RO(state);
  156. static ssize_t monitor_id_show(struct device *dev,
  157. struct device_attribute *dev_attr, char *buf)
  158. {
  159. struct hv_device *hv_dev = device_to_hv_device(dev);
  160. if (!hv_dev->channel)
  161. return -ENODEV;
  162. return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
  163. }
  164. static DEVICE_ATTR_RO(monitor_id);
  165. static ssize_t class_id_show(struct device *dev,
  166. struct device_attribute *dev_attr, char *buf)
  167. {
  168. struct hv_device *hv_dev = device_to_hv_device(dev);
  169. if (!hv_dev->channel)
  170. return -ENODEV;
  171. return sprintf(buf, "{%pUl}\n",
  172. &hv_dev->channel->offermsg.offer.if_type);
  173. }
  174. static DEVICE_ATTR_RO(class_id);
  175. static ssize_t device_id_show(struct device *dev,
  176. struct device_attribute *dev_attr, char *buf)
  177. {
  178. struct hv_device *hv_dev = device_to_hv_device(dev);
  179. if (!hv_dev->channel)
  180. return -ENODEV;
  181. return sprintf(buf, "{%pUl}\n",
  182. &hv_dev->channel->offermsg.offer.if_instance);
  183. }
  184. static DEVICE_ATTR_RO(device_id);
  185. static ssize_t modalias_show(struct device *dev,
  186. struct device_attribute *dev_attr, char *buf)
  187. {
  188. struct hv_device *hv_dev = device_to_hv_device(dev);
  189. return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
  190. }
  191. static DEVICE_ATTR_RO(modalias);
  192. #ifdef CONFIG_NUMA
  193. static ssize_t numa_node_show(struct device *dev,
  194. struct device_attribute *attr, char *buf)
  195. {
  196. struct hv_device *hv_dev = device_to_hv_device(dev);
  197. if (!hv_dev->channel)
  198. return -ENODEV;
  199. return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
  200. }
  201. static DEVICE_ATTR_RO(numa_node);
  202. #endif
  203. static ssize_t server_monitor_pending_show(struct device *dev,
  204. struct device_attribute *dev_attr,
  205. char *buf)
  206. {
  207. struct hv_device *hv_dev = device_to_hv_device(dev);
  208. if (!hv_dev->channel)
  209. return -ENODEV;
  210. return sprintf(buf, "%d\n",
  211. channel_pending(hv_dev->channel,
  212. vmbus_connection.monitor_pages[0]));
  213. }
  214. static DEVICE_ATTR_RO(server_monitor_pending);
  215. static ssize_t client_monitor_pending_show(struct device *dev,
  216. struct device_attribute *dev_attr,
  217. char *buf)
  218. {
  219. struct hv_device *hv_dev = device_to_hv_device(dev);
  220. if (!hv_dev->channel)
  221. return -ENODEV;
  222. return sprintf(buf, "%d\n",
  223. channel_pending(hv_dev->channel,
  224. vmbus_connection.monitor_pages[1]));
  225. }
  226. static DEVICE_ATTR_RO(client_monitor_pending);
  227. static ssize_t server_monitor_latency_show(struct device *dev,
  228. struct device_attribute *dev_attr,
  229. char *buf)
  230. {
  231. struct hv_device *hv_dev = device_to_hv_device(dev);
  232. if (!hv_dev->channel)
  233. return -ENODEV;
  234. return sprintf(buf, "%d\n",
  235. channel_latency(hv_dev->channel,
  236. vmbus_connection.monitor_pages[0]));
  237. }
  238. static DEVICE_ATTR_RO(server_monitor_latency);
  239. static ssize_t client_monitor_latency_show(struct device *dev,
  240. struct device_attribute *dev_attr,
  241. char *buf)
  242. {
  243. struct hv_device *hv_dev = device_to_hv_device(dev);
  244. if (!hv_dev->channel)
  245. return -ENODEV;
  246. return sprintf(buf, "%d\n",
  247. channel_latency(hv_dev->channel,
  248. vmbus_connection.monitor_pages[1]));
  249. }
  250. static DEVICE_ATTR_RO(client_monitor_latency);
  251. static ssize_t server_monitor_conn_id_show(struct device *dev,
  252. struct device_attribute *dev_attr,
  253. char *buf)
  254. {
  255. struct hv_device *hv_dev = device_to_hv_device(dev);
  256. if (!hv_dev->channel)
  257. return -ENODEV;
  258. return sprintf(buf, "%d\n",
  259. channel_conn_id(hv_dev->channel,
  260. vmbus_connection.monitor_pages[0]));
  261. }
  262. static DEVICE_ATTR_RO(server_monitor_conn_id);
  263. static ssize_t client_monitor_conn_id_show(struct device *dev,
  264. struct device_attribute *dev_attr,
  265. char *buf)
  266. {
  267. struct hv_device *hv_dev = device_to_hv_device(dev);
  268. if (!hv_dev->channel)
  269. return -ENODEV;
  270. return sprintf(buf, "%d\n",
  271. channel_conn_id(hv_dev->channel,
  272. vmbus_connection.monitor_pages[1]));
  273. }
  274. static DEVICE_ATTR_RO(client_monitor_conn_id);
  275. static ssize_t out_intr_mask_show(struct device *dev,
  276. struct device_attribute *dev_attr, char *buf)
  277. {
  278. struct hv_device *hv_dev = device_to_hv_device(dev);
  279. struct hv_ring_buffer_debug_info outbound;
  280. int ret;
  281. if (!hv_dev->channel)
  282. return -ENODEV;
  283. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
  284. &outbound);
  285. if (ret < 0)
  286. return ret;
  287. return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
  288. }
  289. static DEVICE_ATTR_RO(out_intr_mask);
  290. static ssize_t out_read_index_show(struct device *dev,
  291. struct device_attribute *dev_attr, char *buf)
  292. {
  293. struct hv_device *hv_dev = device_to_hv_device(dev);
  294. struct hv_ring_buffer_debug_info outbound;
  295. int ret;
  296. if (!hv_dev->channel)
  297. return -ENODEV;
  298. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
  299. &outbound);
  300. if (ret < 0)
  301. return ret;
  302. return sprintf(buf, "%d\n", outbound.current_read_index);
  303. }
  304. static DEVICE_ATTR_RO(out_read_index);
  305. static ssize_t out_write_index_show(struct device *dev,
  306. struct device_attribute *dev_attr,
  307. char *buf)
  308. {
  309. struct hv_device *hv_dev = device_to_hv_device(dev);
  310. struct hv_ring_buffer_debug_info outbound;
  311. int ret;
  312. if (!hv_dev->channel)
  313. return -ENODEV;
  314. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
  315. &outbound);
  316. if (ret < 0)
  317. return ret;
  318. return sprintf(buf, "%d\n", outbound.current_write_index);
  319. }
  320. static DEVICE_ATTR_RO(out_write_index);
  321. static ssize_t out_read_bytes_avail_show(struct device *dev,
  322. struct device_attribute *dev_attr,
  323. char *buf)
  324. {
  325. struct hv_device *hv_dev = device_to_hv_device(dev);
  326. struct hv_ring_buffer_debug_info outbound;
  327. int ret;
  328. if (!hv_dev->channel)
  329. return -ENODEV;
  330. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
  331. &outbound);
  332. if (ret < 0)
  333. return ret;
  334. return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
  335. }
  336. static DEVICE_ATTR_RO(out_read_bytes_avail);
  337. static ssize_t out_write_bytes_avail_show(struct device *dev,
  338. struct device_attribute *dev_attr,
  339. char *buf)
  340. {
  341. struct hv_device *hv_dev = device_to_hv_device(dev);
  342. struct hv_ring_buffer_debug_info outbound;
  343. int ret;
  344. if (!hv_dev->channel)
  345. return -ENODEV;
  346. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
  347. &outbound);
  348. if (ret < 0)
  349. return ret;
  350. return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
  351. }
  352. static DEVICE_ATTR_RO(out_write_bytes_avail);
  353. static ssize_t in_intr_mask_show(struct device *dev,
  354. struct device_attribute *dev_attr, char *buf)
  355. {
  356. struct hv_device *hv_dev = device_to_hv_device(dev);
  357. struct hv_ring_buffer_debug_info inbound;
  358. int ret;
  359. if (!hv_dev->channel)
  360. return -ENODEV;
  361. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
  362. if (ret < 0)
  363. return ret;
  364. return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
  365. }
  366. static DEVICE_ATTR_RO(in_intr_mask);
  367. static ssize_t in_read_index_show(struct device *dev,
  368. struct device_attribute *dev_attr, char *buf)
  369. {
  370. struct hv_device *hv_dev = device_to_hv_device(dev);
  371. struct hv_ring_buffer_debug_info inbound;
  372. int ret;
  373. if (!hv_dev->channel)
  374. return -ENODEV;
  375. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
  376. if (ret < 0)
  377. return ret;
  378. return sprintf(buf, "%d\n", inbound.current_read_index);
  379. }
  380. static DEVICE_ATTR_RO(in_read_index);
  381. static ssize_t in_write_index_show(struct device *dev,
  382. struct device_attribute *dev_attr, char *buf)
  383. {
  384. struct hv_device *hv_dev = device_to_hv_device(dev);
  385. struct hv_ring_buffer_debug_info inbound;
  386. int ret;
  387. if (!hv_dev->channel)
  388. return -ENODEV;
  389. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
  390. if (ret < 0)
  391. return ret;
  392. return sprintf(buf, "%d\n", inbound.current_write_index);
  393. }
  394. static DEVICE_ATTR_RO(in_write_index);
  395. static ssize_t in_read_bytes_avail_show(struct device *dev,
  396. struct device_attribute *dev_attr,
  397. char *buf)
  398. {
  399. struct hv_device *hv_dev = device_to_hv_device(dev);
  400. struct hv_ring_buffer_debug_info inbound;
  401. int ret;
  402. if (!hv_dev->channel)
  403. return -ENODEV;
  404. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
  405. if (ret < 0)
  406. return ret;
  407. return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
  408. }
  409. static DEVICE_ATTR_RO(in_read_bytes_avail);
  410. static ssize_t in_write_bytes_avail_show(struct device *dev,
  411. struct device_attribute *dev_attr,
  412. char *buf)
  413. {
  414. struct hv_device *hv_dev = device_to_hv_device(dev);
  415. struct hv_ring_buffer_debug_info inbound;
  416. int ret;
  417. if (!hv_dev->channel)
  418. return -ENODEV;
  419. ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
  420. if (ret < 0)
  421. return ret;
  422. return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
  423. }
  424. static DEVICE_ATTR_RO(in_write_bytes_avail);
  425. static ssize_t channel_vp_mapping_show(struct device *dev,
  426. struct device_attribute *dev_attr,
  427. char *buf)
  428. {
  429. struct hv_device *hv_dev = device_to_hv_device(dev);
  430. struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
  431. int buf_size = PAGE_SIZE, n_written, tot_written;
  432. struct list_head *cur;
  433. if (!channel)
  434. return -ENODEV;
  435. mutex_lock(&vmbus_connection.channel_mutex);
  436. tot_written = snprintf(buf, buf_size, "%u:%u\n",
  437. channel->offermsg.child_relid, channel->target_cpu);
  438. list_for_each(cur, &channel->sc_list) {
  439. if (tot_written >= buf_size - 1)
  440. break;
  441. cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
  442. n_written = scnprintf(buf + tot_written,
  443. buf_size - tot_written,
  444. "%u:%u\n",
  445. cur_sc->offermsg.child_relid,
  446. cur_sc->target_cpu);
  447. tot_written += n_written;
  448. }
  449. mutex_unlock(&vmbus_connection.channel_mutex);
  450. return tot_written;
  451. }
  452. static DEVICE_ATTR_RO(channel_vp_mapping);
  453. static ssize_t vendor_show(struct device *dev,
  454. struct device_attribute *dev_attr,
  455. char *buf)
  456. {
  457. struct hv_device *hv_dev = device_to_hv_device(dev);
  458. return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
  459. }
  460. static DEVICE_ATTR_RO(vendor);
  461. static ssize_t device_show(struct device *dev,
  462. struct device_attribute *dev_attr,
  463. char *buf)
  464. {
  465. struct hv_device *hv_dev = device_to_hv_device(dev);
  466. return sprintf(buf, "0x%x\n", hv_dev->device_id);
  467. }
  468. static DEVICE_ATTR_RO(device);
  469. static ssize_t driver_override_store(struct device *dev,
  470. struct device_attribute *attr,
  471. const char *buf, size_t count)
  472. {
  473. struct hv_device *hv_dev = device_to_hv_device(dev);
  474. int ret;
  475. ret = driver_set_override(dev, &hv_dev->driver_override, buf, count);
  476. if (ret)
  477. return ret;
  478. return count;
  479. }
  480. static ssize_t driver_override_show(struct device *dev,
  481. struct device_attribute *attr, char *buf)
  482. {
  483. struct hv_device *hv_dev = device_to_hv_device(dev);
  484. ssize_t len;
  485. device_lock(dev);
  486. len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
  487. device_unlock(dev);
  488. return len;
  489. }
  490. static DEVICE_ATTR_RW(driver_override);
  491. /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
  492. static struct attribute *vmbus_dev_attrs[] = {
  493. &dev_attr_id.attr,
  494. &dev_attr_state.attr,
  495. &dev_attr_monitor_id.attr,
  496. &dev_attr_class_id.attr,
  497. &dev_attr_device_id.attr,
  498. &dev_attr_modalias.attr,
  499. #ifdef CONFIG_NUMA
  500. &dev_attr_numa_node.attr,
  501. #endif
  502. &dev_attr_server_monitor_pending.attr,
  503. &dev_attr_client_monitor_pending.attr,
  504. &dev_attr_server_monitor_latency.attr,
  505. &dev_attr_client_monitor_latency.attr,
  506. &dev_attr_server_monitor_conn_id.attr,
  507. &dev_attr_client_monitor_conn_id.attr,
  508. &dev_attr_out_intr_mask.attr,
  509. &dev_attr_out_read_index.attr,
  510. &dev_attr_out_write_index.attr,
  511. &dev_attr_out_read_bytes_avail.attr,
  512. &dev_attr_out_write_bytes_avail.attr,
  513. &dev_attr_in_intr_mask.attr,
  514. &dev_attr_in_read_index.attr,
  515. &dev_attr_in_write_index.attr,
  516. &dev_attr_in_read_bytes_avail.attr,
  517. &dev_attr_in_write_bytes_avail.attr,
  518. &dev_attr_channel_vp_mapping.attr,
  519. &dev_attr_vendor.attr,
  520. &dev_attr_device.attr,
  521. &dev_attr_driver_override.attr,
  522. NULL,
  523. };
  524. /*
  525. * Device-level attribute_group callback function. Returns the permission for
  526. * each attribute, and returns 0 if an attribute is not visible.
  527. */
  528. static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj,
  529. struct attribute *attr, int idx)
  530. {
  531. struct device *dev = kobj_to_dev(kobj);
  532. const struct hv_device *hv_dev = device_to_hv_device(dev);
  533. /* Hide the monitor attributes if the monitor mechanism is not used. */
  534. if (!hv_dev->channel->offermsg.monitor_allocated &&
  535. (attr == &dev_attr_monitor_id.attr ||
  536. attr == &dev_attr_server_monitor_pending.attr ||
  537. attr == &dev_attr_client_monitor_pending.attr ||
  538. attr == &dev_attr_server_monitor_latency.attr ||
  539. attr == &dev_attr_client_monitor_latency.attr ||
  540. attr == &dev_attr_server_monitor_conn_id.attr ||
  541. attr == &dev_attr_client_monitor_conn_id.attr))
  542. return 0;
  543. return attr->mode;
  544. }
  545. static const struct attribute_group vmbus_dev_group = {
  546. .attrs = vmbus_dev_attrs,
  547. .is_visible = vmbus_dev_attr_is_visible
  548. };
  549. __ATTRIBUTE_GROUPS(vmbus_dev);
  550. /* Set up the attribute for /sys/bus/vmbus/hibernation */
  551. static ssize_t hibernation_show(struct bus_type *bus, char *buf)
  552. {
  553. return sprintf(buf, "%d\n", !!hv_is_hibernation_supported());
  554. }
  555. static BUS_ATTR_RO(hibernation);
  556. static struct attribute *vmbus_bus_attrs[] = {
  557. &bus_attr_hibernation.attr,
  558. NULL,
  559. };
  560. static const struct attribute_group vmbus_bus_group = {
  561. .attrs = vmbus_bus_attrs,
  562. };
  563. __ATTRIBUTE_GROUPS(vmbus_bus);
  564. /*
  565. * vmbus_uevent - add uevent for our device
  566. *
  567. * This routine is invoked when a device is added or removed on the vmbus to
  568. * generate a uevent to udev in userspace. udev will then look at its
  569. * rules and the uevent generated here to load the appropriate driver.
  570. *
  571. * The alias string will be of the form vmbus:guid where guid is the string
  572. * representation of the device guid (each byte of the guid will be
  573. * represented with two hex characters).
  574. */
  575. static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
  576. {
  577. struct hv_device *dev = device_to_hv_device(device);
  578. const char *format = "MODALIAS=vmbus:%*phN";
  579. return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type);
  580. }
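/*
 * Illustrative sketch, not part of this file: how a VMBus client driver
 * typically declares the device IDs that the "vmbus:%*phN" modalias emitted
 * above is matched against. The table name is hypothetical; HV_NIC_GUID is
 * the synthetic-NIC class GUID from <linux/hyperv.h>, used here only as a
 * concrete placeholder.
 */
#include <linux/hyperv.h>
#include <linux/module.h>

static const struct hv_vmbus_device_id example_id_table[] = {
	{ HV_NIC_GUID, },	/* class GUID this driver claims */
	{ },			/* terminating all-zero GUID entry */
};
/* Emits a "vmbus:<32 hex chars>" alias so udev can autoload the module. */
MODULE_DEVICE_TABLE(vmbus, example_id_table);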
  581. static const struct hv_vmbus_device_id *
  582. hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
  583. {
  584. if (id == NULL)
  585. return NULL; /* empty device table */
  586. for (; !guid_is_null(&id->guid); id++)
  587. if (guid_equal(&id->guid, guid))
  588. return id;
  589. return NULL;
  590. }
  591. static const struct hv_vmbus_device_id *
  592. hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
  593. {
  594. const struct hv_vmbus_device_id *id = NULL;
  595. struct vmbus_dynid *dynid;
  596. spin_lock(&drv->dynids.lock);
  597. list_for_each_entry(dynid, &drv->dynids.list, node) {
  598. if (guid_equal(&dynid->id.guid, guid)) {
  599. id = &dynid->id;
  600. break;
  601. }
  602. }
  603. spin_unlock(&drv->dynids.lock);
  604. return id;
  605. }
  606. static const struct hv_vmbus_device_id vmbus_device_null;
  607. /*
  608. * Return a matching hv_vmbus_device_id pointer.
  609. * If there is no match, return NULL.
  610. */
  611. static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
  612. struct hv_device *dev)
  613. {
  614. const guid_t *guid = &dev->dev_type;
  615. const struct hv_vmbus_device_id *id;
  616. /* When driver_override is set, only bind to the matching driver */
  617. if (dev->driver_override && strcmp(dev->driver_override, drv->name))
  618. return NULL;
  619. /* Look at the dynamic ids first, before the static ones */
  620. id = hv_vmbus_dynid_match(drv, guid);
  621. if (!id)
  622. id = hv_vmbus_dev_match(drv->id_table, guid);
  623. /* driver_override will always match, send a dummy id */
  624. if (!id && dev->driver_override)
  625. id = &vmbus_device_null;
  626. return id;
  627. }
  628. /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
  629. static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
  630. {
  631. struct vmbus_dynid *dynid;
  632. dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
  633. if (!dynid)
  634. return -ENOMEM;
  635. dynid->id.guid = *guid;
  636. spin_lock(&drv->dynids.lock);
  637. list_add_tail(&dynid->node, &drv->dynids.list);
  638. spin_unlock(&drv->dynids.lock);
  639. return driver_attach(&drv->driver);
  640. }
  641. static void vmbus_free_dynids(struct hv_driver *drv)
  642. {
  643. struct vmbus_dynid *dynid, *n;
  644. spin_lock(&drv->dynids.lock);
  645. list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
  646. list_del(&dynid->node);
  647. kfree(dynid);
  648. }
  649. spin_unlock(&drv->dynids.lock);
  650. }
  651. /*
  652. * store_new_id - sysfs frontend to vmbus_add_dynid()
  653. *
  654. * Allow GUIDs to be added to an existing driver via sysfs.
  655. */
  656. static ssize_t new_id_store(struct device_driver *driver, const char *buf,
  657. size_t count)
  658. {
  659. struct hv_driver *drv = drv_to_hv_drv(driver);
  660. guid_t guid;
  661. ssize_t retval;
  662. retval = guid_parse(buf, &guid);
  663. if (retval)
  664. return retval;
  665. if (hv_vmbus_dynid_match(drv, &guid))
  666. return -EEXIST;
  667. retval = vmbus_add_dynid(drv, &guid);
  668. if (retval)
  669. return retval;
  670. return count;
  671. }
  672. static DRIVER_ATTR_WO(new_id);
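/*
 * Illustrative userspace sketch, not part of this driver: exercising the
 * new_id attribute defined above. The driver directory "uio_hv_generic" and
 * the GUID are placeholders; substitute the driver and device class GUID you
 * actually want to bind.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/bus/vmbus/drivers/uio_hv_generic/new_id";
	const char *guid = "f8615163-df3e-46c5-913f-f2d2f965ed0e";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open new_id");
		return 1;
	}
	/* The kernel side parses this with guid_parse() and re-probes drivers. */
	if (write(fd, guid, strlen(guid)) < 0)
		perror("write guid");
	close(fd);
	return 0;
}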
  673. /*
  674. * store_remove_id - remove a device ID from this driver
  675. *
  676. * Removes a dynamic VMbus device ID from this driver.
  677. */
  678. static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
  679. size_t count)
  680. {
  681. struct hv_driver *drv = drv_to_hv_drv(driver);
  682. struct vmbus_dynid *dynid, *n;
  683. guid_t guid;
  684. ssize_t retval;
  685. retval = guid_parse(buf, &guid);
  686. if (retval)
  687. return retval;
  688. retval = -ENODEV;
  689. spin_lock(&drv->dynids.lock);
  690. list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
  691. struct hv_vmbus_device_id *id = &dynid->id;
  692. if (guid_equal(&id->guid, &guid)) {
  693. list_del(&dynid->node);
  694. kfree(dynid);
  695. retval = count;
  696. break;
  697. }
  698. }
  699. spin_unlock(&drv->dynids.lock);
  700. return retval;
  701. }
  702. static DRIVER_ATTR_WO(remove_id);
  703. static struct attribute *vmbus_drv_attrs[] = {
  704. &driver_attr_new_id.attr,
  705. &driver_attr_remove_id.attr,
  706. NULL,
  707. };
  708. ATTRIBUTE_GROUPS(vmbus_drv);
  709. /*
  710. * vmbus_match - Attempt to match the specified device to the specified driver
  711. */
  712. static int vmbus_match(struct device *device, struct device_driver *driver)
  713. {
  714. struct hv_driver *drv = drv_to_hv_drv(driver);
  715. struct hv_device *hv_dev = device_to_hv_device(device);
  716. /* The hv_sock driver handles all hv_sock offers. */
  717. if (is_hvsock_channel(hv_dev->channel))
  718. return drv->hvsock;
  719. if (hv_vmbus_get_id(drv, hv_dev))
  720. return 1;
  721. return 0;
  722. }
  723. /*
  724. * vmbus_probe - Add the new vmbus's child device
  725. */
  726. static int vmbus_probe(struct device *child_device)
  727. {
  728. int ret = 0;
  729. struct hv_driver *drv =
  730. drv_to_hv_drv(child_device->driver);
  731. struct hv_device *dev = device_to_hv_device(child_device);
  732. const struct hv_vmbus_device_id *dev_id;
  733. dev_id = hv_vmbus_get_id(drv, dev);
  734. if (drv->probe) {
  735. ret = drv->probe(dev, dev_id);
  736. if (ret != 0)
  737. pr_err("probe failed for device %s (%d)\n",
  738. dev_name(child_device), ret);
  739. } else {
  740. pr_err("probe not set for driver %s\n",
  741. dev_name(child_device));
  742. ret = -ENODEV;
  743. }
  744. return ret;
  745. }
  746. /*
  747. * vmbus_dma_configure -- Configure DMA coherence for VMbus device
  748. */
  749. static int vmbus_dma_configure(struct device *child_device)
  750. {
  751. /*
  752. * On ARM64, propagate the DMA coherence setting from the top level
  753. * VMbus ACPI device to the child VMbus device being added here.
  754. * On x86/x64 coherence is assumed and these calls have no effect.
  755. */
  756. hv_setup_dma_ops(child_device,
  757. device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT);
  758. return 0;
  759. }
  760. /*
  761. * vmbus_remove - Remove a vmbus device
  762. */
  763. static void vmbus_remove(struct device *child_device)
  764. {
  765. struct hv_driver *drv;
  766. struct hv_device *dev = device_to_hv_device(child_device);
  767. if (child_device->driver) {
  768. drv = drv_to_hv_drv(child_device->driver);
  769. if (drv->remove)
  770. drv->remove(dev);
  771. }
  772. }
  773. /*
  774. * vmbus_shutdown - Shutdown a vmbus device
  775. */
  776. static void vmbus_shutdown(struct device *child_device)
  777. {
  778. struct hv_driver *drv;
  779. struct hv_device *dev = device_to_hv_device(child_device);
  780. /* The device may not be attached yet */
  781. if (!child_device->driver)
  782. return;
  783. drv = drv_to_hv_drv(child_device->driver);
  784. if (drv->shutdown)
  785. drv->shutdown(dev);
  786. }
  787. #ifdef CONFIG_PM_SLEEP
  788. /*
  789. * vmbus_suspend - Suspend a vmbus device
  790. */
  791. static int vmbus_suspend(struct device *child_device)
  792. {
  793. struct hv_driver *drv;
  794. struct hv_device *dev = device_to_hv_device(child_device);
  795. /* The device may not be attached yet */
  796. if (!child_device->driver)
  797. return 0;
  798. drv = drv_to_hv_drv(child_device->driver);
  799. if (!drv->suspend)
  800. return -EOPNOTSUPP;
  801. return drv->suspend(dev);
  802. }
  803. /*
  804. * vmbus_resume - Resume a vmbus device
  805. */
  806. static int vmbus_resume(struct device *child_device)
  807. {
  808. struct hv_driver *drv;
  809. struct hv_device *dev = device_to_hv_device(child_device);
  810. /* The device may not be attached yet */
  811. if (!child_device->driver)
  812. return 0;
  813. drv = drv_to_hv_drv(child_device->driver);
  814. if (!drv->resume)
  815. return -EOPNOTSUPP;
  816. return drv->resume(dev);
  817. }
  818. #else
  819. #define vmbus_suspend NULL
  820. #define vmbus_resume NULL
  821. #endif /* CONFIG_PM_SLEEP */
  822. /*
  823. * vmbus_device_release - Final callback release of the vmbus child device
  824. */
  825. static void vmbus_device_release(struct device *device)
  826. {
  827. struct hv_device *hv_dev = device_to_hv_device(device);
  828. struct vmbus_channel *channel = hv_dev->channel;
  829. hv_debug_rm_dev_dir(hv_dev);
  830. mutex_lock(&vmbus_connection.channel_mutex);
  831. hv_process_channel_removal(channel);
  832. mutex_unlock(&vmbus_connection.channel_mutex);
  833. kfree(hv_dev);
  834. }
  835. /*
  836. * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm.
  837. *
  838. * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we
  839. * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there
  840. * is no way to wake up a Generation-2 VM.
  841. *
  842. * The other 4 ops are for hibernation.
  843. */
  844. static const struct dev_pm_ops vmbus_pm = {
  845. .suspend_noirq = NULL,
  846. .resume_noirq = NULL,
  847. .freeze_noirq = vmbus_suspend,
  848. .thaw_noirq = vmbus_resume,
  849. .poweroff_noirq = vmbus_suspend,
  850. .restore_noirq = vmbus_resume,
  851. };
  852. /* The one and only one */
  853. static struct bus_type hv_bus = {
  854. .name = "vmbus",
  855. .match = vmbus_match,
  856. .shutdown = vmbus_shutdown,
  857. .remove = vmbus_remove,
  858. .probe = vmbus_probe,
  859. .uevent = vmbus_uevent,
  860. .dma_configure = vmbus_dma_configure,
  861. .dev_groups = vmbus_dev_groups,
  862. .drv_groups = vmbus_drv_groups,
  863. .bus_groups = vmbus_bus_groups,
  864. .pm = &vmbus_pm,
  865. };
  866. struct onmessage_work_context {
  867. struct work_struct work;
  868. struct {
  869. struct hv_message_header header;
  870. u8 payload[];
  871. } msg;
  872. };
  873. static void vmbus_onmessage_work(struct work_struct *work)
  874. {
  875. struct onmessage_work_context *ctx;
  876. /* Do not process messages if we're in DISCONNECTED state */
  877. if (vmbus_connection.conn_state == DISCONNECTED)
  878. return;
  879. ctx = container_of(work, struct onmessage_work_context,
  880. work);
  881. vmbus_onmessage((struct vmbus_channel_message_header *)
  882. &ctx->msg.payload);
  883. kfree(ctx);
  884. }
  885. void vmbus_on_msg_dpc(unsigned long data)
  886. {
  887. struct hv_per_cpu_context *hv_cpu = (void *)data;
  888. void *page_addr = hv_cpu->synic_message_page;
  889. struct hv_message msg_copy, *msg = (struct hv_message *)page_addr +
  890. VMBUS_MESSAGE_SINT;
  891. struct vmbus_channel_message_header *hdr;
  892. enum vmbus_channel_message_type msgtype;
  893. const struct vmbus_channel_message_table_entry *entry;
  894. struct onmessage_work_context *ctx;
  895. __u8 payload_size;
  896. u32 message_type;
  897. /*
  898. * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as
  899. * it is being used in 'struct vmbus_channel_message_header' definition
  900. * which is supposed to match hypervisor ABI.
  901. */
  902. BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32));
  903. /*
  904. * Since the message is in memory shared with the host, an erroneous or
  905. * malicious Hyper-V could modify the message while vmbus_on_msg_dpc()
  906. * or individual message handlers are executing; to prevent this, copy
  907. * the message into private memory.
  908. */
  909. memcpy(&msg_copy, msg, sizeof(struct hv_message));
  910. message_type = msg_copy.header.message_type;
  911. if (message_type == HVMSG_NONE)
  912. /* no msg */
  913. return;
  914. hdr = (struct vmbus_channel_message_header *)msg_copy.u.payload;
  915. msgtype = hdr->msgtype;
  916. trace_vmbus_on_msg_dpc(hdr);
  917. if (msgtype >= CHANNELMSG_COUNT) {
  918. WARN_ONCE(1, "unknown msgtype=%d\n", msgtype);
  919. goto msg_handled;
  920. }
  921. payload_size = msg_copy.header.payload_size;
  922. if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) {
  923. WARN_ONCE(1, "payload size is too large (%d)\n", payload_size);
  924. goto msg_handled;
  925. }
  926. entry = &channel_message_table[msgtype];
  927. if (!entry->message_handler)
  928. goto msg_handled;
  929. if (payload_size < entry->min_payload_len) {
  930. WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", msgtype, payload_size);
  931. goto msg_handled;
  932. }
  933. if (entry->handler_type == VMHT_BLOCKING) {
  934. ctx = kmalloc(struct_size(ctx, msg.payload, payload_size), GFP_ATOMIC);
  935. if (ctx == NULL)
  936. return;
  937. INIT_WORK(&ctx->work, vmbus_onmessage_work);
  938. ctx->msg.header = msg_copy.header;
  939. memcpy(&ctx->msg.payload, msg_copy.u.payload, payload_size);
  940. /*
  941. * The host can generate a rescind message while we
  942. * may still be handling the original offer. We deal with
  943. * this condition by relying on the synchronization provided
  944. * by offer_in_progress and by channel_mutex. See also the
  945. * inline comments in vmbus_onoffer_rescind().
  946. */
  947. switch (msgtype) {
  948. case CHANNELMSG_RESCIND_CHANNELOFFER:
  949. /*
  950. * If we are handling the rescind message;
  951. * schedule the work on the global work queue.
  952. *
  953. * The OFFER message and the RESCIND message should
  954. * not be handled by the same serialized work queue,
  955. * because the OFFER handler may call vmbus_open(),
  956. * which tries to open the channel by sending an
  957. * OPEN_CHANNEL message to the host and waits for
  958. * the host's response; however, if the host has
  959. * rescinded the channel before it receives the
  960. * OPEN_CHANNEL message, the host just silently
  961. * ignores the OPEN_CHANNEL message; as a result,
  962. * the guest's OFFER handler hangs forever if we
  963. * handle the RESCIND message in the same serialized
  964. * work queue: the RESCIND handler can not start to
  965. * run before the OFFER handler finishes.
  966. */
  967. if (vmbus_connection.ignore_any_offer_msg)
  968. break;
  969. queue_work(vmbus_connection.rescind_work_queue, &ctx->work);
  970. break;
  971. case CHANNELMSG_OFFERCHANNEL:
  972. /*
  973. * The host sends the offer message of a given channel
  974. * before sending the rescind message of the same
  975. * channel. These messages are sent to the guest's
  976. * connect CPU; the guest then starts processing them
  977. * in the tasklet handler on this CPU:
  978. *
  979. * VMBUS_CONNECT_CPU
  980. *
  981. * [vmbus_on_msg_dpc()]
  982. * atomic_inc() // CHANNELMSG_OFFERCHANNEL
  983. * queue_work()
  984. * ...
  985. * [vmbus_on_msg_dpc()]
  986. * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER
  987. *
  988. * We rely on the memory-ordering properties of the
  989. * queue_work() and schedule_work() primitives, which
  990. * guarantee that the atomic increment will be visible
  991. * to the CPUs which will execute the offer & rescind
  992. * works by the time these works will start execution.
  993. */
  994. if (vmbus_connection.ignore_any_offer_msg)
  995. break;
  996. atomic_inc(&vmbus_connection.offer_in_progress);
  997. fallthrough;
  998. default:
  999. queue_work(vmbus_connection.work_queue, &ctx->work);
  1000. }
  1001. } else
  1002. entry->message_handler(hdr);
  1003. msg_handled:
  1004. vmbus_signal_eom(msg, message_type);
  1005. }
  1006. #ifdef CONFIG_PM_SLEEP
  1007. /*
  1008. * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
  1009. * hibernation, because hv_sock connections cannot persist across hibernation.
  1010. */
  1011. static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
  1012. {
  1013. struct onmessage_work_context *ctx;
  1014. struct vmbus_channel_rescind_offer *rescind;
  1015. WARN_ON(!is_hvsock_channel(channel));
  1016. /*
  1017. * Allocation size is small and the allocation should really not fail,
  1018. * otherwise the state of the hv_sock connections ends up in limbo.
  1019. */
  1020. ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
  1021. GFP_KERNEL | __GFP_NOFAIL);
  1022. /*
  1023. * So far, these are not really used by Linux. Just set them to the
  1024. * reasonable values conforming to the definitions of the fields.
  1025. */
  1026. ctx->msg.header.message_type = 1;
  1027. ctx->msg.header.payload_size = sizeof(*rescind);
  1028. /* These values are actually used by Linux. */
  1029. rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload;
  1030. rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
  1031. rescind->child_relid = channel->offermsg.child_relid;
  1032. INIT_WORK(&ctx->work, vmbus_onmessage_work);
  1033. queue_work(vmbus_connection.work_queue, &ctx->work);
  1034. }
  1035. #endif /* CONFIG_PM_SLEEP */
  1036. /*
  1037. * Schedule all channels with events pending
  1038. */
  1039. static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
  1040. {
  1041. unsigned long *recv_int_page;
  1042. u32 maxbits, relid;
  1043. /*
  1044. * The event page can be directly checked to get the id of
  1045. * the channel that has the interrupt pending.
  1046. */
  1047. void *page_addr = hv_cpu->synic_event_page;
  1048. union hv_synic_event_flags *event
  1049. = (union hv_synic_event_flags *)page_addr +
  1050. VMBUS_MESSAGE_SINT;
  1051. maxbits = HV_EVENT_FLAGS_COUNT;
  1052. recv_int_page = event->flags;
  1053. if (unlikely(!recv_int_page))
  1054. return;
  1055. for_each_set_bit(relid, recv_int_page, maxbits) {
  1056. void (*callback_fn)(void *context);
  1057. struct vmbus_channel *channel;
  1058. if (!sync_test_and_clear_bit(relid, recv_int_page))
  1059. continue;
  1060. /* Special case - vmbus channel protocol msg */
  1061. if (relid == 0)
  1062. continue;
  1063. /*
  1064. * Pairs with the kfree_rcu() in vmbus_chan_release().
  1065. * Guarantees that the channel data structure doesn't
  1066. * get freed while the channel pointer below is being
  1067. * dereferenced.
  1068. */
  1069. rcu_read_lock();
  1070. /* Find channel based on relid */
  1071. channel = relid2channel(relid);
  1072. if (channel == NULL)
  1073. goto sched_unlock_rcu;
  1074. if (channel->rescind)
  1075. goto sched_unlock_rcu;
  1076. /*
  1077. * Make sure that the ring buffer data structure doesn't get
  1078. * freed while we dereference the ring buffer pointer. Test
  1079. * for the channel's onchannel_callback being NULL within a
  1080. * sched_lock critical section. See also the inline comments
  1081. * in vmbus_reset_channel_cb().
  1082. */
  1083. spin_lock(&channel->sched_lock);
  1084. callback_fn = channel->onchannel_callback;
  1085. if (unlikely(callback_fn == NULL))
  1086. goto sched_unlock;
  1087. trace_vmbus_chan_sched(channel);
  1088. ++channel->interrupts;
  1089. switch (channel->callback_mode) {
  1090. case HV_CALL_ISR:
  1091. (*callback_fn)(channel->channel_callback_context);
  1092. break;
  1093. case HV_CALL_BATCHED:
  1094. hv_begin_read(&channel->inbound);
  1095. fallthrough;
  1096. case HV_CALL_DIRECT:
  1097. tasklet_schedule(&channel->callback_event);
  1098. }
  1099. sched_unlock:
  1100. spin_unlock(&channel->sched_lock);
  1101. sched_unlock_rcu:
  1102. rcu_read_unlock();
  1103. }
  1104. }
  1105. static void vmbus_isr(void)
  1106. {
  1107. struct hv_per_cpu_context *hv_cpu
  1108. = this_cpu_ptr(hv_context.cpu_context);
  1109. void *page_addr;
  1110. struct hv_message *msg;
  1111. vmbus_chan_sched(hv_cpu);
  1112. page_addr = hv_cpu->synic_message_page;
  1113. msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
  1114. /* Check if there are actual msgs to be processed */
  1115. if (msg->header.message_type != HVMSG_NONE) {
  1116. if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
  1117. hv_stimer0_isr();
  1118. vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
  1119. } else
  1120. tasklet_schedule(&hv_cpu->msg_dpc);
  1121. }
  1122. add_interrupt_randomness(vmbus_interrupt);
  1123. }
  1124. static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
  1125. {
  1126. vmbus_isr();
  1127. return IRQ_HANDLED;
  1128. }
  1129. /*
  1130. * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
  1131. * buffer and call into Hyper-V to transfer the data.
  1132. */
  1133. static void hv_kmsg_dump(struct kmsg_dumper *dumper,
  1134. enum kmsg_dump_reason reason)
  1135. {
  1136. struct kmsg_dump_iter iter;
  1137. size_t bytes_written;
  1138. /* We are only interested in panics. */
  1139. if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
  1140. return;
  1141. /*
  1142. * Write dump contents to the page. No need to synchronize; panic should
  1143. * be single-threaded.
  1144. */
  1145. kmsg_dump_rewind(&iter);
  1146. kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
  1147. &bytes_written);
  1148. if (!bytes_written)
  1149. return;
  1150. /*
  1151. * P3 to contain the physical address of the panic page & P4 to
  1152. * contain the size of the panic data in that page. Rest of the
  1153. * registers are no-op when the NOTIFY_MSG flag is set.
  1154. */
  1155. hv_set_register(HV_REGISTER_CRASH_P0, 0);
  1156. hv_set_register(HV_REGISTER_CRASH_P1, 0);
  1157. hv_set_register(HV_REGISTER_CRASH_P2, 0);
  1158. hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
  1159. hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
  1160. /*
  1161. * Let Hyper-V know there is crash data available along with
  1162. * the panic message.
  1163. */
  1164. hv_set_register(HV_REGISTER_CRASH_CTL,
  1165. (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG));
  1166. }
  1167. static struct kmsg_dumper hv_kmsg_dumper = {
  1168. .dump = hv_kmsg_dump,
  1169. };
  1170. static void hv_kmsg_dump_register(void)
  1171. {
  1172. int ret;
  1173. hv_panic_page = hv_alloc_hyperv_zeroed_page();
  1174. if (!hv_panic_page) {
  1175. pr_err("Hyper-V: panic message page memory allocation failed\n");
  1176. return;
  1177. }
  1178. ret = kmsg_dump_register(&hv_kmsg_dumper);
  1179. if (ret) {
  1180. pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
  1181. hv_free_hyperv_page((unsigned long)hv_panic_page);
  1182. hv_panic_page = NULL;
  1183. }
  1184. }
  1185. static struct ctl_table_header *hv_ctl_table_hdr;
  1186. /*
  1187. * sysctl option to allow the user to control whether kmsg data should be
  1188. * reported to Hyper-V on panic.
  1189. */
  1190. static struct ctl_table hv_ctl_table[] = {
  1191. {
  1192. .procname = "hyperv_record_panic_msg",
  1193. .data = &sysctl_record_panic_msg,
  1194. .maxlen = sizeof(int),
  1195. .mode = 0644,
  1196. .proc_handler = proc_dointvec_minmax,
  1197. .extra1 = SYSCTL_ZERO,
  1198. .extra2 = SYSCTL_ONE
  1199. },
  1200. {}
  1201. };
  1202. static struct ctl_table hv_root_table[] = {
  1203. {
  1204. .procname = "kernel",
  1205. .mode = 0555,
  1206. .child = hv_ctl_table
  1207. },
  1208. {}
  1209. };
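/*
 * Illustrative userspace sketch, not part of this driver: toggling the
 * sysctl defined in the table above (registered later in vmbus_bus_init()).
 * Equivalent to writing 0 or 1 to /proc/sys/kernel/hyperv_record_panic_msg.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/hyperv_record_panic_msg", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("0\n", f);	/* stop reporting kmsg data to Hyper-V on panic */
	return fclose(f) ? 1 : 0;
}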
  1210. /*
  1211. * vmbus_bus_init - Main vmbus driver initialization routine.
  1212. *
  1213. * Here, we
  1214. * - initialize the vmbus driver context
  1215. * - invoke the vmbus hv main init routine
  1216. * - retrieve the channel offers
  1217. */
  1218. static int vmbus_bus_init(void)
  1219. {
  1220. int ret;
  1221. ret = hv_init();
  1222. if (ret != 0) {
  1223. pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
  1224. return ret;
  1225. }
  1226. ret = bus_register(&hv_bus);
  1227. if (ret)
  1228. return ret;
  1229. /*
  1230. * VMbus interrupts are best modeled as per-cpu interrupts. If
  1231. * on an architecture with support for per-cpu IRQs (e.g. ARM64),
  1232. * allocate a per-cpu IRQ using standard Linux kernel functionality.
  1233. * If not on such an architecture (e.g., x86/x64), then rely on
  1234. * code in the arch-specific portion of the code tree to connect
  1235. * the VMbus interrupt handler.
  1236. */
  1237. if (vmbus_irq == -1) {
  1238. hv_setup_vmbus_handler(vmbus_isr);
  1239. } else {
  1240. vmbus_evt = alloc_percpu(long);
  1241. ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
  1242. "Hyper-V VMbus", vmbus_evt);
  1243. if (ret) {
  1244. pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d",
  1245. vmbus_irq, ret);
  1246. free_percpu(vmbus_evt);
  1247. goto err_setup;
  1248. }
  1249. }
  1250. ret = hv_synic_alloc();
  1251. if (ret)
  1252. goto err_alloc;
  1253. /*
  1254. * Initialize the per-cpu interrupt state and stimer state.
  1255. * Then connect to the host.
  1256. */
  1257. ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
  1258. hv_synic_init, hv_synic_cleanup);
  1259. if (ret < 0)
  1260. goto err_alloc;
  1261. hyperv_cpuhp_online = ret;
  1262. ret = vmbus_connect();
  1263. if (ret)
  1264. goto err_connect;
  1265. if (hv_is_isolation_supported())
  1266. sysctl_record_panic_msg = 0;
  1267. /*
  1268. * Only register if the crash MSRs are available
  1269. */
  1270. if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
  1271. u64 hyperv_crash_ctl;
  1272. /*
  1273. * Panic message recording (sysctl_record_panic_msg)
  1274. * is enabled by default in non-isolated guests and
  1275. * disabled by default in isolated guests; the panic
  1276. * message recording won't be available in isolated
  1277. * guests should the following registration fail.
  1278. */
  1279. hv_ctl_table_hdr = register_sysctl_table(hv_root_table);
  1280. if (!hv_ctl_table_hdr)
  1281. pr_err("Hyper-V: sysctl table register error");
  1282. /*
  1283. * Register for panic kmsg callback only if the right
  1284. * capability is supported by the hypervisor.
  1285. */
  1286. hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
  1287. if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
  1288. hv_kmsg_dump_register();
  1289. register_die_notifier(&hyperv_die_block);
  1290. }
  1291. /*
  1292. * Always register the panic notifier because we need to unload
  1293. * the VMbus channel connection to prevent any VMbus
  1294. * activity after the VM panics.
  1295. */
  1296. atomic_notifier_chain_register(&panic_notifier_list,
  1297. &hyperv_panic_block);
  1298. vmbus_request_offers();
  1299. return 0;
  1300. err_connect:
  1301. cpuhp_remove_state(hyperv_cpuhp_online);
  1302. err_alloc:
  1303. hv_synic_free();
  1304. if (vmbus_irq == -1) {
  1305. hv_remove_vmbus_handler();
  1306. } else {
  1307. free_percpu_irq(vmbus_irq, vmbus_evt);
  1308. free_percpu(vmbus_evt);
  1309. }
  1310. err_setup:
  1311. bus_unregister(&hv_bus);
  1312. unregister_sysctl_table(hv_ctl_table_hdr);
  1313. hv_ctl_table_hdr = NULL;
  1314. return ret;
  1315. }
/**
 * __vmbus_driver_register() - Register a vmbus driver
 * @hv_driver: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
 *
 * Registers the given driver with Linux through the 'driver_register()' call
 * and sets up the hyper-v vmbus handling for this driver.
 * It will return the state of the 'driver_register()' call.
 *
 */
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
{
	int ret;

	pr_info("registering driver %s\n", hv_driver->name);

	ret = vmbus_exists();
	if (ret < 0)
		return ret;

	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;

	spin_lock_init(&hv_driver->dynids.lock);
	INIT_LIST_HEAD(&hv_driver->dynids.list);

	ret = driver_register(&hv_driver->driver);

	return ret;
}
EXPORT_SYMBOL_GPL(__vmbus_driver_register);
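
/*
 * Illustrative usage (a sketch, not part of this file): a child driver
 * normally registers through the vmbus_driver_register() wrapper from
 * include/linux/hyperv.h, which supplies THIS_MODULE and KBUILD_MODNAME.
 * The driver name and callbacks below are placeholders:
 *
 *	static struct hv_driver example_drv = {
 *		.name = "example_vsc",
 *		.id_table = example_id_table,
 *		.probe = example_probe,
 *		.remove = example_remove,
 *	};
 *
 *	ret = vmbus_driver_register(&example_drv);
 */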
/**
 * vmbus_driver_unregister() - Unregister a vmbus driver
 * @hv_driver: Pointer to driver structure you want to
 *             un-register
 *
 * Un-register the given driver that was previously registered with a call to
 * vmbus_driver_register()
 */
void vmbus_driver_unregister(struct hv_driver *hv_driver)
{
	pr_info("unregistering driver %s\n", hv_driver->name);

	if (!vmbus_exists()) {
		driver_unregister(&hv_driver->driver);
		vmbus_free_dynids(hv_driver);
	}
}
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);

/*
 * Called when last reference to channel is gone.
 */
static void vmbus_chan_release(struct kobject *kobj)
{
	struct vmbus_channel *channel
		= container_of(kobj, struct vmbus_channel, kobj);

	kfree_rcu(channel, rcu);
}
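
/*
 * Note on the plumbing below: each per-channel sysfs attribute is declared
 * with show/store callbacks that take the vmbus_channel directly. The
 * generic vmbus_chan_attr_show()/vmbus_chan_attr_store() wrappers recover
 * the channel from the kobject embedded in it (via container_of()) and
 * then dispatch to the per-attribute callback.
 */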
struct vmbus_chan_attribute {
	struct attribute attr;
	ssize_t (*show)(struct vmbus_channel *chan, char *buf);
	ssize_t (*store)(struct vmbus_channel *chan,
			 const char *buf, size_t count);
};
#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \
	struct vmbus_chan_attribute chan_attr_##_name \
		= __ATTR(_name, _mode, _show, _store)
#define VMBUS_CHAN_ATTR_RW(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name)
#define VMBUS_CHAN_ATTR_RO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name)
#define VMBUS_CHAN_ATTR_WO(_name) \
	struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name)

static ssize_t vmbus_chan_attr_show(struct kobject *kobj,
				    struct attribute *attr, char *buf)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(chan, buf);
}

static ssize_t vmbus_chan_attr_store(struct kobject *kobj,
				     struct attribute *attr, const char *buf,
				     size_t count)
{
	const struct vmbus_chan_attribute *attribute
		= container_of(attr, struct vmbus_chan_attribute, attr);
	struct vmbus_channel *chan
		= container_of(kobj, struct vmbus_channel, kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(chan, buf, count);
}

static const struct sysfs_ops vmbus_chan_sysfs_ops = {
	.show = vmbus_chan_attr_show,
	.store = vmbus_chan_attr_store,
};

static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(out_mask);

static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(in_mask);

static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(read_avail);

static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;
	ssize_t ret;

	mutex_lock(&rbi->ring_buffer_mutex);
	if (!rbi->ring_buffer) {
		mutex_unlock(&rbi->ring_buffer_mutex);
		return -EINVAL;
	}

	ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
	mutex_unlock(&rbi->ring_buffer_mutex);
	return ret;
}
static VMBUS_CHAN_ATTR_RO(write_avail);

static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%u\n", channel->target_cpu);
}
static ssize_t target_cpu_store(struct vmbus_channel *channel,
				const char *buf, size_t count)
{
	u32 target_cpu, origin_cpu;
	ssize_t ret = count;

	if (vmbus_proto_version < VERSION_WIN10_V4_1)
		return -EIO;

	if (sscanf(buf, "%uu", &target_cpu) != 1)
		return -EIO;

	/* Validate target_cpu for the cpumask_test_cpu() operation below. */
	if (target_cpu >= nr_cpumask_bits)
		return -EINVAL;

	if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
		return -EINVAL;

	/* No CPUs should come up or down during this. */
	cpus_read_lock();

	if (!cpu_online(target_cpu)) {
		cpus_read_unlock();
		return -EINVAL;
	}

	/*
	 * Synchronizes target_cpu_store() and channel closure:
	 *
	 * { Initially: state = CHANNEL_OPENED }
	 *
	 * CPU1				CPU2
	 *
	 * [target_cpu_store()]		[vmbus_disconnect_ring()]
	 *
	 * LOCK channel_mutex		LOCK channel_mutex
	 * LOAD r1 = state		LOAD r2 = state
	 * IF (r1 == CHANNEL_OPENED)	IF (r2 == CHANNEL_OPENED)
	 *   SEND MODIFYCHANNEL		  STORE state = CHANNEL_OPEN
	 *   [...]			  SEND CLOSECHANNEL
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes
	 *		CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND
	 *
	 * Note.  The host processes the channel messages "sequentially", in
	 * the order in which they are received on a per-partition basis.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
	 * avoid sending the message and fail here for such channels.
	 */
	if (channel->state != CHANNEL_OPENED_STATE) {
		ret = -EIO;
		goto cpu_store_unlock;
	}

	origin_cpu = channel->target_cpu;
	if (target_cpu == origin_cpu)
		goto cpu_store_unlock;

	if (vmbus_send_modifychannel(channel,
				     hv_cpu_number_to_vp_number(target_cpu))) {
		ret = -EIO;
		goto cpu_store_unlock;
	}

	/*
	 * For versions before VERSION_WIN10_V5_3, the following warning holds:
	 *
	 * Warning.  At this point, there is *no* guarantee that the host will
	 * have successfully processed the vmbus_send_modifychannel() request.
	 * See the header comment of vmbus_send_modifychannel() for more info.
	 *
	 * Lags in the processing of the above vmbus_send_modifychannel() can
	 * result in missed interrupts if the "old" target CPU is taken offline
	 * before Hyper-V starts sending interrupts to the "new" target CPU.
	 * But apart from this offlining scenario, the code tolerates such
	 * lags.  It will function correctly even if a channel interrupt comes
	 * in on a CPU that is different from the channel target_cpu value.
	 */

	channel->target_cpu = target_cpu;

	/* See init_vp_index(). */
	if (hv_is_perf_channel(channel))
		hv_update_allocated_cpus(origin_cpu, target_cpu);

	/* Currently set only for storvsc channels. */
	if (channel->change_target_cpu_callback) {
		(*channel->change_target_cpu_callback)(channel,
				origin_cpu, target_cpu);
	}

cpu_store_unlock:
	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();
	return ret;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
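
/*
 * For reference: the "cpu" attribute above appears as
 * /sys/bus/vmbus/devices/<device UUID>/channels/<relid>/cpu, so an
 * administrator could retarget a channel's interrupts with, e.g.:
 *
 *	echo 4 > /sys/bus/vmbus/devices/<device UUID>/channels/<relid>/cpu
 */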
static ssize_t channel_pending_show(struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_pending(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL);

static ssize_t channel_latency_show(struct vmbus_channel *channel,
				    char *buf)
{
	return sprintf(buf, "%d\n",
		       channel_latency(channel,
				       vmbus_connection.monitor_pages[1]));
}
static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL);

static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->interrupts);
}
static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL);

static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf)
{
	return sprintf(buf, "%llu\n", channel->sig_events);
}
static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL);

static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel,
					 char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_in_full);
}
static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);

static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->intr_out_empty);
}
static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);

static ssize_t channel_out_full_first_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_first);
}
static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);

static ssize_t channel_out_full_total_show(struct vmbus_channel *channel,
					   char *buf)
{
	return sprintf(buf, "%llu\n",
		       (unsigned long long)channel->out_full_total);
}
static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);

static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel,
					  char *buf)
{
	return sprintf(buf, "%u\n", channel->offermsg.monitorid);
}
static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL);

static ssize_t subchannel_id_show(struct vmbus_channel *channel,
				  char *buf)
{
	return sprintf(buf, "%u\n",
		       channel->offermsg.offer.sub_channel_index);
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);
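
/*
 * All of the per-channel attributes defined above are exposed under
 * /sys/bus/vmbus/devices/<device UUID>/channels/<relid>/.
 */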
static struct attribute *vmbus_chan_attrs[] = {
	&chan_attr_out_mask.attr,
	&chan_attr_in_mask.attr,
	&chan_attr_read_avail.attr,
	&chan_attr_write_avail.attr,
	&chan_attr_cpu.attr,
	&chan_attr_pending.attr,
	&chan_attr_latency.attr,
	&chan_attr_interrupts.attr,
	&chan_attr_events.attr,
	&chan_attr_intr_in_full.attr,
	&chan_attr_intr_out_empty.attr,
	&chan_attr_out_full_first.attr,
	&chan_attr_out_full_total.attr,
	&chan_attr_monitor_id.attr,
	&chan_attr_subchannel_id.attr,
	NULL
};

/*
 * Channel-level attribute_group callback function. Returns the permission for
 * each attribute, and returns 0 if an attribute is not visible.
 */
static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
					  struct attribute *attr, int idx)
{
	const struct vmbus_channel *channel =
		container_of(kobj, struct vmbus_channel, kobj);

	/* Hide the monitor attributes if the monitor mechanism is not used. */
	if (!channel->offermsg.monitor_allocated &&
	    (attr == &chan_attr_pending.attr ||
	     attr == &chan_attr_latency.attr ||
	     attr == &chan_attr_monitor_id.attr))
		return 0;

	return attr->mode;
}

static struct attribute_group vmbus_chan_group = {
	.attrs = vmbus_chan_attrs,
	.is_visible = vmbus_chan_attr_is_visible
};

static struct kobj_type vmbus_chan_ktype = {
	.sysfs_ops = &vmbus_chan_sysfs_ops,
	.release = vmbus_chan_release,
};

/*
 * vmbus_add_channel_kobj - setup a sub-directory under device/channels
 */
int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
{
	const struct device *device = &dev->device;
	struct kobject *kobj = &channel->kobj;
	u32 relid = channel->offermsg.child_relid;
	int ret;

	kobj->kset = dev->channels_kset;
	ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
				   "%u", relid);
	if (ret) {
		kobject_put(kobj);
		return ret;
	}

	ret = sysfs_create_group(kobj, &vmbus_chan_group);
	if (ret) {
		/*
		 * The calling functions' error handling paths will cleanup the
		 * empty channel directory.
		 */
		kobject_put(kobj);
		dev_err(device, "Unable to set up channel sysfs files\n");
		return ret;
	}

	kobject_uevent(kobj, KOBJ_ADD);

	return 0;
}

/*
 * vmbus_remove_channel_attr_group - remove the channel's attribute group
 */
void vmbus_remove_channel_attr_group(struct vmbus_channel *channel)
{
	sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
}

/*
 * vmbus_device_create - Creates and registers a new child device
 * on the vmbus.
 */
struct hv_device *vmbus_device_create(const guid_t *type,
				      const guid_t *instance,
				      struct vmbus_channel *channel)
{
	struct hv_device *child_device_obj;

	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
		pr_err("Unable to allocate device object for child device\n");
		return NULL;
	}

	child_device_obj->channel = channel;
	guid_copy(&child_device_obj->dev_type, type);
	guid_copy(&child_device_obj->dev_instance, instance);
	child_device_obj->vendor_id = PCI_VENDOR_ID_MICROSOFT;

	return child_device_obj;
}
/*
 * vmbus_device_register - Register the child device
 */
int vmbus_device_register(struct hv_device *child_device_obj)
{
	struct kobject *kobj = &child_device_obj->device.kobj;
	int ret;

	dev_set_name(&child_device_obj->device, "%pUl",
		     &child_device_obj->channel->offermsg.offer.if_instance);

	child_device_obj->device.bus = &hv_bus;
	child_device_obj->device.parent = &hv_acpi_dev->dev;
	child_device_obj->device.release = vmbus_device_release;

	child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
	child_device_obj->device.dma_mask = &child_device_obj->dma_mask;
	dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));

	/*
	 * Register with the LDM. This will kick off the driver/device
	 * binding...which will eventually call vmbus_match() and vmbus_probe()
	 */
	ret = device_register(&child_device_obj->device);
	if (ret) {
		pr_err("Unable to register child device\n");
		put_device(&child_device_obj->device);
		return ret;
	}

	child_device_obj->channels_kset = kset_create_and_add("channels",
							      NULL, kobj);
	if (!child_device_obj->channels_kset) {
		ret = -ENOMEM;
		goto err_dev_unregister;
	}

	ret = vmbus_add_channel_kobj(child_device_obj,
				     child_device_obj->channel);
	if (ret) {
		pr_err("Unable to register primary channel\n");
		goto err_kset_unregister;
	}
	hv_debug_add_dev_dir(child_device_obj);

	return 0;

err_kset_unregister:
	kset_unregister(child_device_obj->channels_kset);

err_dev_unregister:
	device_unregister(&child_device_obj->device);
	return ret;
}
/*
 * vmbus_device_unregister - Remove the specified child device
 * from the vmbus.
 */
void vmbus_device_unregister(struct hv_device *device_obj)
{
	pr_debug("child device %s unregistered\n",
		 dev_name(&device_obj->device));

	kset_unregister(device_obj->channels_kset);

	/*
	 * Kick off the process of unregistering the device.
	 * This will call vmbus_remove() and eventually vmbus_device_release()
	 */
	device_unregister(&device_obj->device);
}
/*
 * VMBUS is an acpi enumerated device. Get the information we
 * need from DSDT.
 */
#define VTPM_BASE_ADDRESS 0xfed40000
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
{
	resource_size_t start = 0;
	resource_size_t end = 0;
	struct resource *new_res;
	struct resource **old_res = &hyperv_mmio;
	struct resource **prev_res = NULL;
	struct resource r;

	switch (res->type) {

	/*
	 * "Address" descriptors are for bus windows. Ignore
	 * "memory" descriptors, which are for registers on
	 * devices.
	 */
	case ACPI_RESOURCE_TYPE_ADDRESS32:
		start = res->data.address32.address.minimum;
		end = res->data.address32.address.maximum;
		break;

	case ACPI_RESOURCE_TYPE_ADDRESS64:
		start = res->data.address64.address.minimum;
		end = res->data.address64.address.maximum;
		break;

	/*
	 * The IRQ information is needed only on ARM64, which Hyper-V
	 * sets up in the extended format.  IRQ information is present
	 * on x86/x64 in the non-extended format but it is not used by
	 * Linux. So don't bother checking for the non-extended format.
	 */
	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
		if (!acpi_dev_resource_interrupt(res, 0, &r)) {
			pr_err("Unable to parse Hyper-V ACPI interrupt\n");
			return AE_ERROR;
		}
		/* ARM64 INTID for VMbus */
		vmbus_interrupt = res->data.extended_irq.interrupts[0];
		/* Linux IRQ number */
		vmbus_irq = r.start;
		return AE_OK;

	default:
		/* Unused resource type */
		return AE_OK;

	}
	/*
	 * Ignore ranges that are below 1MB, as they're not
	 * necessary or useful here.
	 */
	if (end < 0x100000)
		return AE_OK;

	new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
	if (!new_res)
		return AE_NO_MEMORY;

	/* If this range overlaps the virtual TPM, truncate it. */
	if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS)
		end = VTPM_BASE_ADDRESS;

	new_res->name = "hyperv mmio";
	new_res->flags = IORESOURCE_MEM;
	new_res->start = start;
	new_res->end = end;

	/*
	 * If two ranges are adjacent, merge them.
	 */
	do {
		if (!*old_res) {
			*old_res = new_res;
			break;
		}

		if (((*old_res)->end + 1) == new_res->start) {
			(*old_res)->end = new_res->end;
			kfree(new_res);
			break;
		}

		if ((*old_res)->start == new_res->end + 1) {
			(*old_res)->start = new_res->start;
			kfree(new_res);
			break;
		}

		if ((*old_res)->start > new_res->end) {
			new_res->sibling = *old_res;
			if (prev_res)
				(*prev_res)->sibling = new_res;
			*old_res = new_res;
			break;
		}

		prev_res = old_res;
		old_res = &(*old_res)->sibling;

	} while (1);

	return AE_OK;
}
static int vmbus_acpi_remove(struct acpi_device *device)
{
	struct resource *cur_res;
	struct resource *next_res;

	if (hyperv_mmio) {
		if (fb_mmio) {
			__release_region(hyperv_mmio, fb_mmio->start,
					 resource_size(fb_mmio));
			fb_mmio = NULL;
		}

		for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) {
			next_res = cur_res->sibling;
			kfree(cur_res);
		}
	}

	return 0;
}

static void vmbus_reserve_fb(void)
{
	resource_size_t start = 0, size;
	struct pci_dev *pdev;

	if (efi_enabled(EFI_BOOT)) {
		/* Gen2 VM: get FB base from EFI framebuffer */
		start = screen_info.lfb_base;
		size = max_t(__u32, screen_info.lfb_size, 0x800000);
	} else {
		/* Gen1 VM: get FB base from PCI */
		pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
				      PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
		if (!pdev)
			return;

		if (pdev->resource[0].flags & IORESOURCE_MEM) {
			start = pci_resource_start(pdev, 0);
			size = pci_resource_len(pdev, 0);
		}

		/*
		 * Release the PCI device so hyperv_drm or hyperv_fb driver can
		 * grab it later.
		 */
		pci_dev_put(pdev);
	}

	if (!start)
		return;

	/*
	 * Make a claim for the frame buffer in the resource tree under the
	 * first node, which will be the one below 4GB.  The length seems to
	 * be underreported, particularly in a Generation 1 VM.  So start out
	 * reserving a larger area and make it smaller until it succeeds.
	 */
	for (; !fb_mmio && (size >= 0x100000); size >>= 1)
		fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0);
}
/**
 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range.
 * @new:		If successful, supplies a pointer to the
 *			allocated MMIO space.
 * @device_obj:		Identifies the caller
 * @min:		Minimum guest physical address of the
 *			allocation
 * @max:		Maximum guest physical address
 * @size:		Size of the range to be allocated
 * @align:		Alignment of the range to be allocated
 * @fb_overlap_ok:	Whether this allocation can be allowed
 *			to overlap the video frame buffer.
 *
 * This function walks the resources granted to VMBus by the
 * _CRS object in the ACPI namespace underneath the parent
 * "bridge" whether that's a root PCI bus in the Generation 1
 * case or a Module Device in the Generation 2 case.  It then
 * attempts to allocate from the global MMIO pool in a way that
 * matches the constraints supplied in these parameters and by
 * that _CRS.
 *
 * Return: 0 on success, -errno on failure
 */
int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
			resource_size_t min, resource_size_t max,
			resource_size_t size, resource_size_t align,
			bool fb_overlap_ok)
{
	struct resource *iter, *shadow;
	resource_size_t range_min, range_max, start, end;
	const char *dev_n = dev_name(&device_obj->device);
	int retval;

	retval = -ENXIO;
	mutex_lock(&hyperv_mmio_lock);

	/*
	 * If overlaps with frame buffers are allowed, then first attempt to
	 * make the allocation from within the reserved region.  Because it
	 * is already reserved, no shadow allocation is necessary.
	 */
	if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) &&
	    !(max < fb_mmio->start)) {
		range_min = fb_mmio->start;
		range_max = fb_mmio->end;
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				retval = 0;
				goto exit;
			}
		}
	}

	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		if ((iter->start >= max) || (iter->end <= min))
			continue;

		range_min = iter->start;
		range_max = iter->end;
		start = (range_min + align - 1) & ~(align - 1);
		for (; start + size - 1 <= range_max; start += align) {
			end = start + size - 1;

			/* Skip the whole fb_mmio region if not fb_overlap_ok */
			if (!fb_overlap_ok && fb_mmio &&
			    (((start >= fb_mmio->start) && (start <= fb_mmio->end)) ||
			     ((end >= fb_mmio->start) && (end <= fb_mmio->end))))
				continue;

			shadow = __request_region(iter, start, size, NULL,
						  IORESOURCE_BUSY);
			if (!shadow)
				continue;

			*new = request_mem_region_exclusive(start, size, dev_n);
			if (*new) {
				shadow->name = (char *)*new;
				retval = 0;
				goto exit;
			}

			__release_region(iter, start, size);
		}
	}

exit:
	mutex_unlock(&hyperv_mmio_lock);
	return retval;
}
EXPORT_SYMBOL_GPL(vmbus_allocate_mmio);
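
/*
 * Illustrative usage (a sketch, not taken from this file): a child driver
 * such as a framebuffer driver might claim MMIO space anywhere in the
 * granted ranges and release it on teardown; "hdev" and "fb_size" below
 * are the caller's own variables:
 *
 *	struct resource *fb_res;
 *	int err;
 *
 *	err = vmbus_allocate_mmio(&fb_res, hdev, 0, -1, fb_size,
 *				  0x100000, true);
 *	if (err)
 *		return err;
 *	...
 *	vmbus_free_mmio(fb_res->start, fb_size);
 */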
/**
 * vmbus_free_mmio() - Free a memory-mapped I/O range.
 * @start:		Base address of region to release.
 * @size:		Size of the range to be released.
 *
 * This function releases anything requested by
 * vmbus_allocate_mmio().
 */
void vmbus_free_mmio(resource_size_t start, resource_size_t size)
{
	struct resource *iter;

	mutex_lock(&hyperv_mmio_lock);
	for (iter = hyperv_mmio; iter; iter = iter->sibling) {
		if ((iter->start >= start + size) || (iter->end <= start))
			continue;

		__release_region(iter, start, size);
	}
	release_mem_region(start, size);
	mutex_unlock(&hyperv_mmio_lock);
}
EXPORT_SYMBOL_GPL(vmbus_free_mmio);
static int vmbus_acpi_add(struct acpi_device *device)
{
	acpi_status result;
	int ret_val = -ENODEV;
	struct acpi_device *ancestor;

	hv_acpi_dev = device;

	/*
	 * Older versions of Hyper-V for ARM64 fail to include the _CCA
	 * method on the top level VMbus device in the DSDT. But devices
	 * are hardware coherent in all current Hyper-V use cases, so fix
	 * up the ACPI device to behave as if _CCA is present and indicates
	 * hardware coherence.
	 */
	ACPI_COMPANION_SET(&device->dev, device);
	if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) &&
	    device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) {
		pr_info("No ACPI _CCA found; assuming coherent device I/O\n");
		device->flags.cca_seen = true;
		device->flags.coherent_dma = true;
	}

	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
				     vmbus_walk_resources, NULL);

	if (ACPI_FAILURE(result))
		goto acpi_walk_err;
	/*
	 * Some ancestor of the vmbus acpi device (Gen1 or Gen2
	 * firmware) is the VMOD that has the mmio ranges. Get that.
	 */
	for (ancestor = acpi_dev_parent(device);
	     ancestor && ancestor->handle != ACPI_ROOT_OBJECT;
	     ancestor = acpi_dev_parent(ancestor)) {
		result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
					     vmbus_walk_resources, NULL);

		if (ACPI_FAILURE(result))
			continue;
		if (hyperv_mmio) {
			vmbus_reserve_fb();
			break;
		}
	}
	ret_val = 0;

acpi_walk_err:
	if (ret_val)
		vmbus_acpi_remove(device);
	return ret_val;
}
#ifdef CONFIG_PM_SLEEP
static int vmbus_bus_suspend(struct device *dev)
{
	struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(
			hv_context.cpu_context, VMBUS_CONNECT_CPU);
	struct vmbus_channel *channel, *sc;

	tasklet_disable(&hv_cpu->msg_dpc);
	vmbus_connection.ignore_any_offer_msg = true;
	/* The tasklet_enable() takes care of providing a memory barrier */
	tasklet_enable(&hv_cpu->msg_dpc);

	/* Drain all the workqueues as we are in suspend */
	drain_workqueue(vmbus_connection.rescind_work_queue);
	drain_workqueue(vmbus_connection.work_queue);
	drain_workqueue(vmbus_connection.handle_primary_chan_wq);
	drain_workqueue(vmbus_connection.handle_sub_chan_wq);

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!is_hvsock_channel(channel))
			continue;

		vmbus_force_channel_rescinded(channel);
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	/*
	 * Wait until all the sub-channels and hv_sock channels have been
	 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
	 * they would conflict with the new sub-channels that will be created
	 * in the resume path. hv_sock channels should also be destroyed, but
	 * a hv_sock channel of an established hv_sock connection can not be
	 * really destroyed since it may still be referenced by the userspace
	 * application, so we just force the hv_sock channel to be rescinded
	 * by vmbus_force_channel_rescinded(), and the userspace application
	 * will thoroughly destroy the channel after hibernation.
	 *
	 * Note: the counter nr_chan_close_on_suspend may never go above 0 if
	 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
	 */
	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
		wait_for_completion(&vmbus_connection.ready_for_suspend_event);

	if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
		pr_err("Can not suspend due to a previous failed resuming\n");
		return -EBUSY;
	}

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		/*
		 * Remove the channel from the array of channels and invalidate
		 * the channel's relid.  Upon resume, vmbus_onoffer() will fix
		 * up the relid (and other fields, if necessary) and add the
		 * channel back to the array.
		 */
		vmbus_channel_unmap_relid(channel);
		channel->offermsg.child_relid = INVALID_RELID;

		if (is_hvsock_channel(channel)) {
			if (!channel->rescind) {
				pr_err("hv_sock channel not rescinded!\n");
				WARN_ON_ONCE(1);
			}
			continue;
		}

		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			pr_err("Sub-channel not deleted!\n");
			WARN_ON_ONCE(1);
		}

		atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_initiate_unload(false);

	/* Reset the event for the next resume. */
	reinit_completion(&vmbus_connection.ready_for_resume_event);

	return 0;
}
static int vmbus_bus_resume(struct device *dev)
{
	struct vmbus_channel_msginfo *msginfo;
	size_t msgsize;
	int ret;

	vmbus_connection.ignore_any_offer_msg = false;

	/*
	 * We only use the 'vmbus_proto_version', which was in use before
	 * hibernation, to re-negotiate with the host.
	 */
	if (!vmbus_proto_version) {
		pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
		return -EINVAL;
	}

	msgsize = sizeof(*msginfo) +
		  sizeof(struct vmbus_channel_initiate_contact);

	msginfo = kzalloc(msgsize, GFP_KERNEL);

	if (msginfo == NULL)
		return -ENOMEM;

	ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);

	kfree(msginfo);

	if (ret != 0)
		return ret;

	WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);

	vmbus_request_offers();

	if (wait_for_completion_timeout(
		&vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
		pr_err("Some vmbus device is missing after suspending?\n");

	/* Reset the event for the next suspend. */
	reinit_completion(&vmbus_connection.ready_for_suspend_event);

	return 0;
}
#else
#define vmbus_bus_suspend NULL
#define vmbus_bus_resume NULL
#endif /* CONFIG_PM_SLEEP */

static const struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
	{"VMBus", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
/*
 * Note: we must use the "no_irq" ops, otherwise hibernation can not work with
 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in
 * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see
 * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
 * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
 * resume callback must also run via the "noirq" ops.
 *
 * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment
 * earlier in this file before vmbus_pm.
 */
static const struct dev_pm_ops vmbus_bus_pm = {
	.suspend_noirq	= NULL,
	.resume_noirq	= NULL,
	.freeze_noirq	= vmbus_bus_suspend,
	.thaw_noirq	= vmbus_bus_resume,
	.poweroff_noirq	= vmbus_bus_suspend,
	.restore_noirq	= vmbus_bus_resume
};

static struct acpi_driver vmbus_acpi_driver = {
	.name = "vmbus",
	.ids = vmbus_acpi_device_ids,
	.ops = {
		.add = vmbus_acpi_add,
		.remove = vmbus_acpi_remove,
	},
	.drv.pm = &vmbus_bus_pm,
	.drv.probe_type = PROBE_FORCE_SYNCHRONOUS,
};
static void hv_kexec_handler(void)
{
	hv_stimer_global_cleanup();
	vmbus_initiate_unload(false);
	/* Make sure conn_state is set as hv_synic_cleanup checks for it */
	mb();
	cpuhp_remove_state(hyperv_cpuhp_online);
}

static void hv_crash_handler(struct pt_regs *regs)
{
	int cpu;

	vmbus_initiate_unload(true);
	/*
	 * In crash handler we can't schedule synic cleanup for all CPUs,
	 * doing the cleanup for current CPU only. This should be sufficient
	 * for kdump.
	 */
	cpu = smp_processor_id();
	hv_stimer_cleanup(cpu);
	hv_synic_disable_regs(cpu);
}
static int hv_synic_suspend(void)
{
	/*
	 * When we reach here, all the non-boot CPUs have been offlined.
	 * If we're in a legacy configuration where stimer Direct Mode is
	 * not enabled, the stimers on the non-boot CPUs have been unbound
	 * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
	 * hv_stimer_cleanup() -> clockevents_unbind_device().
	 *
	 * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
	 * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
	 * 1) it's unnecessary as interrupts remain disabled between
	 * syscore_suspend() and syscore_resume(): see create_image() and
	 * resume_target_kernel()
	 * 2) the stimer on CPU0 is automatically disabled later by
	 * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
	 * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
	 * 3) a warning would be triggered if we call
	 * clockevents_unbind_device(), which may sleep, in an
	 * interrupts-disabled context.
	 */
	hv_synic_disable_regs(0);

	return 0;
}

static void hv_synic_resume(void)
{
	hv_synic_enable_regs(0);

	/*
	 * Note: we don't need to call hv_stimer_init(0), because the timer
	 * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
	 * automatically re-enabled in timekeeping_resume().
	 */
}

/* The callbacks run only on CPU0, with irqs_disabled. */
static struct syscore_ops hv_synic_syscore_ops = {
	.suspend = hv_synic_suspend,
	.resume = hv_synic_resume,
};
static int __init hv_acpi_init(void)
{
	int ret;

	if (!hv_is_hyperv_initialized())
		return -ENODEV;

	if (hv_root_partition)
		return 0;

	/*
	 * Get ACPI resources first.
	 */
	ret = acpi_bus_register_driver(&vmbus_acpi_driver);

	if (ret)
		return ret;

	if (!hv_acpi_dev) {
		ret = -ENODEV;
		goto cleanup;
	}

	/*
	 * If we're on an architecture with a hardcoded hypervisor
	 * vector (i.e. x86/x64), override the VMbus interrupt found
	 * in the ACPI tables. Ensure vmbus_irq is not set since the
	 * normal Linux IRQ mechanism is not used in this case.
	 */
#ifdef HYPERVISOR_CALLBACK_VECTOR
	vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
	vmbus_irq = -1;
#endif

	hv_debug_init();

	ret = vmbus_bus_init();
	if (ret)
		goto cleanup;

	hv_setup_kexec_handler(hv_kexec_handler);
	hv_setup_crash_handler(hv_crash_handler);

	register_syscore_ops(&hv_synic_syscore_ops);

	return 0;

cleanup:
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
	hv_acpi_dev = NULL;
	return ret;
}
static void __exit vmbus_exit(void)
{
	int cpu;

	unregister_syscore_ops(&hv_synic_syscore_ops);

	hv_remove_kexec_handler();
	hv_remove_crash_handler();
	vmbus_connection.conn_state = DISCONNECTED;
	hv_stimer_global_cleanup();
	vmbus_disconnect();
	if (vmbus_irq == -1) {
		hv_remove_vmbus_handler();
	} else {
		free_percpu_irq(vmbus_irq, vmbus_evt);
		free_percpu(vmbus_evt);
	}
	for_each_online_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_kill(&hv_cpu->msg_dpc);
	}
	hv_debug_rm_all_dir();

	vmbus_free_channels();
	kfree(vmbus_connection.channels);

	if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		kmsg_dump_unregister(&hv_kmsg_dumper);
		unregister_die_notifier(&hyperv_die_block);
	}

	/*
	 * The panic notifier is always registered, hence we should
	 * also unconditionally unregister it here as well.
	 */
	atomic_notifier_chain_unregister(&panic_notifier_list,
					 &hyperv_panic_block);

	free_page((unsigned long)hv_panic_page);
	unregister_sysctl_table(hv_ctl_table_hdr);
	hv_ctl_table_hdr = NULL;
	bus_unregister(&hv_bus);

	cpuhp_remove_state(hyperv_cpuhp_online);
	hv_synic_free();
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
}

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");

subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);