/* vmw_balloon.c */
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * VMware Balloon driver.
  4. *
  5. * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
  6. *
  7. * This is VMware physical memory management driver for Linux. The driver
  8. * acts like a "balloon" that can be inflated to reclaim physical pages by
  9. * reserving them in the guest and invalidating them in the monitor,
  10. * freeing up the underlying machine pages so they can be allocated to
  11. * other guests. The balloon can also be deflated to allow the guest to
  12. * use more physical memory. Higher level policies can control the sizes
  13. * of balloons in VMs in order to manage physical memory resources.
  14. */
  15. //#define DEBUG
  16. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  17. #include <linux/types.h>
  18. #include <linux/io.h>
  19. #include <linux/kernel.h>
  20. #include <linux/mm.h>
  21. #include <linux/vmalloc.h>
  22. #include <linux/sched.h>
  23. #include <linux/module.h>
  24. #include <linux/workqueue.h>
  25. #include <linux/debugfs.h>
  26. #include <linux/seq_file.h>
  27. #include <linux/rwsem.h>
  28. #include <linux/slab.h>
  29. #include <linux/spinlock.h>
  30. #include <linux/balloon_compaction.h>
  31. #include <linux/vmw_vmci_defs.h>
  32. #include <linux/vmw_vmci_api.h>
  33. #include <asm/hypervisor.h>
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Module parameter (read-only at runtime, mode 0444) that enables the
 * out-of-memory protection described in the parameter description below.
 */
static bool __read_mostly vmwballoon_shrinker_enable;
module_param(vmwballoon_shrinker_enable, bool, 0444);
MODULE_PARM_DESC(vmwballoon_shrinker_enable,
	"Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");

/* Delay in seconds after shrink before inflation. */
#define VMBALLOON_SHRINK_DELAY		(5)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/* Magic number for the balloon mount-point */
#define BALLOON_VMW_MAGIC		0x0ba11007

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */
/*
 * Capability bits that may be negotiated with the hypervisor via
 * VMW_BALLOON_CMD_START (see vmballoon_send_start()).
 */
enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
	VMW_BALLOON_64_BIT_TARGET		= (1 << 5)
};

/* Capabilities requested on every architecture. */
#define VMW_BALLOON_CAPABILITIES_COMMON	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

/* Allocation order of a 2MB page, expressed in 4KB frames. */
#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)

/*
 * 64-bit targets are only supported in 64-bit
 */
#ifdef CONFIG_64BIT
#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_CAPABILITIES_COMMON \
					 | VMW_BALLOON_64_BIT_TARGET)
#else
#define VMW_BALLOON_CAPABILITIES	VMW_BALLOON_CAPABILITIES_COMMON
#endif

/* Page sizes the balloon can inflate or deflate with. */
enum vmballoon_page_size_type {
	VMW_BALLOON_4K_PAGE,
	VMW_BALLOON_2M_PAGE,
	VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
};

#define VMW_BALLOON_NUM_PAGE_SIZES	(VMW_BALLOON_LAST_SIZE + 1)

/* Human-readable names, indexed by enum vmballoon_page_size_type. */
static const char * const vmballoon_page_size_names[] = {
	[VMW_BALLOON_4K_PAGE]			= "4k",
	[VMW_BALLOON_2M_PAGE]			= "2M"
};

/* Direction of a balloon operation. */
enum vmballoon_op {
	VMW_BALLOON_INFLATE,
	VMW_BALLOON_DEFLATE
};

/* Statistics buckets per command: total issued vs. failed. */
enum vmballoon_op_stat_type {
	VMW_BALLOON_OP_STAT,
	VMW_BALLOON_OP_FAIL_STAT
};

#define VMW_BALLOON_OP_STAT_TYPES	(VMW_BALLOON_OP_FAIL_STAT + 1)
/**
 * enum vmballoon_cmd_type - backdoor commands.
 *
 * Availability of the commands is as followed:
 *
 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
 * %VMW_BALLOON_CMD_GUEST_ID are always available.
 *
 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_LOCK and %VMW_BALLOON_CMD_BATCHED_UNLOCK commands
 * are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
 * are supported.
 *
 * If the host reports %VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
 * %VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
 *
 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor.
 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
 *			    to be deflated from the balloon.
 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
 *			      runs in the VM.
 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
 *				  ballooned pages (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
 *				    pages that are about to be deflated from
 *				    the balloon (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
 *				     for 2MB pages.
 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
 *					@VMW_BALLOON_CMD_BATCHED_UNLOCK for
 *					2MB pages.
 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
 *					that would be invoked when the balloon
 *					size changes.
 * @VMW_BALLOON_CMD_LAST: Value of the last command.
 */
enum vmballoon_cmd_type {
	VMW_BALLOON_CMD_START,
	VMW_BALLOON_CMD_GET_TARGET,
	VMW_BALLOON_CMD_LOCK,
	VMW_BALLOON_CMD_UNLOCK,
	VMW_BALLOON_CMD_GUEST_ID,
	/* No command 5 */
	VMW_BALLOON_CMD_BATCHED_LOCK = 6,
	VMW_BALLOON_CMD_BATCHED_UNLOCK,
	VMW_BALLOON_CMD_BATCHED_2M_LOCK,
	VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
	VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
	VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
};

#define VMW_BALLOON_CMD_NUM	(VMW_BALLOON_CMD_LAST + 1)
/* Status codes returned by the hypervisor for backdoor commands. */
enum vmballoon_error_codes {
	VMW_BALLOON_SUCCESS,
	VMW_BALLOON_ERROR_CMD_INVALID,
	VMW_BALLOON_ERROR_PPN_INVALID,
	VMW_BALLOON_ERROR_PPN_LOCKED,
	VMW_BALLOON_ERROR_PPN_UNLOCKED,
	VMW_BALLOON_ERROR_PPN_PINNED,
	VMW_BALLOON_ERROR_PPN_NOTNEEDED,
	VMW_BALLOON_ERROR_RESET,
	VMW_BALLOON_ERROR_BUSY
};

/* Successful START status that additionally carries capability bits. */
#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/*
 * Commands whose side-band result also carries an updated balloon target
 * size (consumed by __vmballoon_cmd()).
 */
#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))

/*
 * Command names indexed by enum vmballoon_cmd_type; used when logging
 * command failures (see the pr_debug() in __vmballoon_cmd()).
 */
static const char * const vmballoon_cmd_names[] = {
	[VMW_BALLOON_CMD_START]			= "start",
	[VMW_BALLOON_CMD_GET_TARGET]		= "target",
	[VMW_BALLOON_CMD_LOCK]			= "lock",
	[VMW_BALLOON_CMD_UNLOCK]		= "unlock",
	[VMW_BALLOON_CMD_GUEST_ID]		= "guestType",
	[VMW_BALLOON_CMD_BATCHED_LOCK]		= "batchLock",
	[VMW_BALLOON_CMD_BATCHED_UNLOCK]	= "batchUnlock",
	[VMW_BALLOON_CMD_BATCHED_2M_LOCK]	= "2m-lock",
	[VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]	= "2m-unlock",
	[VMW_BALLOON_CMD_VMCI_DOORBELL_SET]	= "doorbellSet"
};
/* Per-page-size page statistics buckets. */
enum vmballoon_stat_page {
	VMW_BALLOON_PAGE_STAT_ALLOC,
	VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
	VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
	VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
	VMW_BALLOON_PAGE_STAT_FREE,
	VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
};

#define VMW_BALLOON_PAGE_STAT_NUM	(VMW_BALLOON_PAGE_STAT_LAST + 1)

/* General event statistics buckets (not page-size specific). */
enum vmballoon_stat_general {
	VMW_BALLOON_STAT_TIMER,
	VMW_BALLOON_STAT_DOORBELL,
	VMW_BALLOON_STAT_RESET,
	VMW_BALLOON_STAT_SHRINK,
	VMW_BALLOON_STAT_SHRINK_FREE,
	VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
};

#define VMW_BALLOON_STAT_NUM		(VMW_BALLOON_STAT_LAST + 1)

/* Static key: batched host communication, enabled by default. */
static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
/* Static key: statistics collection, disabled by default (see is_vmballoon_stats_on()). */
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);

/**
 * struct vmballoon_ctl - control structure for a single (in|de)flate cycle.
 *
 * @pages: list of pages being locked/unlocked in this operation.
 * @refused_pages: list of pages the hypervisor refused.
 * @prealloc_pages: pages allocated earlier and available for reuse; filled
 *		    when a rejected 2MB page is split into 4KB pages (see
 *		    vmballoon_alloc_page_list()).
 * @n_refused_pages: number of entries on @refused_pages.
 * @n_pages: number of entries on @pages.
 * @page_size: size of the pages this operation works with.
 * @op: whether the operation inflates or deflates the balloon.
 */
struct vmballoon_ctl {
	struct list_head pages;
	struct list_head refused_pages;
	struct list_head prealloc_pages;
	unsigned int n_refused_pages;
	unsigned int n_pages;
	enum vmballoon_page_size_type page_size;
	enum vmballoon_op op;
};
/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * One 64-bit entry in the shared batch page handed to the hypervisor.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry {
	u64 status : 5;
	u64 reserved : PAGE_SHIFT - 5;
	u64 pfn : 52;
} __packed;
struct vmballoon {
	/**
	 * @max_page_size: maximum supported page size for ballooning.
	 *
	 * Protected by @conf_sem
	 */
	enum vmballoon_page_size_type max_page_size;

	/**
	 * @size: balloon actual size in basic page size (frames).
	 *
	 * While we currently do not support size which is bigger than 32-bit,
	 * in preparation for future support, use 64-bits.
	 */
	atomic64_t size;

	/**
	 * @target: balloon target size in basic page size (frames).
	 *
	 * We do not protect the target under the assumption that setting the
	 * value is always done through a single write. If this assumption ever
	 * breaks, we would have to use X_ONCE for accesses, and suffer the less
	 * optimized code. Although we may read stale target value if multiple
	 * accesses happen at once, the performance impact should be minor.
	 */
	unsigned long target;

	/**
	 * @reset_required: reset flag
	 *
	 * Setting this flag may introduce races, but the code is expected to
	 * handle them gracefully. In the worst case, another operation will
	 * fail as reset did not take place. Clearing the flag is done while
	 * holding @conf_sem for write.
	 */
	bool reset_required;

	/**
	 * @capabilities: hypervisor balloon capabilities.
	 *
	 * Protected by @conf_sem.
	 */
	unsigned long capabilities;

	/**
	 * @batch_page: pointer to communication batch page.
	 *
	 * When batching is used, batch_page points to a page, which holds up to
	 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
	 */
	struct vmballoon_batch_entry *batch_page;

	/**
	 * @batch_max_pages: maximum pages that can be locked/unlocked.
	 *
	 * Indicates the number of pages that the hypervisor can lock or unlock
	 * at once, according to whether batching is enabled. If batching is
	 * disabled, only a single page can be locked/unlock on each operation.
	 *
	 * Protected by @conf_sem.
	 */
	unsigned int batch_max_pages;

	/**
	 * @page: page to be locked/unlocked by the hypervisor
	 *
	 * @page is only used when batching is disabled and a single page is
	 * reclaimed on each iteration.
	 *
	 * Protected by @comm_lock.
	 */
	struct page *page;

	/**
	 * @shrink_timeout: timeout until the next inflation.
	 *
	 * After an shrink event, indicates the time in jiffies after which
	 * inflation is allowed again. Can be written concurrently with reads,
	 * so must use READ_ONCE/WRITE_ONCE when accessing.
	 */
	unsigned long shrink_timeout;

	/**
	 * @stats: statistics counters; only updated when statistics are
	 * enabled (see is_vmballoon_stats_on()).
	 */
	struct vmballoon_stats *stats;

	/**
	 * @b_dev_info: balloon device information descriptor.
	 */
	struct balloon_dev_info b_dev_info;

	/*
	 * @dwork: delayed work item; presumably drives the periodic
	 * inflate/deflate work — confirm against the worker function
	 * (not visible in this chunk).
	 */
	struct delayed_work dwork;

	/**
	 * @huge_pages - list of the inflated 2MB pages.
	 *
	 * Protected by @b_dev_info.pages_lock .
	 */
	struct list_head huge_pages;

	/**
	 * @vmci_doorbell: VMCI doorbell handle.
	 *
	 * Protected by @conf_sem.
	 */
	struct vmci_handle vmci_doorbell;

	/**
	 * @conf_sem: semaphore to protect the configuration and the statistics.
	 */
	struct rw_semaphore conf_sem;

	/**
	 * @comm_lock: lock to protect the communication with the host.
	 *
	 * Lock ordering: @conf_sem -> @comm_lock .
	 */
	spinlock_t comm_lock;

	/**
	 * @shrinker: shrinker interface that is used to avoid over-inflation.
	 */
	struct shrinker shrinker;

	/**
	 * @shrinker_registered: whether the shrinker was registered.
	 *
	 * The shrinker interface does not handle gracefully the removal of
	 * shrinker that was not registered before. This indication allows to
	 * simplify the unregistration process.
	 */
	bool shrinker_registered;
};

/* The single, file-global balloon instance. */
static struct vmballoon balloon;
/* Statistics counters; see the vmballoon_stats_*() helpers below. */
struct vmballoon_stats {
	/* timer / doorbell operations */
	atomic64_t general_stat[VMW_BALLOON_STAT_NUM];

	/* allocation statistics for huge and small pages */
	atomic64_t
		page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];

	/* Monitor operations: total operations, and failures */
	atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
};
  355. static inline bool is_vmballoon_stats_on(void)
  356. {
  357. return IS_ENABLED(CONFIG_DEBUG_FS) &&
  358. static_branch_unlikely(&balloon_stat_enabled);
  359. }
  360. static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
  361. enum vmballoon_op_stat_type type)
  362. {
  363. if (is_vmballoon_stats_on())
  364. atomic64_inc(&b->stats->ops[op][type]);
  365. }
  366. static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
  367. enum vmballoon_stat_general stat)
  368. {
  369. if (is_vmballoon_stats_on())
  370. atomic64_inc(&b->stats->general_stat[stat]);
  371. }
  372. static inline void vmballoon_stats_gen_add(struct vmballoon *b,
  373. enum vmballoon_stat_general stat,
  374. unsigned int val)
  375. {
  376. if (is_vmballoon_stats_on())
  377. atomic64_add(val, &b->stats->general_stat[stat]);
  378. }
  379. static inline void vmballoon_stats_page_inc(struct vmballoon *b,
  380. enum vmballoon_stat_page stat,
  381. enum vmballoon_page_size_type size)
  382. {
  383. if (is_vmballoon_stats_on())
  384. atomic64_inc(&b->stats->page_stat[stat][size]);
  385. }
  386. static inline void vmballoon_stats_page_add(struct vmballoon *b,
  387. enum vmballoon_stat_page stat,
  388. enum vmballoon_page_size_type size,
  389. unsigned int val)
  390. {
  391. if (is_vmballoon_stats_on())
  392. atomic64_add(val, &b->stats->page_stat[stat][size]);
  393. }
/**
 * __vmballoon_cmd() - issue a balloon command to the hypervisor backdoor.
 * @b: pointer to the balloon.
 * @cmd: the command to issue (one of &enum vmballoon_cmd_type).
 * @arg1: first command argument.
 * @arg2: second command argument.
 * @result: output argument; receives the command result when non-NULL.
 *
 * Performs the hypervisor call via an "inl" on the backdoor I/O port, with
 * the magic in %eax and the port number in %dx. On success of a command in
 * %VMW_BALLOON_CMD_WITH_TARGET_MASK the returned value also carries the new
 * balloon target, which is stored in @b->target.
 *
 * Return: the status reported by the hypervisor (&enum vmballoon_error_codes
 * or %VMW_BALLOON_SUCCESS_WITH_CAPABILITIES).
 */
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);

	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed; START returns its result in %ecx */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		WRITE_ONCE(b->target, local_result);

	if (status != VMW_BALLOON_SUCCESS &&
	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
		vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
			 status);
	}

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}
  432. static __always_inline unsigned long
  433. vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
  434. unsigned long arg2)
  435. {
  436. unsigned long dummy;
  437. return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
  438. }
  439. /*
  440. * Send "start" command to the host, communicating supported version
  441. * of the protocol.
  442. */
  443. static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
  444. {
  445. unsigned long status, capabilities;
  446. status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
  447. &capabilities);
  448. switch (status) {
  449. case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
  450. b->capabilities = capabilities;
  451. break;
  452. case VMW_BALLOON_SUCCESS:
  453. b->capabilities = VMW_BALLOON_BASIC_CMDS;
  454. break;
  455. default:
  456. return -EIO;
  457. }
  458. /*
  459. * 2MB pages are only supported with batching. If batching is for some
  460. * reason disabled, do not use 2MB pages, since otherwise the legacy
  461. * mechanism is used with 2MB pages, causing a failure.
  462. */
  463. b->max_page_size = VMW_BALLOON_4K_PAGE;
  464. if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
  465. (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
  466. b->max_page_size = VMW_BALLOON_2M_PAGE;
  467. return 0;
  468. }
  469. /**
  470. * vmballoon_send_guest_id - communicate guest type to the host.
  471. *
  472. * @b: pointer to the balloon.
  473. *
  474. * Communicate guest type to the host so that it can adjust ballooning
  475. * algorithm to the one most appropriate for the guest. This command
  476. * is normally issued after sending "start" command and is part of
  477. * standard reset sequence.
  478. *
  479. * Return: zero on success or appropriate error code.
  480. */
  481. static int vmballoon_send_guest_id(struct vmballoon *b)
  482. {
  483. unsigned long status;
  484. status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
  485. VMW_BALLOON_GUEST_ID, 0);
  486. return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
  487. }
  488. /**
  489. * vmballoon_page_order() - return the order of the page
  490. * @page_size: the size of the page.
  491. *
  492. * Return: the allocation order.
  493. */
  494. static inline
  495. unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
  496. {
  497. return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
  498. }
  499. /**
  500. * vmballoon_page_in_frames() - returns the number of frames in a page.
  501. * @page_size: the size of the page.
  502. *
  503. * Return: the number of 4k frames.
  504. */
  505. static inline unsigned int
  506. vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
  507. {
  508. return 1 << vmballoon_page_order(page_size);
  509. }
  510. /**
  511. * vmballoon_mark_page_offline() - mark a page as offline
  512. * @page: pointer for the page.
  513. * @page_size: the size of the page.
  514. */
  515. static void
  516. vmballoon_mark_page_offline(struct page *page,
  517. enum vmballoon_page_size_type page_size)
  518. {
  519. int i;
  520. for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
  521. __SetPageOffline(page + i);
  522. }
  523. /**
  524. * vmballoon_mark_page_online() - mark a page as online
  525. * @page: pointer for the page.
  526. * @page_size: the size of the page.
  527. */
  528. static void
  529. vmballoon_mark_page_online(struct page *page,
  530. enum vmballoon_page_size_type page_size)
  531. {
  532. int i;
  533. for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
  534. __ClearPageOffline(page + i);
  535. }
  536. /**
  537. * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
  538. *
  539. * @b: pointer to the balloon.
  540. *
  541. * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
  542. * by the host-guest protocol and EIO if an error occurred in communicating with
  543. * the host.
  544. */
  545. static int vmballoon_send_get_target(struct vmballoon *b)
  546. {
  547. unsigned long status;
  548. unsigned long limit;
  549. limit = totalram_pages();
  550. /* Ensure limit fits in 32-bits if 64-bit targets are not supported */
  551. if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
  552. limit != (u32)limit)
  553. return -EINVAL;
  554. status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);
  555. return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
  556. }
/**
 * vmballoon_alloc_page_list - allocates a list of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 * @req_n_pages: the number of requested pages.
 *
 * Tries to allocate @req_n_pages. Add them to the list of balloon pages in
 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
 *
 * Return: zero on success or error code otherwise.
 */
static int vmballoon_alloc_page_list(struct vmballoon *b,
				     struct vmballoon_ctl *ctl,
				     unsigned int req_n_pages)
{
	struct page *page;
	unsigned int i;

	for (i = 0; i < req_n_pages; i++) {
		/*
		 * First check if we happen to have pages that were allocated
		 * before. This happens when 2MB page rejected during inflation
		 * by the hypervisor, and then split into 4KB pages.
		 */
		if (!list_empty(&ctl->prealloc_pages)) {
			/* Reuse a previously allocated page; not counted in
			 * the ALLOC statistic (it was counted when first
			 * allocated). */
			page = list_first_entry(&ctl->prealloc_pages,
						struct page, lru);
			list_del(&page->lru);
		} else {
			/* 2MB pages bypass the balloon-compaction allocator. */
			if (ctl->page_size == VMW_BALLOON_2M_PAGE)
				page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
					__GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
			else
				page = balloon_page_alloc();

			/* ALLOC is counted per attempt, even on failure. */
			vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
						 ctl->page_size);
		}

		if (page) {
			/* Success. Add the page to the list and continue. */
			list_add(&page->lru, &ctl->pages);
			continue;
		}

		/* Allocation failed. Update statistics and stop. */
		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
					 ctl->page_size);
		break;
	}

	/* Report how many pages were actually obtained. */
	ctl->n_pages = i;

	return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
}
  607. /**
  608. * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
  609. *
  610. * @b: pointer for %struct vmballoon.
  611. * @page: pointer for the page whose result should be handled.
  612. * @page_size: size of the page.
  613. * @status: status of the operation as provided by the hypervisor.
  614. */
  615. static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
  616. enum vmballoon_page_size_type page_size,
  617. unsigned long status)
  618. {
  619. /* On success do nothing. The page is already on the balloon list. */
  620. if (likely(status == VMW_BALLOON_SUCCESS))
  621. return 0;
  622. pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
  623. page_to_pfn(page), status,
  624. vmballoon_page_size_names[page_size]);
  625. /* Error occurred */
  626. vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
  627. page_size);
  628. return -EIO;
  629. }
/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on in
 * the @p argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{
	if (static_branch_likely(&vmw_balloon_batching)) {
		/* batching mode: both page and status come from the batch page */
		*p = pfn_to_page(b->batch_page[idx].pfn);
		return b->batch_page[idx].status;
	}

	/* non-batching mode: the single page is tracked in b->page */
	*p = b->page;

	/*
	 * If a failure occurs, the indication will be provided in the status
	 * of the entire operation, which is considered before the individual
	 * page status. So for non-batching mode, the indication is always of
	 * success.
	 */
	return VMW_BALLOON_SUCCESS;
}
  661. /**
  662. * vmballoon_lock_op - notifies the host about inflated/deflated pages.
  663. * @b: pointer to the balloon.
  664. * @num_pages: number of inflated/deflated pages.
  665. * @page_size: size of the page.
  666. * @op: the type of operation (lock or unlock).
  667. *
  668. * Notify the host about page(s) that were ballooned (or removed from the
  669. * balloon) so that host can use it without fear that guest will need it (or
  670. * stop using them since the VM does). Host may reject some pages, we need to
  671. * check the return value and maybe submit a different page. The pages that are
  672. * inflated/deflated are pointed by @b->page.
  673. *
  674. * Return: result as provided by the hypervisor.
  675. */
  676. static unsigned long vmballoon_lock_op(struct vmballoon *b,
  677. unsigned int num_pages,
  678. enum vmballoon_page_size_type page_size,
  679. enum vmballoon_op op)
  680. {
  681. unsigned long cmd, pfn;
  682. lockdep_assert_held(&b->comm_lock);
  683. if (static_branch_likely(&vmw_balloon_batching)) {
  684. if (op == VMW_BALLOON_INFLATE)
  685. cmd = page_size == VMW_BALLOON_2M_PAGE ?
  686. VMW_BALLOON_CMD_BATCHED_2M_LOCK :
  687. VMW_BALLOON_CMD_BATCHED_LOCK;
  688. else
  689. cmd = page_size == VMW_BALLOON_2M_PAGE ?
  690. VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
  691. VMW_BALLOON_CMD_BATCHED_UNLOCK;
  692. pfn = PHYS_PFN(virt_to_phys(b->batch_page));
  693. } else {
  694. cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
  695. VMW_BALLOON_CMD_UNLOCK;
  696. pfn = page_to_pfn(b->page);
  697. /* In non-batching mode, PFNs must fit in 32-bit */
  698. if (unlikely(pfn != (u32)pfn))
  699. return VMW_BALLOON_ERROR_PPN_INVALID;
  700. }
  701. return vmballoon_cmd(b, cmd, pfn, num_pages);
  702. }
  703. /**
  704. * vmballoon_add_page - adds a page towards lock/unlock operation.
  705. *
  706. * @b: pointer to the balloon.
  707. * @idx: index of the page to be ballooned in this batch.
  708. * @p: pointer to the page that is about to be ballooned.
  709. *
  710. * Adds the page to be ballooned. Must be called while holding @comm_lock.
  711. */
  712. static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
  713. struct page *p)
  714. {
  715. lockdep_assert_held(&b->comm_lock);
  716. if (static_branch_likely(&vmw_balloon_batching))
  717. b->batch_page[idx] = (struct vmballoon_batch_entry)
  718. { .pfn = page_to_pfn(p) };
  719. else
  720. b->page = p;
  721. }
/**
 * vmballoon_lock - lock or unlock a batch of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * Notifies the host about ballooned pages (after inflation or deflation,
 * according to @ctl). If the host rejects the page put it on the
 * @ctl refuse list. These refused pages are then released when moving to the
 * next size of pages.
 *
 * Note that we neither free any @page here nor put them back on the ballooned
 * pages list. Instead we queue it for later processing. We do that for several
 * reasons. First, we do not want to free the page under the lock. Second, it
 * allows us to unify the handling of lock and unlock. In the inflate case, the
 * caller will check if there are too many refused pages and release them.
 * Although it is not identical to the past behavior, it should not affect
 * performance.
 *
 * Return: zero on success, -EIO if the operation failed as a whole.
 */
static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
{
	unsigned long batch_status;
	struct page *page;
	unsigned int i, num_pages;

	num_pages = ctl->n_pages;
	if (num_pages == 0)
		return 0;

	/* communication with the host is done under the communication lock */
	spin_lock(&b->comm_lock);

	i = 0;
	/* Stage every page (or its batch entry) for the hypervisor call. */
	list_for_each_entry(page, &ctl->pages, lru)
		vmballoon_add_page(b, i++, page);

	batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
					 ctl->op);

	/*
	 * Iterate over the pages in the provided list. Since we are changing
	 * @ctl->n_pages we are saving the original value in @num_pages and
	 * use this value to bound the loop.
	 */
	for (i = 0; i < num_pages; i++) {
		unsigned long status;

		status = vmballoon_status_page(b, i, &page);

		/*
		 * Failure of the whole batch overrides a single operation
		 * results.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		/* Continue if no error happened */
		if (!vmballoon_handle_one_result(b, page, ctl->page_size,
						 status))
			continue;

		/*
		 * Error happened. Move the pages to the refused list and update
		 * the pages number.
		 */
		list_move(&page->lru, &ctl->refused_pages);
		ctl->n_pages--;
		ctl->n_refused_pages++;
	}

	spin_unlock(&b->comm_lock);

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
  785. /**
  786. * vmballoon_release_page_list() - Releases a page list
  787. *
  788. * @page_list: list of pages to release.
  789. * @n_pages: pointer to the number of pages.
  790. * @page_size: whether the pages in the list are 2MB (or else 4KB).
  791. *
  792. * Releases the list of pages and zeros the number of pages.
  793. */
  794. static void vmballoon_release_page_list(struct list_head *page_list,
  795. int *n_pages,
  796. enum vmballoon_page_size_type page_size)
  797. {
  798. struct page *page, *tmp;
  799. list_for_each_entry_safe(page, tmp, page_list, lru) {
  800. list_del(&page->lru);
  801. __free_pages(page, vmballoon_page_order(page_size));
  802. }
  803. if (n_pages)
  804. *n_pages = 0;
  805. }
/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 * Updates the refused-free statistics before freeing the pages and
 * zeroing @ctl->n_refused_pages.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
					    struct vmballoon_ctl *ctl)
{
	vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
				 ctl->page_size);

	vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
				    ctl->page_size);
}
/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer for the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates a deflation. Zero means no
 * adjustment should be made now (reset pending, within the deflate slack, or
 * inflation temporarily disallowed after memory pressure).
 */
static int64_t vmballoon_change(struct vmballoon *b)
{
	int64_t size, target;

	size = atomic64_read(&b->size);
	target = READ_ONCE(b->target);

	/*
	 * We must cast first because of int sizes
	 * Otherwise we might get huge positives instead of negatives
	 */

	/* No adjustments while a reset of the protocol is pending. */
	if (b->reset_required)
		return 0;

	/* consider a 2MB slack on deflate, unless the balloon is emptied */
	if (target < size && target != 0 &&
	    size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
		return 0;

	/* If an out-of-memory recently occurred, inflation is disallowed. */
	if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
		return 0;

	return target - size;
}
/**
 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 *
 * Enqueues the provided list of pages in the ballooned page list, clears the
 * list and zeroes the number of pages that was provided.
 */
static void vmballoon_enqueue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size)
{
	unsigned long flags;
	struct page *page;

	if (page_size == VMW_BALLOON_4K_PAGE) {
		/* 4KB pages are handed to the balloon-compaction core. */
		balloon_page_list_enqueue(&b->b_dev_info, pages);
	} else {
		/*
		 * Keep the huge pages in a local list which is not available
		 * for the balloon compaction mechanism.
		 */
		spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);

		list_for_each_entry(page, pages, lru) {
			vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
		}

		list_splice_init(pages, &b->huge_pages);

		/* Account the inflation in basic (4KB) frame units. */
		__count_vm_events(BALLOON_INFLATE, *n_pages *
				  vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
		spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
	}

	*n_pages = 0;
}
/**
 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
 *
 * @b: pointer to balloon.
 * @pages: list onto which the dequeued pages are moved.
 * @n_pages: pointer to number of pages in list. Set to the number dequeued.
 * @page_size: whether the pages are 2MB or 4KB pages.
 * @n_req_pages: the number of requested pages.
 *
 * Dequeues the number of requested pages from the balloon for deflation. The
 * number of dequeued pages may be lower, if not enough pages in the requested
 * size are available.
 */
static void vmballoon_dequeue_page_list(struct vmballoon *b,
					struct list_head *pages,
					unsigned int *n_pages,
					enum vmballoon_page_size_type page_size,
					unsigned int n_req_pages)
{
	struct page *page, *tmp;
	unsigned int i = 0;
	unsigned long flags;

	/* In the case of 4k pages, use the compaction infrastructure */
	if (page_size == VMW_BALLOON_4K_PAGE) {
		*n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
						     n_req_pages);
		return;
	}

	/* 2MB pages: taken from the local huge-page list under pages_lock. */
	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
	list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
		vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);

		list_move(&page->lru, pages);
		if (++i == n_req_pages)
			break;
	}

	/* Account the deflation in basic (4KB) frame units. */
	__count_vm_events(BALLOON_DEFLATE,
			  i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
	*n_pages = i;
}
  923. /**
  924. * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
  925. *
  926. * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
  927. * due to one or few 4KB pages. These 2MB pages may keep being allocated and
  928. * then being refused. To prevent this case, this function splits the refused
  929. * pages into 4KB pages and adds them into @prealloc_pages list.
  930. *
  931. * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
  932. */
  933. static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
  934. {
  935. struct page *page, *tmp;
  936. unsigned int i, order;
  937. order = vmballoon_page_order(ctl->page_size);
  938. list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
  939. list_del(&page->lru);
  940. split_page(page, order);
  941. for (i = 0; i < (1 << order); i++)
  942. list_add(&page[i].lru, &ctl->prealloc_pages);
  943. }
  944. ctl->n_refused_pages = 0;
  945. }
/**
 * vmballoon_inflate() - Inflate the balloon towards its target size.
 *
 * @b: pointer to the balloon.
 *
 * Repeatedly allocates pages (starting at the largest supported page size),
 * locks them with the hypervisor and enqueues them on the balloon list until
 * the target is reached, a serious error occurs, or the host keeps refusing
 * pages at every size.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	int64_t to_inflate_frames;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
		.page_size = b->max_page_size,
		.op = VMW_BALLOON_INFLATE
	};

	while ((to_inflate_frames = vmballoon_change(b)) > 0) {
		unsigned int to_inflate_pages, page_in_frames;
		int alloc_error, lock_error = 0;

		VM_BUG_ON(!list_empty(&ctl.pages));
		VM_BUG_ON(ctl.n_pages != 0);

		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		/* Bound each round by the batch size at the current size. */
		to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_inflate_frames,
							  page_in_frames));

		/* Start by allocating */
		alloc_error = vmballoon_alloc_page_list(b, &ctl,
							to_inflate_pages);

		/* Actually lock the pages by telling the hypervisor */
		lock_error = vmballoon_lock(b, &ctl);

		/*
		 * If an error indicates that something serious went wrong,
		 * stop the inflation.
		 */
		if (lock_error)
			break;

		/* Update the balloon size */
		atomic64_add(ctl.n_pages * page_in_frames, &b->size);

		vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/*
		 * If allocation failed or the number of refused pages exceeds
		 * the maximum allowed, move to the next page size.
		 */
		if (alloc_error ||
		    ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
			if (ctl.page_size == VMW_BALLOON_4K_PAGE)
				break;

			/*
			 * Split the refused pages to 4k. This will also empty
			 * the refused pages list.
			 */
			vmballoon_split_refused_pages(&ctl);
			ctl.page_size--;
		}

		cond_resched();
	}

	/*
	 * Release pages that were allocated while attempting to inflate the
	 * balloon but were refused by the host for one reason or another,
	 * and update the statistics.
	 */
	if (ctl.n_refused_pages != 0)
		vmballoon_release_refused_pages(b, &ctl);

	vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
}
/**
 * vmballoon_deflate() - Decrease the size of the balloon.
 *
 * @b: pointer to the balloon
 * @n_frames: the number of frames to deflate. If zero, automatically
 * calculated according to the target size.
 * @coordinated: whether to coordinate with the host
 *
 * Decrease the size of the balloon allowing guest to use more memory.
 *
 * Return: The number of deflated frames (i.e., basic page size units)
 */
static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
				       bool coordinated)
{
	unsigned long deflated_frames = 0;
	unsigned long tried_frames = 0;
	struct vmballoon_ctl ctl = {
		.pages = LIST_HEAD_INIT(ctl.pages),
		.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
		.page_size = VMW_BALLOON_4K_PAGE,
		.op = VMW_BALLOON_DEFLATE
	};

	/* free pages to reach target */
	while (true) {
		unsigned int to_deflate_pages, n_unlocked_frames;
		unsigned int page_in_frames;
		int64_t to_deflate_frames;
		bool deflated_all;

		page_in_frames = vmballoon_page_in_frames(ctl.page_size);

		VM_BUG_ON(!list_empty(&ctl.pages));
		VM_BUG_ON(ctl.n_pages);
		VM_BUG_ON(!list_empty(&ctl.refused_pages));
		VM_BUG_ON(ctl.n_refused_pages);

		/*
		 * If we were requested a specific number of frames, we try to
		 * deflate this number of frames. Otherwise, deflation is
		 * performed according to the target and balloon size.
		 */
		to_deflate_frames = n_frames ? n_frames - tried_frames :
					       -vmballoon_change(b);

		/* break if no work to do */
		if (to_deflate_frames <= 0)
			break;

		/*
		 * Calculate the number of frames based on current page size,
		 * but limit the deflated frames to a single chunk
		 */
		to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
					 DIV_ROUND_UP_ULL(to_deflate_frames,
							  page_in_frames));

		/* First take the pages from the balloon pages. */
		vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
					    ctl.page_size, to_deflate_pages);

		/*
		 * Before pages are moving to the refused list, count their
		 * frames as frames that we tried to deflate.
		 */
		tried_frames += ctl.n_pages * page_in_frames;

		/*
		 * Unlock the pages by communicating with the hypervisor if the
		 * communication is coordinated (i.e., not pop). We ignore the
		 * return code. Instead we check whether all the pages were
		 * unlocked. If not, we will move to the next page size, and
		 * would eventually try again later.
		 */
		if (coordinated)
			vmballoon_lock(b, &ctl);

		/*
		 * Check if we deflated enough. We will move to the next page
		 * size if we did not manage to do so. This calculation takes
		 * place now, as once the pages are released, the number of
		 * pages is zeroed.
		 */
		deflated_all = (ctl.n_pages == to_deflate_pages);

		/* Update local and global counters */
		n_unlocked_frames = ctl.n_pages * page_in_frames;
		atomic64_sub(n_unlocked_frames, &b->size);
		deflated_frames += n_unlocked_frames;

		vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
					 ctl.page_size, ctl.n_pages);

		/* free the ballooned pages */
		vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
					    ctl.page_size);

		/* Return the refused pages to the ballooned list. */
		vmballoon_enqueue_page_list(b, &ctl.refused_pages,
					    &ctl.n_refused_pages,
					    ctl.page_size);

		/* If we failed to unlock all the pages, move to next size. */
		if (!deflated_all) {
			if (ctl.page_size == b->max_page_size)
				break;
			ctl.page_size++;
		}

		cond_resched();
	}

	return deflated_frames;
}
/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 * Falls back to one page per hypervisor call (@batch_max_pages = 1).
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
	static_branch_disable(&vmw_balloon_batching);
	b->batch_max_pages = 1;
}
  1124. /**
  1125. * vmballoon_init_batching - enable batching mode.
  1126. *
  1127. * @b: pointer to &struct vmballoon.
  1128. *
  1129. * Enables batching, by allocating a page for communication with the hypervisor
  1130. * and enabling the static_key to use batching.
  1131. *
  1132. * Return: zero on success or an appropriate error-code.
  1133. */
  1134. static int vmballoon_init_batching(struct vmballoon *b)
  1135. {
  1136. struct page *page;
  1137. page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  1138. if (!page)
  1139. return -ENOMEM;
  1140. b->batch_page = page_address(page);
  1141. b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
  1142. static_branch_enable(&vmw_balloon_batching);
  1143. return 0;
  1144. }
/*
 * Receive notification and resize balloon.
 *
 * VMCI doorbell callback: the host signalled a target change, so schedule
 * the worker to run immediately instead of waiting for the periodic timer.
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}
  1154. /*
  1155. * Clean up vmci doorbell
  1156. */
  1157. static void vmballoon_vmci_cleanup(struct vmballoon *b)
  1158. {
  1159. vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
  1160. VMCI_INVALID_ID, VMCI_INVALID_ID);
  1161. if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
  1162. vmci_doorbell_destroy(b->vmci_doorbell);
  1163. b->vmci_doorbell = VMCI_INVALID_HANDLE;
  1164. }
  1165. }
  1166. /**
  1167. * vmballoon_vmci_init - Initialize vmci doorbell.
  1168. *
  1169. * @b: pointer to the balloon.
  1170. *
  1171. * Return: zero on success or when wakeup command not supported. Error-code
  1172. * otherwise.
  1173. *
  1174. * Initialize vmci doorbell, to get notified as soon as balloon changes.
  1175. */
  1176. static int vmballoon_vmci_init(struct vmballoon *b)
  1177. {
  1178. unsigned long error;
  1179. if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
  1180. return 0;
  1181. error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
  1182. VMCI_PRIVILEGE_FLAG_RESTRICTED,
  1183. vmballoon_doorbell, b);
  1184. if (error != VMCI_SUCCESS)
  1185. goto fail;
  1186. error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
  1187. b->vmci_doorbell.context,
  1188. b->vmci_doorbell.resource, NULL);
  1189. if (error != VMW_BALLOON_SUCCESS)
  1190. goto fail;
  1191. return 0;
  1192. fail:
  1193. vmballoon_vmci_cleanup(b);
  1194. return -EIO;
  1195. }
  1196. /**
  1197. * vmballoon_pop - Quickly release all pages allocate for the balloon.
  1198. *
  1199. * @b: pointer to the balloon.
  1200. *
  1201. * This function is called when host decides to "reset" balloon for one reason
  1202. * or another. Unlike normal "deflate" we do not (shall not) notify host of the
  1203. * pages being released.
  1204. */
  1205. static void vmballoon_pop(struct vmballoon *b)
  1206. {
  1207. unsigned long size;
  1208. while ((size = atomic64_read(&b->size)))
  1209. vmballoon_deflate(b, size, false);
  1210. }
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 * Runs with @conf_sem held for writing to exclude all other operations.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	down_write(&b->conf_sem);

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	/* Renegotiate capabilities with the host. */
	if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		goto unlock;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		if (vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			goto unlock;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		vmballoon_deinit_batching(b);
	}

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err_once("failed to initialize vmci doorbell\n");

	if (vmballoon_send_guest_id(b))
		pr_err_once("failed to send guest ID to the host\n");

unlock:
	up_write(&b->conf_sem);
}
/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, gets the new size and adjusts balloon as
 * needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	int64_t change = 0;

	if (b->reset_required)
		vmballoon_reset(b);

	down_read(&b->conf_sem);

	/*
	 * Update the stats while holding the semaphore to ensure that
	 * @stats_enabled is consistent with whether the stats are actually
	 * enabled
	 */
	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);

	/* Refresh the target from the host before computing the change. */
	if (!vmballoon_send_get_target(b))
		change = vmballoon_change(b);

	if (change != 0) {
		pr_debug("%s - size: %llu, target %lu\n", __func__,
			 atomic64_read(&b->size), READ_ONCE(b->target));

		if (change > 0)
			vmballoon_inflate(b);
		else  /* (change < 0) */
			vmballoon_deflate(b, 0, true);
	}

	up_read(&b->conf_sem);

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}
/**
 * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
 * @shrinker: pointer to the balloon shrinker.
 * @sc: page reclaim information.
 *
 * Returns: number of pages that were freed during deflation.
 */
static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
					     struct shrink_control *sc)
{
	struct vmballoon *b = &balloon;
	unsigned long deflated_frames;

	pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size));

	vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);

	/*
	 * If the lock is also contended for read, we cannot easily reclaim and
	 * we bail out.
	 */
	if (!down_read_trylock(&b->conf_sem))
		return 0;

	deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);

	vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
				deflated_frames);

	/*
	 * Delay future inflation for some time to mitigate the situations in
	 * which balloon continuously grows and shrinks. Use WRITE_ONCE() since
	 * the access is asynchronous.
	 */
	WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);

	up_read(&b->conf_sem);

	return deflated_frames;
}
  1321. /**
  1322. * vmballoon_shrinker_count() - return the number of ballooned pages.
  1323. * @shrinker: pointer to the balloon shrinker.
  1324. * @sc: page reclaim information.
  1325. *
  1326. * Returns: number of 4k pages that are allocated for the balloon and can
  1327. * therefore be reclaimed under pressure.
  1328. */
  1329. static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
  1330. struct shrink_control *sc)
  1331. {
  1332. struct vmballoon *b = &balloon;
  1333. return atomic64_read(&b->size);
  1334. }
  1335. static void vmballoon_unregister_shrinker(struct vmballoon *b)
  1336. {
  1337. if (b->shrinker_registered)
  1338. unregister_shrinker(&b->shrinker);
  1339. b->shrinker_registered = false;
  1340. }
  1341. static int vmballoon_register_shrinker(struct vmballoon *b)
  1342. {
  1343. int r;
  1344. /* Do nothing if the shrinker is not enabled */
  1345. if (!vmwballoon_shrinker_enable)
  1346. return 0;
  1347. b->shrinker.scan_objects = vmballoon_shrinker_scan;
  1348. b->shrinker.count_objects = vmballoon_shrinker_count;
  1349. b->shrinker.seeks = DEFAULT_SEEKS;
  1350. r = register_shrinker(&b->shrinker, "vmw-balloon");
  1351. if (r == 0)
  1352. b->shrinker_registered = true;
  1353. return r;
  1354. }
  1355. /*
  1356. * DEBUGFS Interface
  1357. */
  1358. #ifdef CONFIG_DEBUG_FS
/* Human-readable names of the per-page-size statistics shown in debugfs. */
static const char * const vmballoon_stat_page_names[] = {
	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
};

/* Human-readable names of the general statistics shown in debugfs. */
static const char * const vmballoon_stat_names[] = {
	[VMW_BALLOON_STAT_TIMER]		= "timer",
	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell",
	[VMW_BALLOON_STAT_RESET]		= "reset",
	[VMW_BALLOON_STAT_SHRINK]		= "shrink",
	[VMW_BALLOON_STAT_SHRINK_FREE]		= "shrinkFree"
};
/*
 * Allocate and enable the statistics counters. Called lazily on the first
 * debugfs read. Takes @conf_sem for writing to serialize against concurrent
 * readers that may try to enable the stats at the same time.
 *
 * Return: zero on success, -ENOMEM if the counters cannot be allocated.
 */
static int vmballoon_enable_stats(struct vmballoon *b)
{
	int r = 0;

	down_write(&b->conf_sem);

	/* did we somehow race with another reader which enabled stats? */
	if (b->stats)
		goto out;

	b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);

	if (!b->stats) {
		/* allocation failed */
		r = -ENOMEM;
		goto out;
	}
	static_key_enable(&balloon_stat_enabled.key);
out:
	up_write(&b->conf_sem);
	return r;
}
/**
 * vmballoon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in vmmemctl in the debugfs.
 * To avoid the overhead - mainly that of memory - of collecting the statistics,
 * we only collect statistics after the first time the counters are read.
 *
 * Return: zero on success or an error code.
 */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	int i, j;

	/* enables stats if they are disabled */
	if (!b->stats) {
		int r = vmballoon_enable_stats(b);

		if (r)
			return r;
	}

	/* format capabilities info */
	seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
		   VMW_BALLOON_CAPABILITIES);
	seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
	seq_printf(f, "%-22s: %16s\n", "is resetting",
		   b->reset_required ? "y" : "n");

	/* format size info */
	seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
	seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));

	/* per-command success/failure counters */
	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
		if (vmballoon_cmd_names[i] == NULL)
			continue;

		seq_printf(f, "%-22s: %16llu (%llu failed)\n",
			   vmballoon_cmd_names[i],
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
	}

	/* general event counters */
	for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
		seq_printf(f, "%-22s: %16llu\n",
			   vmballoon_stat_names[i],
			   atomic64_read(&b->stats->general_stat[i]));

	/* per-page-size counters */
	for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
		for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
			seq_printf(f, "%-18s(%s): %16llu\n",
				   vmballoon_stat_page_names[i],
				   vmballoon_page_size_names[j],
				   atomic64_read(&b->stats->page_stat[i][j]));
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(vmballoon_debug);
/* Expose the balloon statistics as <debugfs>/vmmemctl (read-only). */
static void __init vmballoon_debugfs_init(struct vmballoon *b)
{
	debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
			    &vmballoon_debug_fops);
}
/* Remove the debugfs file and free the lazily-allocated statistics. */
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	static_key_disable(&balloon_stat_enabled.key);
	debugfs_lookup_and_remove("vmmemctl", NULL);
	kfree(b->stats);
	b->stats = NULL;
}
#else

/* Without CONFIG_DEBUG_FS the debugfs interface compiles to no-ops. */
static inline void vmballoon_debugfs_init(struct vmballoon *b)
{
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */
  1463. #ifdef CONFIG_BALLOON_COMPACTION
/**
 * vmballoon_migratepage() - migrates a balloon page.
 * @b_dev_info: balloon device information descriptor.
 * @newpage: the page to which @page should be migrated.
 * @page: a ballooned page that should be migrated.
 * @mode: migration mode, ignored.
 *
 * This function is really open-coded, but that is according to the interface
 * that balloon_compaction provides.
 *
 * Return: zero on success, -EAGAIN when migration cannot be performed
 * momentarily, and -EBUSY if migration failed and should be retried
 * with that specific page.
 */
static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
				 struct page *newpage, struct page *page,
				 enum migrate_mode mode)
{
	unsigned long status, flags;
	struct vmballoon *b;
	int ret;

	b = container_of(b_dev_info, struct vmballoon, b_dev_info);

	/*
	 * If the semaphore is taken, there is ongoing configuration change
	 * (i.e., balloon reset), so try again.
	 */
	if (!down_read_trylock(&b->conf_sem))
		return -EAGAIN;

	spin_lock(&b->comm_lock);
	/*
	 * We must start by deflating and not inflating, as otherwise the
	 * hypervisor may tell us that it has enough memory and the new page is
	 * not needed. Since the old page is isolated, we cannot use the list
	 * interface to unlock it, as the LRU field is used for isolation.
	 * Instead, we use the native interface directly.
	 */
	vmballoon_add_page(b, 0, page);
	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
				   VMW_BALLOON_DEFLATE);

	if (status == VMW_BALLOON_SUCCESS)
		status = vmballoon_status_page(b, 0, &page);

	/*
	 * If a failure happened, let the migration mechanism know that it
	 * should not retry.
	 */
	if (status != VMW_BALLOON_SUCCESS) {
		spin_unlock(&b->comm_lock);
		ret = -EBUSY;
		goto out_unlock;
	}

	/*
	 * The page is isolated, so it is safe to delete it without holding
	 * @pages_lock . We keep holding @comm_lock since we will need it in a
	 * second.
	 */
	balloon_page_delete(page);

	put_page(page);

	/* Inflate */
	vmballoon_add_page(b, 0, newpage);
	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
				   VMW_BALLOON_INFLATE);

	if (status == VMW_BALLOON_SUCCESS)
		status = vmballoon_status_page(b, 0, &newpage);

	spin_unlock(&b->comm_lock);

	if (status != VMW_BALLOON_SUCCESS) {
		/*
		 * A failure happened. While we can deflate the page we just
		 * inflated, this deflation can also encounter an error. Instead
		 * we will decrease the size of the balloon to reflect the
		 * change and report failure.
		 */
		atomic64_dec(&b->size);
		ret = -EBUSY;
	} else {
		/*
		 * Success. Take a reference for the page, and we will add it to
		 * the list after acquiring the lock.
		 */
		get_page(newpage);
		ret = MIGRATEPAGE_SUCCESS;
	}

	/* Update the balloon list under the @pages_lock */
	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);

	/*
	 * On inflation success, we already took a reference for the @newpage.
	 * If we succeed just insert it to the list and update the statistics
	 * under the lock.
	 */
	if (ret == MIGRATEPAGE_SUCCESS) {
		balloon_page_insert(&b->b_dev_info, newpage);
		__count_vm_event(BALLOON_MIGRATE);
	}

	/*
	 * We deflated successfully, so regardless to the inflation success, we
	 * need to reduce the number of isolated_pages.
	 */
	b->b_dev_info.isolated_pages--;
	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);

out_unlock:
	up_read(&b->conf_sem);
	return ret;
}
/**
 * vmballoon_compaction_init() - initialize compaction for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * Hooks the balloon's page-migration callback into the balloon_compaction
 * infrastructure. The caller must have already initialized @b->b_dev_info
 * via balloon_devinfo_init(). This function cannot fail and returns nothing.
 */
static __init void vmballoon_compaction_init(struct vmballoon *b)
{
	b->b_dev_info.migratepage = vmballoon_migratepage;
}
#else /* CONFIG_BALLOON_COMPACTION */
/* Compaction disabled: no migratepage callback is installed. */
static inline void vmballoon_compaction_init(struct vmballoon *b)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */
/*
 * Module/driver initialization: set up the global balloon state and start
 * the periodic adjustment work. Returns 0 on success or a negative errno.
 */
static int __init vmballoon_init(void)
{
	int error;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_register_shrinker(&balloon);
	if (error)
		goto fail;

	/*
	 * Initialization of compaction must be done after the call to
	 * balloon_devinfo_init() .
	 */
	balloon_devinfo_init(&balloon.b_dev_info);
	vmballoon_compaction_init(&balloon);

	INIT_LIST_HEAD(&balloon.huge_pages);
	spin_lock_init(&balloon.comm_lock);
	init_rwsem(&balloon.conf_sem);
	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	/* Force the work handler to perform the full start/reset handshake. */
	balloon.reset_required = true;

	/* Queue the first balloon adjustment to run immediately (delay 0). */
	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	vmballoon_debugfs_init(&balloon);

	return 0;
fail:
	vmballoon_unregister_shrinker(&balloon);
	return error;
}
/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);
/*
 * Module teardown: stop the shrinker, VMCI doorbell and delayed work, remove
 * the debugfs interface, then release all ballooned memory.
 */
static void __exit vmballoon_exit(void)
{
	vmballoon_unregister_shrinker(&balloon);
	vmballoon_vmci_cleanup(&balloon);
	/* No new work can be queued after this; wait for any in-flight run. */
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);