/******************************************************************************
 * grant_table.c
 *
 * Granting foreign access to our memory reservation.
 *
 * Copyright (c) 2005-2006, Christopher Clark
 * Copyright (c) 2004-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/workqueue.h>
#include <linux/ratelimit.h>
#include <linux/moduleparam.h>
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
#include <linux/dma-mapping.h>
#endif

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/page.h>
#include <xen/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
#include <xen/swiotlb-xen.h>
#include <xen/balloon.h>
#ifdef CONFIG_X86
#include <asm/xen/cpuid.h>
#endif
#include <xen/mem-reservation.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>

#include <asm/sync_bitops.h>

#define GNTTAB_LIST_END 0xffffffff

static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;

/*
 * Handling of free grants:
 *
 * Free grants are in a simple list anchored in gnttab_free_head. They are
 * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
 * of free entries is stored in gnttab_free_count.
 * Additionally there is a bitmap of free entries anchored in
 * gnttab_free_bitmap. This is being used for simplifying allocation of
 * multiple consecutive grants, which is needed e.g. for support of virtio.
 * gnttab_last_free is used to add free entries of new frames at the end of
 * the free list.
 * gnttab_free_tail_ptr specifies the variable which references the start
 * of consecutive free grants ending with gnttab_last_free. This pointer is
 * updated in a rather defensive way, in order to avoid performance hits in
 * hot paths.
 * All those variables are protected by gnttab_list_lock.
 */
static int gnttab_free_count;
static unsigned int gnttab_size;
static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
static grant_ref_t *gnttab_free_tail_ptr;
static unsigned long *gnttab_free_bitmap;
static DEFINE_SPINLOCK(gnttab_list_lock);
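
/*
 * Example (illustrative sketch, not part of the original source): the free
 * list and the bitmap above back the two public allocators further down in
 * this file. A driver needing individual references uses the free-list
 * path, while one needing consecutive references (e.g. virtio) uses the
 * bitmap-based path:
 *
 *	grant_ref_t head, first;
 *
 *	if (gnttab_alloc_grant_references(8, &head))		// list path
 *		return -ENOSPC;
 *	if (gnttab_alloc_grant_reference_seq(8, &first))	// bitmap path
 *		return -ENOSPC;
 */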

struct grant_frames xen_auto_xlat_grant_frames;
static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0);

static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;

/* This is a structure of function pointers for the grant table interface. */
struct gnttab_ops {
	/*
	 * Version of the grant interface.
	 */
	unsigned int version;
	/*
	 * Grant refs per grant frame.
	 */
	unsigned int grefs_per_grant_frame;
	/*
	 * Map a list of frames for storing grant entries. The frames
	 * parameter holds the grant table frame addresses when the grant
	 * table is set up, and nr_gframes is the number of frames to map.
	 * Returns GNTST_okay on success, a negative value on failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release the list of frames that were mapped by map_frames().
	 */
	void (*unmap_frames)(void);
	/*
	 * Introduce a valid entry into the grant table, granting the frame
	 * of this entry to a domain for access. The ref parameter is the
	 * reference of the introduced grant entry, domid is the id of the
	 * granted domain, frame is the page frame to be granted, and flags
	 * is the status to set in the grant entry.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to a domain. The ref parameter is the
	 * reference of the grant entry whose access will be stopped.
	 * If the grant entry is currently mapped for reading or writing,
	 * return failure (== 0) directly without tearing down the grant
	 * access. Otherwise, stop the grant access for this entry and
	 * return success (== 1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref);
	/*
	 * Read the frame number related to a given grant reference.
	 */
	unsigned long (*read_frame)(grant_ref_t ref);
};

struct unmap_refs_callback_data {
	struct completion completion;
	int result;
};

static const struct gnttab_ops *gnttab_interface;

/* This reflects the status of grant entries, so it acts as a global value. */
static grant_status_t *grstatus;

static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
#define SPP (PAGE_SIZE / sizeof(grant_status_t))

static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
	return &gnttab_list[(entry) / RPP][(entry) % RPP];
}
/* This can be used as an l-value */
#define gnttab_entry(entry) (*__gnttab_entry(entry))

static int get_free_entries(unsigned count)
{
	unsigned long flags;
	int ref, rc = 0;
	grant_ref_t head;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if ((gnttab_free_count < count) &&
	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		return rc;
	}

	ref = head = gnttab_free_head;
	gnttab_free_count -= count;
	while (count--) {
		bitmap_clear(gnttab_free_bitmap, head, 1);
		if (gnttab_free_tail_ptr == __gnttab_entry(head))
			gnttab_free_tail_ptr = &gnttab_free_head;
		if (count)
			head = gnttab_entry(head);
	}
	gnttab_free_head = gnttab_entry(head);
	gnttab_entry(head) = GNTTAB_LIST_END;

	if (!gnttab_free_count) {
		gnttab_last_free = GNTTAB_LIST_END;
		gnttab_free_tail_ptr = NULL;
	}

	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ref;
}

static int get_seq_entry_count(void)
{
	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
		return 0;

	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
}

/* Rebuilds the free grant list and tries to find count consecutive entries. */
static int get_free_seq(unsigned int count)
{
	int ret = -ENOSPC;
	unsigned int from, to;
	grant_ref_t *last;

	gnttab_free_tail_ptr = &gnttab_free_head;
	last = &gnttab_free_head;

	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
	     from < gnttab_size;
	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
					from + 1);
		if (ret < 0 && to - from >= count) {
			ret = from;
			bitmap_clear(gnttab_free_bitmap, ret, count);
			from += count;
			gnttab_free_count -= count;
			if (from == to)
				continue;
		}

		/*
		 * Recreate the free list in order to have it properly sorted.
		 * This is needed to make sure that the free tail has the
		 * maximum possible size.
		 */
		while (from < to) {
			*last = from;
			last = __gnttab_entry(from);
			gnttab_last_free = from;
			from++;
		}
		if (to < gnttab_size)
			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
	}

	*last = GNTTAB_LIST_END;
	if (gnttab_last_free != gnttab_size - 1)
		gnttab_free_tail_ptr = NULL;

	return ret;
}

static int get_free_entries_seq(unsigned int count)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if (gnttab_free_count < count) {
		ret = gnttab_expand(count - gnttab_free_count);
		if (ret < 0)
			goto out;
	}

	if (get_seq_entry_count() < count) {
		ret = get_free_seq(count);
		if (ret >= 0)
			goto out;
		ret = gnttab_expand(count - get_seq_entry_count());
		if (ret < 0)
			goto out;
	}

	ret = *gnttab_free_tail_ptr;
	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
	gnttab_free_count -= count;
	if (!gnttab_free_count)
		gnttab_free_tail_ptr = NULL;
	bitmap_clear(gnttab_free_bitmap, ret, count);

out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ret;
}

static void do_free_callbacks(void)
{
	struct gnttab_free_callback *callback, *next;

	callback = gnttab_free_callback_list;
	gnttab_free_callback_list = NULL;

	while (callback != NULL) {
		next = callback->next;
		if (gnttab_free_count >= callback->count) {
			callback->next = NULL;
			callback->fn(callback->arg);
		} else {
			callback->next = gnttab_free_callback_list;
			gnttab_free_callback_list = callback;
		}
		callback = next;
	}
}

static inline void check_free_callbacks(void)
{
	if (unlikely(gnttab_free_callback_list))
		do_free_callbacks();
}

static void put_free_entry_locked(grant_ref_t ref)
{
	if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
		return;

	gnttab_entry(ref) = gnttab_free_head;
	gnttab_free_head = ref;
	if (!gnttab_free_count)
		gnttab_last_free = ref;
	if (gnttab_free_tail_ptr == &gnttab_free_head)
		gnttab_free_tail_ptr = __gnttab_entry(ref);
	gnttab_free_count++;
	bitmap_set(gnttab_free_bitmap, ref, 1);
}

static void put_free_entry(grant_ref_t ref)
{
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	put_free_entry_locked(ref);
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}

static void gnttab_set_free(unsigned int start, unsigned int n)
{
	unsigned int i;

	for (i = start; i < start + n - 1; i++)
		gnttab_entry(i) = i + 1;

	gnttab_entry(i) = GNTTAB_LIST_END;
	if (!gnttab_free_count) {
		gnttab_free_head = start;
		gnttab_free_tail_ptr = &gnttab_free_head;
	} else {
		gnttab_entry(gnttab_last_free) = start;
	}
	gnttab_free_count += n;
	gnttab_last_free = i;

	bitmap_set(gnttab_free_bitmap, start, n);
}

/*
 * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
 * Introducing a valid entry into the grant table:
 *  1. Write ent->domid.
 *  2. Write ent->frame: Frame to which access is permitted.
 *  3. Write memory barrier (WMB).
 *  4. Write ent->flags, inc. valid type.
 */
static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned flags)
{
	gnttab_shared.v1[ref].domid = domid;
	gnttab_shared.v1[ref].frame = frame;
	wmb();
	gnttab_shared.v1[ref].flags = flags;
}

static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned int flags)
{
	gnttab_shared.v2[ref].hdr.domid = domid;
	gnttab_shared.v2[ref].full_page.frame = frame;
	wmb();	/* Hypervisor concurrent accesses. */
	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
}

/*
 * Public grant-issuing interface functions
 */
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int readonly)
{
	gnttab_interface->update_entry(ref, domid, frame,
			   GTF_permit_access | (readonly ? GTF_readonly : 0));
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);

int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
				int readonly)
{
	int ref;

	ref = get_free_entries(1);
	if (unlikely(ref < 0))
		return -ENOSPC;

	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);

	return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
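
/*
 * Example (illustrative sketch, not part of the original source): a typical
 * frontend grants a page to its backend and later revokes access again. The
 * backend domid would normally come from xenstore; dom0 is only assumed here:
 *
 *	struct page *page = alloc_page(GFP_KERNEL);
 *	int ref = gnttab_grant_foreign_access(0, xen_page_to_gfn(page), 0);
 *
 *	if (ref < 0)
 *		return ref;
 *	// ... pass ref to the backend and do I/O ...
 *	gnttab_end_foreign_access(ref, page);
 */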

static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
{
	u16 flags, nflags;
	u16 *pflags;

	pflags = &gnttab_shared.v1[ref].flags;
	nflags = *pflags;
	do {
		flags = nflags;
		if (flags & (GTF_reading|GTF_writing))
			return 0;
	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);

	return 1;
}

static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref)
{
	gnttab_shared.v2[ref].hdr.flags = 0;
	mb();	/* Concurrent access by hypervisor. */
	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
		return 0;
	} else {
		/*
		 * The read of grstatus needs to have acquire semantics.
		 * On x86, reads already have that, and we just need to
		 * protect against compiler reorderings.
		 * On other architectures we may need a full barrier.
		 */
#ifdef CONFIG_X86
		barrier();
#else
		mb();
#endif
	}

	return 1;
}

static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref)
{
	return gnttab_interface->end_foreign_access_ref(ref);
}

int gnttab_end_foreign_access_ref(grant_ref_t ref)
{
	if (_gnttab_end_foreign_access_ref(ref))
		return 1;
	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);

static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
{
	return gnttab_shared.v1[ref].frame;
}

static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
{
	return gnttab_shared.v2[ref].full_page.frame;
}

struct deferred_entry {
	struct list_head list;
	grant_ref_t ref;
	uint16_t warn_delay;
	struct page *page;
};
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);

static atomic64_t deferred_count;
static atomic64_t leaked_count;
static unsigned int free_per_iteration = 10;
module_param(free_per_iteration, uint, 0600);

static void gnttab_handle_deferred(struct timer_list *unused)
{
	unsigned int nr = READ_ONCE(free_per_iteration);
	const bool ignore_limit = nr == 0;
	struct deferred_entry *first = NULL;
	unsigned long flags;
	size_t freed = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
		struct deferred_entry *entry
			= list_first_entry(&deferred_list,
					   struct deferred_entry, list);

		if (entry == first)
			break;
		list_del(&entry->list);
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		if (_gnttab_end_foreign_access_ref(entry->ref)) {
			uint64_t ret = atomic64_dec_return(&deferred_count);

			put_free_entry(entry->ref);
			pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
				 entry->ref, page_to_pfn(entry->page),
				 (unsigned long long)ret);
			put_page(entry->page);
			freed++;
			kfree(entry);
			entry = NULL;
		} else {
			if (!--entry->warn_delay)
				pr_info("g.e. %#x still pending\n", entry->ref);
			if (!first)
				first = entry;
		}
		spin_lock_irqsave(&gnttab_list_lock, flags);
		if (entry)
			list_add_tail(&entry->list, &deferred_list);
	}
	if (list_empty(&deferred_list))
		WARN_ON(atomic64_read(&deferred_count));
	else if (!timer_pending(&deferred_timer)) {
		deferred_timer.expires = jiffies + HZ;
		add_timer(&deferred_timer);
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
	pr_debug("Freed %zu references\n", freed);
}

static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
	struct deferred_entry *entry;
	gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
	uint64_t leaked, deferred;

	entry = kmalloc(sizeof(*entry), gfp);
	if (!page) {
		unsigned long gfn = gnttab_interface->read_frame(ref);

		page = pfn_to_page(gfn_to_pfn(gfn));
		get_page(page);
	}

	if (entry) {
		unsigned long flags;

		entry->ref = ref;
		entry->page = page;
		entry->warn_delay = 60;
		spin_lock_irqsave(&gnttab_list_lock, flags);
		list_add_tail(&entry->list, &deferred_list);
		if (!timer_pending(&deferred_timer)) {
			deferred_timer.expires = jiffies + HZ;
			add_timer(&deferred_timer);
		}
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		deferred = atomic64_inc_return(&deferred_count);
		leaked = atomic64_read(&leaked_count);
		pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
			 ref, page ? page_to_pfn(page) : -1, deferred, leaked);
	} else {
		deferred = atomic64_read(&deferred_count);
		leaked = atomic64_inc_return(&leaked_count);
		pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
			ref, page ? page_to_pfn(page) : -1, deferred, leaked);
	}
}

int gnttab_try_end_foreign_access(grant_ref_t ref)
{
	int ret = _gnttab_end_foreign_access_ref(ref);

	if (ret)
		put_free_entry(ref);

	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);

void gnttab_end_foreign_access(grant_ref_t ref, struct page *page)
{
	if (gnttab_try_end_foreign_access(ref)) {
		if (page)
			put_page(page);
	} else
		gnttab_add_deferred(ref, page);
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
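
/*
 * Example (illustrative sketch, not part of the original source): a caller
 * that wants to keep ownership of the page instead of handing it over to
 * the deferred-free machinery can poll gnttab_try_end_foreign_access(),
 * assuming it is allowed to sleep:
 *
 *	while (!gnttab_try_end_foreign_access(ref))
 *		msleep(10);
 *	// the grant entry is free again and the page was never released
 */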

void gnttab_free_grant_reference(grant_ref_t ref)
{
	put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);

void gnttab_free_grant_references(grant_ref_t head)
{
	grant_ref_t ref;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (head != GNTTAB_LIST_END) {
		ref = gnttab_entry(head);
		put_free_entry_locked(head);
		head = ref;
	}
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);

void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
{
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	for (i = count; i > 0; i--)
		put_free_entry_locked(head + i - 1);
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);

int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
	int h = get_free_entries(count);

	if (h < 0)
		return -ENOSPC;

	*head = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);

int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
{
	int h;

	if (count == 1)
		h = get_free_entries(1);
	else
		h = get_free_entries_seq(count);

	if (h < 0)
		return -ENOSPC;

	*first = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);

int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
	return (*private_head == GNTTAB_LIST_END);
}
EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);

int gnttab_claim_grant_reference(grant_ref_t *private_head)
{
	grant_ref_t g = *private_head;

	if (unlikely(g == GNTTAB_LIST_END))
		return -ENOSPC;
	*private_head = gnttab_entry(g);
	return g;
}
EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);

void gnttab_release_grant_reference(grant_ref_t *private_head,
				    grant_ref_t release)
{
	gnttab_entry(release) = *private_head;
	*private_head = release;
}
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
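
/*
 * Example (illustrative sketch, not part of the original source): the
 * claim/release pair lets a driver pre-allocate a private pool of
 * references and hand them out without taking gnttab_list_lock each time:
 *
 *	grant_ref_t pool;
 *	int ref;
 *
 *	if (gnttab_alloc_grant_references(16, &pool))
 *		return -ENOSPC;
 *	ref = gnttab_claim_grant_reference(&pool);	// take one
 *	if (ref < 0)
 *		return ref;
 *	// ... use ref ...
 *	gnttab_release_grant_reference(&pool, ref);	// put it back
 *	gnttab_free_grant_references(pool);		// drop the whole pool
 */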

void gnttab_request_free_callback(struct gnttab_free_callback *callback,
				  void (*fn)(void *), void *arg, u16 count)
{
	unsigned long flags;
	struct gnttab_free_callback *cb;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	/* Check if the callback is already on the list */
	cb = gnttab_free_callback_list;
	while (cb) {
		if (cb == callback)
			goto out;
		cb = cb->next;
	}

	callback->fn = fn;
	callback->arg = arg;
	callback->count = count;
	callback->next = gnttab_free_callback_list;
	gnttab_free_callback_list = callback;
	check_free_callbacks();
out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
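
/*
 * Example (illustrative sketch, not part of the original source; the
 * surrounding driver structure is hypothetical): a driver that failed to
 * allocate references can request a notification once enough are free:
 *
 *	static void refs_available(void *arg)
 *	{
 *		struct my_frontend *fe = arg;	// hypothetical driver state
 *
 *		schedule_work(&fe->retry_work);
 *	}
 *
 *	gnttab_request_free_callback(&fe->callback, refs_available, fe, 16);
 *
 * The callback runs with gnttab_list_lock held, so it must not block.
 */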

void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
{
	struct gnttab_free_callback **pcb;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
		if (*pcb == callback) {
			*pcb = callback->next;
			break;
		}
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);

static unsigned int gnttab_frames(unsigned int frames, unsigned int align)
{
	return (frames * gnttab_interface->grefs_per_grant_frame + align - 1) /
	       align;
}

static int grow_gnttab_list(unsigned int more_frames)
{
	unsigned int new_nr_grant_frames, extra_entries, i;
	unsigned int nr_glist_frames, new_nr_glist_frames;
	unsigned int grefs_per_frame;

	grefs_per_frame = gnttab_interface->grefs_per_grant_frame;

	new_nr_grant_frames = nr_grant_frames + more_frames;
	extra_entries = more_frames * grefs_per_frame;

	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
	new_nr_glist_frames = gnttab_frames(new_nr_grant_frames, RPP);
	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
		if (!gnttab_list[i])
			goto grow_nomem;
	}

	gnttab_set_free(gnttab_size, extra_entries);

	if (!gnttab_free_tail_ptr)
		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);

	nr_grant_frames = new_nr_grant_frames;
	gnttab_size += extra_entries;

	check_free_callbacks();

	return 0;

grow_nomem:
	while (i-- > nr_glist_frames)
		free_page((unsigned long) gnttab_list[i]);
	return -ENOMEM;
}

static unsigned int __max_nr_grant_frames(void)
{
	struct gnttab_query_size query;
	int rc;

	query.dom = DOMID_SELF;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		return 4; /* Legacy max supported number of frames */

	return query.max_nr_frames;
}

unsigned int gnttab_max_grant_frames(void)
{
	unsigned int xen_max = __max_nr_grant_frames();
	static unsigned int boot_max_nr_grant_frames;

	/* First time, initialize it properly. */
	if (!boot_max_nr_grant_frames)
		boot_max_nr_grant_frames = __max_nr_grant_frames();

	if (xen_max > boot_max_nr_grant_frames)
		return boot_max_nr_grant_frames;
	return xen_max;
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);

int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
{
	xen_pfn_t *pfn;
	unsigned int max_nr_gframes = __max_nr_grant_frames();
	unsigned int i;
	void *vaddr;

	if (xen_auto_xlat_grant_frames.count)
		return -EINVAL;

	vaddr = memremap(addr, XEN_PAGE_SIZE * max_nr_gframes, MEMREMAP_WB);
	if (vaddr == NULL) {
		pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
			&addr);
		return -ENOMEM;
	}
	pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
	if (!pfn) {
		memunmap(vaddr);
		return -ENOMEM;
	}
	for (i = 0; i < max_nr_gframes; i++)
		pfn[i] = XEN_PFN_DOWN(addr) + i;

	xen_auto_xlat_grant_frames.vaddr = vaddr;
	xen_auto_xlat_grant_frames.pfn = pfn;
	xen_auto_xlat_grant_frames.count = max_nr_gframes;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);

void gnttab_free_auto_xlat_frames(void)
{
	if (!xen_auto_xlat_grant_frames.count)
		return;
	kfree(xen_auto_xlat_grant_frames.pfn);
	memunmap(xen_auto_xlat_grant_frames.vaddr);

	xen_auto_xlat_grant_frames.pfn = NULL;
	xen_auto_xlat_grant_frames.count = 0;
	xen_auto_xlat_grant_frames.vaddr = NULL;
}
EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);

int gnttab_pages_set_private(int nr_pages, struct page **pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
#if BITS_PER_LONG < 64
		struct xen_page_foreign *foreign;

		foreign = kzalloc(sizeof(*foreign), GFP_KERNEL);
		if (!foreign)
			return -ENOMEM;

		set_page_private(pages[i], (unsigned long)foreign);
#endif
		SetPagePrivate(pages[i]);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_pages_set_private);

/**
 * gnttab_alloc_pages - allocate pages suitable for mapping grants into
 * @nr_pages: number of pages to alloc
 * @pages: returns the pages
 */
int gnttab_alloc_pages(int nr_pages, struct page **pages)
{
	int ret;

	ret = xen_alloc_unpopulated_pages(nr_pages, pages);
	if (ret < 0)
		return ret;

	ret = gnttab_pages_set_private(nr_pages, pages);
	if (ret < 0)
		gnttab_free_pages(nr_pages, pages);

	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_pages);

#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
static inline void cache_init(struct gnttab_page_cache *cache)
{
	cache->pages = NULL;
}

static inline bool cache_empty(struct gnttab_page_cache *cache)
{
	return !cache->pages;
}

static inline struct page *cache_deq(struct gnttab_page_cache *cache)
{
	struct page *page;

	page = cache->pages;
	cache->pages = page->zone_device_data;

	return page;
}

static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
{
	page->zone_device_data = cache->pages;
	cache->pages = page;
}
#else
static inline void cache_init(struct gnttab_page_cache *cache)
{
	INIT_LIST_HEAD(&cache->pages);
}

static inline bool cache_empty(struct gnttab_page_cache *cache)
{
	return list_empty(&cache->pages);
}

static inline struct page *cache_deq(struct gnttab_page_cache *cache)
{
	struct page *page;

	page = list_first_entry(&cache->pages, struct page, lru);
	list_del(&page->lru);

	return page;
}

static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
{
	list_add(&page->lru, &cache->pages);
}
#endif

void gnttab_page_cache_init(struct gnttab_page_cache *cache)
{
	spin_lock_init(&cache->lock);
	cache_init(cache);
	cache->num_pages = 0;
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_init);

int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);

	if (cache_empty(cache)) {
		spin_unlock_irqrestore(&cache->lock, flags);
		return gnttab_alloc_pages(1, page);
	}

	page[0] = cache_deq(cache);
	cache->num_pages--;

	spin_unlock_irqrestore(&cache->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_get);

void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
			   unsigned int num)
{
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&cache->lock, flags);

	for (i = 0; i < num; i++)
		cache_enq(cache, page[i]);
	cache->num_pages += num;

	spin_unlock_irqrestore(&cache->lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_put);

void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
{
	struct page *page[10];
	unsigned int i = 0;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);

	while (cache->num_pages > num) {
		page[i] = cache_deq(cache);
		cache->num_pages--;
		if (++i == ARRAY_SIZE(page)) {
			spin_unlock_irqrestore(&cache->lock, flags);
			gnttab_free_pages(i, page);
			i = 0;
			spin_lock_irqsave(&cache->lock, flags);
		}
	}

	spin_unlock_irqrestore(&cache->lock, flags);

	if (i != 0)
		gnttab_free_pages(i, page);
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);
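
/*
 * Example (illustrative sketch, not part of the original source): backends
 * can keep a small cache of grant-mappable pages instead of hitting the
 * unpopulated-page allocator on every request:
 *
 *	struct gnttab_page_cache cache;
 *	struct page *page;
 *
 *	gnttab_page_cache_init(&cache);
 *	if (gnttab_page_cache_get(&cache, &page))
 *		return -ENOMEM;
 *	// ... map a grant into the page, use it, unmap it ...
 *	gnttab_page_cache_put(&cache, &page, 1);
 *	gnttab_page_cache_shrink(&cache, 0);	// release everything again
 */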

void gnttab_pages_clear_private(int nr_pages, struct page **pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		if (PagePrivate(pages[i])) {
#if BITS_PER_LONG < 64
			kfree((void *)page_private(pages[i]));
#endif
			ClearPagePrivate(pages[i]);
		}
	}
}
EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);

/**
 * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
 * @nr_pages: number of pages to free
 * @pages: the pages
 */
void gnttab_free_pages(int nr_pages, struct page **pages)
{
	gnttab_pages_clear_private(nr_pages, pages);
	xen_free_unpopulated_pages(nr_pages, pages);
}
EXPORT_SYMBOL_GPL(gnttab_free_pages);

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/**
 * gnttab_dma_alloc_pages - allocate DMAable pages suitable for mapping grants into
 * @args: arguments to the function
 */
int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args)
{
	unsigned long pfn, start_pfn;
	size_t size;
	int i, ret;

	if (args->nr_pages < 0 || args->nr_pages > (INT_MAX >> PAGE_SHIFT))
		return -ENOMEM;

	size = args->nr_pages << PAGE_SHIFT;
	if (args->coherent)
		args->vaddr = dma_alloc_coherent(args->dev, size,
						 &args->dev_bus_addr,
						 GFP_KERNEL | __GFP_NOWARN);
	else
		args->vaddr = dma_alloc_wc(args->dev, size,
					   &args->dev_bus_addr,
					   GFP_KERNEL | __GFP_NOWARN);
	if (!args->vaddr) {
		pr_debug("Failed to allocate DMA buffer of size %zu\n", size);
		return -ENOMEM;
	}

	start_pfn = __phys_to_pfn(args->dev_bus_addr);
	for (pfn = start_pfn, i = 0; pfn < start_pfn + args->nr_pages;
			pfn++, i++) {
		struct page *page = pfn_to_page(pfn);

		args->pages[i] = page;
		args->frames[i] = xen_page_to_gfn(page);
		xenmem_reservation_scrub_page(page);
	}

	xenmem_reservation_va_mapping_reset(args->nr_pages, args->pages);

	ret = xenmem_reservation_decrease(args->nr_pages, args->frames);
	if (ret != args->nr_pages) {
		pr_debug("Failed to decrease reservation for DMA buffer\n");
		ret = -EFAULT;
		goto fail;
	}

	ret = gnttab_pages_set_private(args->nr_pages, args->pages);
	if (ret < 0)
		goto fail;

	return 0;

fail:
	gnttab_dma_free_pages(args);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_dma_alloc_pages);

/**
 * gnttab_dma_free_pages - free DMAable pages
 * @args: arguments to the function
 */
int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
{
	size_t size;
	int i, ret;

	gnttab_pages_clear_private(args->nr_pages, args->pages);

	for (i = 0; i < args->nr_pages; i++)
		args->frames[i] = page_to_xen_pfn(args->pages[i]);

	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
	if (ret != args->nr_pages) {
		pr_debug("Failed to increase reservation for DMA buffer\n");
		ret = -EFAULT;
	} else {
		ret = 0;
	}

	xenmem_reservation_va_mapping_update(args->nr_pages, args->pages,
					     args->frames);

	size = args->nr_pages << PAGE_SHIFT;
	if (args->coherent)
		dma_free_coherent(args->dev, size,
				  args->vaddr, args->dev_bus_addr);
	else
		dma_free_wc(args->dev, size,
			    args->vaddr, args->dev_bus_addr);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_dma_free_pages);
#endif
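
/*
 * Example (illustrative sketch, not part of the original source; field
 * values are placeholders): a driver fills in struct gnttab_dma_alloc_args
 * once and uses the same structure for allocation and freeing:
 *
 *	struct gnttab_dma_alloc_args args = {
 *		.dev      = dev,
 *		.coherent = true,
 *		.nr_pages = nr_pages,
 *		.pages    = pages,
 *		.frames   = frames,
 *	};
 *
 *	ret = gnttab_dma_alloc_pages(&args);
 *	if (ret)
 *		return ret;
 *	// ... grant args->frames[] to the backend ...
 *	ret = gnttab_dma_free_pages(&args);
 */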

/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
static inline void
gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
			const char *func)
{
	unsigned delay = 1;

	do {
		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
		if (*status == GNTST_eagain)
			msleep(delay++);
	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));

	if (delay >= MAX_DELAY) {
		pr_err("%s: %s eagain grant\n", func, current->comm);
		*status = GNTST_bad_page;
	}
}

void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
{
	struct gnttab_map_grant_ref *op;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
		BUG();
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
						&op->status, __func__);
}
EXPORT_SYMBOL_GPL(gnttab_batch_map);

void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
{
	struct gnttab_copy *op;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
		BUG();
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
						&op->status, __func__);
}
EXPORT_SYMBOL_GPL(gnttab_batch_copy);
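
/*
 * Example (illustrative sketch, not part of the original source; values
 * are placeholders): copying one granted page from the other end into a
 * local frame via the hypervisor:
 *
 *	struct gnttab_copy op = {
 *		.source.u.ref = ref,
 *		.source.domid = otherend_id,
 *		.dest.u.gmfn  = gfn,
 *		.dest.domid   = DOMID_SELF,
 *		.len          = XEN_PAGE_SIZE,
 *		.flags        = GNTCOPY_source_gref,
 *	};
 *
 *	gnttab_batch_copy(&op, 1);
 *	if (op.status != GNTST_okay)
 *		return -EIO;
 */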

void gnttab_foreach_grant_in_range(struct page *page,
				   unsigned int offset,
				   unsigned int len,
				   xen_grant_fn_t fn,
				   void *data)
{
	unsigned int goffset;
	unsigned int glen;
	unsigned long xen_pfn;

	len = min_t(unsigned int, PAGE_SIZE - offset, len);
	goffset = xen_offset_in_page(offset);

	xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset);

	while (len) {
		glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len);
		fn(pfn_to_gfn(xen_pfn), goffset, glen, data);

		goffset = 0;
		xen_pfn++;
		len -= glen;
	}
}
EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range);
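
/*
 * Example (illustrative sketch, not part of the original source; the
 * context structure is hypothetical): e.g. with 64 KiB kernel pages one
 * page spans several 4 KiB Xen grants, so a callback grants each sub-frame:
 *
 *	static void grant_one(unsigned long gfn, unsigned int offset,
 *			      unsigned int len, void *data)
 *	{
 *		struct grant_ctx *ctx = data;	// hypothetical context
 *
 *		gnttab_grant_foreign_access_ref(ctx->refs[ctx->i++],
 *						ctx->domid, gfn, 0);
 *	}
 *
 *	gnttab_foreach_grant_in_range(page, 0, PAGE_SIZE, grant_one, &ctx);
 */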

void gnttab_foreach_grant(struct page **pages,
			  unsigned int nr_grefs,
			  xen_grant_fn_t fn,
			  void *data)
{
	unsigned int goffset = 0;
	unsigned long xen_pfn = 0;
	unsigned int i;

	for (i = 0; i < nr_grefs; i++) {
		if ((i % XEN_PFN_PER_PAGE) == 0) {
			xen_pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);
			goffset = 0;
		}

		fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data);

		goffset += XEN_PAGE_SIZE;
		xen_pfn++;
	}
}

int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
		    struct gnttab_map_grant_ref *kmap_ops,
		    struct page **pages, unsigned int count)
{
	int i, ret;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++) {
		switch (map_ops[i].status) {
		case GNTST_okay:
		{
			struct xen_page_foreign *foreign;

			SetPageForeign(pages[i]);
			foreign = xen_page_foreign(pages[i]);
			foreign->domid = map_ops[i].dom;
			foreign->gref = map_ops[i].ref;
			break;
		}

		case GNTST_no_device_space:
			pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
			break;

		case GNTST_eagain:
			/* Retry eagain maps */
			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
						map_ops + i,
						&map_ops[i].status, __func__);
			/* Test status in next loop iteration. */
			i--;
			break;

		default:
			break;
		}
	}

	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_map_refs);

int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
		      struct gnttab_unmap_grant_ref *kunmap_ops,
		      struct page **pages, unsigned int count)
{
	unsigned int i;
	int ret;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++)
		ClearPageForeign(pages[i]);

	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);

#define GNTTAB_UNMAP_REFS_DELAY 5

static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);

static void gnttab_unmap_work(struct work_struct *work)
{
	struct gntab_unmap_queue_data
		*unmap_data = container_of(work,
					   struct gntab_unmap_queue_data,
					   gnttab_work.work);
	if (unmap_data->age != UINT_MAX)
		unmap_data->age++;
	__gnttab_unmap_refs_async(unmap_data);
}

static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
{
	int ret;
	int pc;

	for (pc = 0; pc < item->count; pc++) {
		if (page_count(item->pages[pc]) > 1) {
			unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1);

			schedule_delayed_work(&item->gnttab_work,
					      msecs_to_jiffies(delay));
			return;
		}
	}

	ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops,
				item->pages, item->count);
	item->done(ret, item);
}

void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
{
	INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work);
	item->age = 0;

	__gnttab_unmap_refs_async(item);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);

static void unmap_refs_callback(int result,
				struct gntab_unmap_queue_data *data)
{
	struct unmap_refs_callback_data *d = data->data;

	d->result = result;
	complete(&d->completion);
}

int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
{
	struct unmap_refs_callback_data data;

	init_completion(&data.completion);
	item->data = &data;
	item->done = &unmap_refs_callback;
	gnttab_unmap_refs_async(item);
	wait_for_completion(&data.completion);

	return data.result;
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
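
/*
 * Example (illustrative sketch, not part of the original source; the queue
 * item is assumed to be driver-owned and must stay alive until done runs):
 *
 *	static void unmap_done(int result, struct gntab_unmap_queue_data *d)
 *	{
 *		if (result)
 *			pr_err("unmap failed: %d\n", result);
 *	}
 *
 *	item->unmap_ops  = unmap_ops;
 *	item->kunmap_ops = NULL;
 *	item->pages      = pages;
 *	item->count      = count;
 *	item->done       = unmap_done;
 *	gnttab_unmap_refs_async(item);
 *
 * gnttab_unmap_refs_sync() wraps exactly this pattern around a completion.
 */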

static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
	return gnttab_frames(nr_grant_frames, SPP);
}

static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
{
	int rc;

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}

static void gnttab_unmap_frames_v1(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
}

static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
{
	uint64_t *sframes;
	unsigned int nr_sframes;
	struct gnttab_get_status_frames getframes;
	int rc;

	nr_sframes = nr_status_frames(nr_gframes);

	/* No need for kzalloc as it is initialized in following hypercall
	 * GNTTABOP_get_status_frames.
	 */
	sframes = kmalloc_array(nr_sframes, sizeof(uint64_t), GFP_ATOMIC);
	if (!sframes)
		return -ENOMEM;

	getframes.dom = DOMID_SELF;
	getframes.nr_frames = nr_sframes;
	set_xen_guest_handle(getframes.frame_list, sframes);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
				       &getframes, 1);
	if (rc == -ENOSYS) {
		kfree(sframes);
		return -ENOSYS;
	}

	BUG_ON(rc || getframes.status);

	rc = arch_gnttab_map_status(sframes, nr_sframes,
				    nr_status_frames(gnttab_max_grant_frames()),
				    &grstatus);
	BUG_ON(rc);
	kfree(sframes);

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}

static void gnttab_unmap_frames_v2(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
}

static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
	xen_pfn_t *frames;
	unsigned int nr_gframes = end_idx + 1;
	int rc;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		struct xen_add_to_physmap xatp;
		unsigned int i = end_idx;

		rc = 0;
		BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
		/*
		 * Loop backwards, so that the first hypercall has the largest
		 * index, ensuring that the table will grow only once.
		 */
		do {
			xatp.domid = DOMID_SELF;
			xatp.idx = i;
			xatp.space = XENMAPSPACE_grant_table;
			xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
			if (rc != 0) {
				pr_warn("grant table add_to_physmap failed, err=%d\n",
					rc);
				break;
			}
		} while (i-- > start_idx);

		return rc;
	}

	/* No need for kzalloc as it is initialized in following hypercall
	 * GNTTABOP_setup_table.
	 */
	frames = kmalloc_array(nr_gframes, sizeof(unsigned long), GFP_ATOMIC);
	if (!frames)
		return -ENOMEM;

	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_gframes;
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
		kfree(frames);
		return -ENOSYS;
	}

	BUG_ON(rc || setup.status);

	rc = gnttab_interface->map_frames(frames, nr_gframes);

	kfree(frames);

	return rc;
}

static const struct gnttab_ops gnttab_v1_ops = {
	.version			= 1,
	.grefs_per_grant_frame		= XEN_PAGE_SIZE /
					  sizeof(struct grant_entry_v1),
	.map_frames			= gnttab_map_frames_v1,
	.unmap_frames			= gnttab_unmap_frames_v1,
	.update_entry			= gnttab_update_entry_v1,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
	.read_frame			= gnttab_read_frame_v1,
};

static const struct gnttab_ops gnttab_v2_ops = {
	.version			= 2,
	.grefs_per_grant_frame		= XEN_PAGE_SIZE /
					  sizeof(union grant_entry_v2),
	.map_frames			= gnttab_map_frames_v2,
	.unmap_frames			= gnttab_unmap_frames_v2,
	.update_entry			= gnttab_update_entry_v2,
	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
	.read_frame			= gnttab_read_frame_v2,
};

static bool gnttab_need_v2(void)
{
#ifdef CONFIG_X86
	uint32_t base, width;

	if (xen_pv_domain()) {
		base = xen_cpuid_base();
		if (cpuid_eax(base) < 5)
			return false;	/* Information not available, use V1. */
		width = cpuid_ebx(base + 5) &
			XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK;
		return width > 32 + PAGE_SHIFT;
	}
#endif
	return !!(max_possible_pfn >> 32);
}

static void gnttab_request_version(void)
{
	long rc;
	struct gnttab_set_version gsv;

	if (gnttab_need_v2())
		gsv.version = 2;
	else
		gsv.version = 1;

	/* Boot parameter overrides automatic selection. */
	if (xen_gnttab_version >= 1 && xen_gnttab_version <= 2)
		gsv.version = xen_gnttab_version;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
	if (rc == 0 && gsv.version == 2)
		gnttab_interface = &gnttab_v2_ops;
	else
		gnttab_interface = &gnttab_v1_ops;
	pr_info("Grant tables using version %d layout\n",
		gnttab_interface->version);
}

static int gnttab_setup(void)
{
	unsigned int max_nr_gframes;

	max_nr_gframes = gnttab_max_grant_frames();
	if (max_nr_gframes < nr_grant_frames)
		return -ENOSYS;

	if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
		gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
		if (gnttab_shared.addr == NULL) {
			pr_warn("gnttab share frames is not mapped!\n");
			return -ENOMEM;
		}
	}
	return gnttab_map(0, nr_grant_frames - 1);
}

int gnttab_resume(void)
{
	gnttab_request_version();
	return gnttab_setup();
}

int gnttab_suspend(void)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		gnttab_interface->unmap_frames();
	return 0;
}

static int gnttab_expand(unsigned int req_entries)
{
	int rc;
	unsigned int cur, extra;

	cur = nr_grant_frames;
	extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
		 gnttab_interface->grefs_per_grant_frame);
	if (cur + extra > gnttab_max_grant_frames()) {
		pr_warn_ratelimited("xen/grant-table: max_grant_frames reached"
				    " cur=%u extra=%u limit=%u"
				    " gnttab_free_count=%u req_entries=%u\n",
				    cur, extra, gnttab_max_grant_frames(),
				    gnttab_free_count, req_entries);
		return -ENOSPC;
	}

	rc = gnttab_map(cur, cur + extra - 1);
	if (rc == 0)
		rc = grow_gnttab_list(extra);

	return rc;
}

int gnttab_init(void)
{
	int i;
	unsigned long max_nr_grant_frames, max_nr_grefs;
	unsigned int max_nr_glist_frames, nr_glist_frames;
	int ret;

	gnttab_request_version();
	max_nr_grant_frames = gnttab_max_grant_frames();
	max_nr_grefs = max_nr_grant_frames *
		       gnttab_interface->grefs_per_grant_frame;
	nr_grant_frames = 1;

	/* Determine the maximum number of frames required for the
	 * grant reference free list on the current hypervisor.
	 */
	max_nr_glist_frames = max_nr_grefs / RPP;

	gnttab_list = kmalloc_array(max_nr_glist_frames,
				    sizeof(grant_ref_t *),
				    GFP_KERNEL);
	if (gnttab_list == NULL)
		return -ENOMEM;

	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
	for (i = 0; i < nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
		if (gnttab_list[i] == NULL) {
			ret = -ENOMEM;
			goto ini_nomem;
		}
	}

	gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
	if (!gnttab_free_bitmap) {
		ret = -ENOMEM;
		goto ini_nomem;
	}

	ret = arch_gnttab_init(max_nr_grant_frames,
			       nr_status_frames(max_nr_grant_frames));
	if (ret < 0)
		goto ini_nomem;

	if (gnttab_setup() < 0) {
		ret = -ENODEV;
		goto ini_nomem;
	}

	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;

	gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
			gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);

	pr_info("Grant table initialized\n");
	return 0;

ini_nomem:
	for (i--; i >= 0; i--)
		free_page((unsigned long)gnttab_list[i]);
	kfree(gnttab_list);
	bitmap_free(gnttab_free_bitmap);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_init);

static int __gnttab_init(void)
{
	if (!xen_domain())
		return -ENODEV;

	/* Delay grant-table initialization in the PV on HVM case */
	if (xen_hvm_domain() && !xen_pvh_domain())
		return 0;

	return gnttab_init();
}
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
 * beforehand to initialize xen_auto_xlat_grant_frames. */
core_initcall_sync(__gnttab_init);