mmu.c

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * KVM/MIPS MMU handling in the KVM module.
 *
 * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
 * Authors: Sanjay Lal <[email protected]>
 */

#include <linux/highmem.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
 * for which pages need to be cached.
 */
#if defined(__PAGETABLE_PMD_FOLDED)
#define KVM_MMU_CACHE_MIN_PAGES 1
#else
#define KVM_MMU_CACHE_MIN_PAGES 2
#endif
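
/*
 * With the PMD level folded, a walk below the PGD can allocate at most one
 * new table (the PTE table); otherwise it may need both a PMD and a PTE
 * table, which is what the two values above correspond to.
 */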

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
        kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

/**
 * kvm_pgd_init() - Initialise KVM GPA page directory.
 * @page: Pointer to page directory (PGD) for KVM GPA.
 *
 * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
 * representing no mappings. This is similar to pgd_init(), however it
 * initialises all the page directory pointers, not just the ones corresponding
 * to the userland address space (since it is for the guest physical address
 * space rather than a virtual address space).
 */
static void kvm_pgd_init(void *page)
{
        unsigned long *p, *end;
        unsigned long entry;

#ifdef __PAGETABLE_PMD_FOLDED
        entry = (unsigned long)invalid_pte_table;
#else
        entry = (unsigned long)invalid_pmd_table;
#endif

        p = (unsigned long *)page;
        end = p + PTRS_PER_PGD;
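
        /*
         * Unrolled to write eight entries per iteration, mirroring pgd_init();
         * this relies on PTRS_PER_PGD being a multiple of eight.
         */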
        do {
                p[0] = entry;
                p[1] = entry;
                p[2] = entry;
                p[3] = entry;
                p[4] = entry;
                p += 8;
                p[-3] = entry;
                p[-2] = entry;
                p[-1] = entry;
        } while (p != end);
}

/**
 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
 *
 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
 * to host physical page mappings.
 *
 * Returns: Pointer to new KVM GPA page directory.
 *          NULL on allocation failure.
 */
pgd_t *kvm_pgd_alloc(void)
{
        pgd_t *ret;

        ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
        if (ret)
                kvm_pgd_init(ret);

        return ret;
}

/**
 * kvm_mips_walk_pgd() - Walk page table with optional allocation.
 * @pgd:   Page directory pointer.
 * @addr:  Address to index page table using.
 * @cache: MMU page cache to allocate new page tables from, or NULL.
 *
 * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
 * address @addr. If page tables don't exist for @addr, they will be created
 * from the MMU cache if @cache is not NULL.
 *
 * Returns: Pointer to pte_t corresponding to @addr.
 *          NULL if a page table doesn't exist for @addr and !@cache.
 *          NULL if a page table allocation failed.
 */
static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
                                unsigned long addr)
{
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd += pgd_index(addr);
        if (pgd_none(*pgd)) {
                /* Not used on MIPS yet */
                BUG();
                return NULL;
        }
        p4d = p4d_offset(pgd, addr);
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
                pmd_t *new_pmd;

                if (!cache)
                        return NULL;
                new_pmd = kvm_mmu_memory_cache_alloc(cache);
                pmd_init((unsigned long)new_pmd,
                         (unsigned long)invalid_pte_table);
                pud_populate(NULL, pud, new_pmd);
        }
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd)) {
                pte_t *new_pte;

                if (!cache)
                        return NULL;
                new_pte = kvm_mmu_memory_cache_alloc(cache);
                clear_page(new_pte);
                pmd_populate_kernel(NULL, pmd, new_pte);
        }
        return pte_offset_kernel(pmd, addr);
}

/* Caller must hold kvm->mmu_lock */
static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
                                   struct kvm_mmu_memory_cache *cache,
                                   unsigned long addr)
{
        return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
}

/*
 * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
 * Flush a range of guest physical address space from the VM's GPA page tables.
 */
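/*
 * Each helper returns true when its whole table range was covered (and, at
 * the upper levels, every child table could itself be freed), telling the
 * caller it is safe to free that table and clear the parent entry.
 */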
static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
                                   unsigned long end_gpa)
{
        int i_min = pte_index(start_gpa);
        int i_max = pte_index(end_gpa);
        bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
        int i;

        for (i = i_min; i <= i_max; ++i) {
                if (!pte_present(pte[i]))
                        continue;

                set_pte(pte + i, __pte(0));
        }
        return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
                                   unsigned long end_gpa)
{
        pte_t *pte;
        unsigned long end = ~0ul;
        int i_min = pmd_index(start_gpa);
        int i_max = pmd_index(end_gpa);
        bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
        int i;

        for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
                if (!pmd_present(pmd[i]))
                        continue;

                pte = pte_offset_kernel(pmd + i, 0);
                if (i == i_max)
                        end = end_gpa;

                if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
                        pmd_clear(pmd + i);
                        pte_free_kernel(NULL, pte);
                } else {
                        safe_to_remove = false;
                }
        }
        return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
                                   unsigned long end_gpa)
{
        pmd_t *pmd;
        unsigned long end = ~0ul;
        int i_min = pud_index(start_gpa);
        int i_max = pud_index(end_gpa);
        bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
        int i;

        for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
                if (!pud_present(pud[i]))
                        continue;

                pmd = pmd_offset(pud + i, 0);
                if (i == i_max)
                        end = end_gpa;

                if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
                        pud_clear(pud + i);
                        pmd_free(NULL, pmd);
                } else {
                        safe_to_remove = false;
                }
        }
        return safe_to_remove;
}

static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
                                   unsigned long end_gpa)
{
        p4d_t *p4d;
        pud_t *pud;
        unsigned long end = ~0ul;
        int i_min = pgd_index(start_gpa);
        int i_max = pgd_index(end_gpa);
        bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
        int i;

        for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
                if (!pgd_present(pgd[i]))
                        continue;
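
                /*
                 * The p4d level is folded on MIPS, so p4d_offset() here only
                 * casts the top-level table; indexing p4d + i is effectively
                 * indexing the pgd itself.
                 */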
                p4d = p4d_offset(pgd, 0);
                pud = pud_offset(p4d + i, 0);
                if (i == i_max)
                        end = end_gpa;

                if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
                        pgd_clear(pgd + i);
                        pud_free(NULL, pud);
                } else {
                        safe_to_remove = false;
                }
        }
        return safe_to_remove;
}

/**
 * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
 * @kvm:       KVM pointer.
 * @start_gfn: Guest frame number of first page in GPA range to flush.
 * @end_gfn:   Guest frame number of last page in GPA range to flush.
 *
 * Flushes a range of GPA mappings from the GPA page tables.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns: Whether it is safe to remove the top level page directory because
 *          all lower levels have been removed.
 */
bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
        return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
                                      start_gfn << PAGE_SHIFT,
                                      end_gfn << PAGE_SHIFT);
}

#define BUILD_PTE_RANGE_OP(name, op) \
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \
                                 unsigned long end) \
{ \
        int ret = 0; \
        int i_min = pte_index(start); \
        int i_max = pte_index(end); \
        int i; \
        pte_t old, new; \
 \
        for (i = i_min; i <= i_max; ++i) { \
                if (!pte_present(pte[i])) \
                        continue; \
 \
                old = pte[i]; \
                new = op(old); \
                if (pte_val(new) == pte_val(old)) \
                        continue; \
                set_pte(pte + i, new); \
                ret = 1; \
        } \
        return ret; \
} \
 \
/* returns true if anything was done */ \
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \
                                 unsigned long end) \
{ \
        int ret = 0; \
        pte_t *pte; \
        unsigned long cur_end = ~0ul; \
        int i_min = pmd_index(start); \
        int i_max = pmd_index(end); \
        int i; \
 \
        for (i = i_min; i <= i_max; ++i, start = 0) { \
                if (!pmd_present(pmd[i])) \
                        continue; \
 \
                pte = pte_offset_kernel(pmd + i, 0); \
                if (i == i_max) \
                        cur_end = end; \
 \
                ret |= kvm_mips_##name##_pte(pte, start, cur_end); \
        } \
        return ret; \
} \
 \
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \
                                 unsigned long end) \
{ \
        int ret = 0; \
        pmd_t *pmd; \
        unsigned long cur_end = ~0ul; \
        int i_min = pud_index(start); \
        int i_max = pud_index(end); \
        int i; \
 \
        for (i = i_min; i <= i_max; ++i, start = 0) { \
                if (!pud_present(pud[i])) \
                        continue; \
 \
                pmd = pmd_offset(pud + i, 0); \
                if (i == i_max) \
                        cur_end = end; \
 \
                ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \
        } \
        return ret; \
} \
 \
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \
                                 unsigned long end) \
{ \
        int ret = 0; \
        p4d_t *p4d; \
        pud_t *pud; \
        unsigned long cur_end = ~0ul; \
        int i_min = pgd_index(start); \
        int i_max = pgd_index(end); \
        int i; \
 \
        for (i = i_min; i <= i_max; ++i, start = 0) { \
                if (!pgd_present(pgd[i])) \
                        continue; \
 \
                p4d = p4d_offset(pgd, 0); \
                pud = pud_offset(p4d + i, 0); \
                if (i == i_max) \
                        cur_end = end; \
 \
                ret |= kvm_mips_##name##_pud(pud, start, cur_end); \
        } \
        return ret; \
}
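
/*
 * BUILD_PTE_RANGE_OP(name, op) generates kvm_mips_<name>_{pte,pmd,pud,pgd}()
 * walkers which apply op to every present PTE in the given range and return
 * 1 if any PTE was actually changed, so callers know whether derived mappings
 * need to be invalidated.
 */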

/*
 * kvm_mips_mkclean_gpa_pt.
 * Mark a range of guest physical address space clean (writes fault) in the VM's
 * GPA page table to allow dirty page tracking.
 */
BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)

/**
 * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
 * @kvm:       KVM pointer.
 * @start_gfn: Guest frame number of first page in GPA range to flush.
 * @end_gfn:   Guest frame number of last page in GPA range to flush.
 *
 * Make a range of GPA mappings clean so that guest writes will fault and
 * trigger dirty page logging.
 *
 * The caller must hold the @kvm->mmu_lock spinlock.
 *
 * Returns: Whether any GPA mappings were modified, which would require
 *          derived mappings (GVA page tables & TLB entries) to be
 *          invalidated.
 */
int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
{
        return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
                                    start_gfn << PAGE_SHIFT,
                                    end_gfn << PAGE_SHIFT);
}

/**
 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
 * @kvm:        The KVM pointer
 * @slot:       The memory slot associated with mask
 * @gfn_offset: The gfn offset in memory slot
 * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
 *              slot to be write protected
 *
 * Walks the bits set in @mask and write protects the associated PTEs. The
 * caller must acquire @kvm->mmu_lock.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                             struct kvm_memory_slot *slot,
                                             gfn_t gfn_offset, unsigned long mask)
{
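        /*
         * @mask is a bitmap of dirty pages relative to @gfn_offset within the
         * slot. Cleaning the inclusive range from the first to the last set
         * bit covers every page whose bit is set; pages in between with clear
         * bits may be cleaned as well, which should only cost an extra write
         * fault if the guest touches them again.
         */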
        gfn_t base_gfn = slot->base_gfn + gfn_offset;
        gfn_t start = base_gfn + __ffs(mask);
        gfn_t end = base_gfn + __fls(mask);

        kvm_mips_mkclean_gpa_pt(kvm, start, end);
}

/*
 * kvm_mips_mkold_gpa_pt.
 * Mark a range of guest physical address space old (all accesses fault) in the
 * VM's GPA page table to allow detection of commonly used pages.
 */
BUILD_PTE_RANGE_OP(mkold, pte_mkold)

static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
                                 gfn_t end_gfn)
{
        return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
                                  start_gfn << PAGE_SHIFT,
                                  end_gfn << PAGE_SHIFT);
}
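
/*
 * The handlers below are called from KVM's common MMU notifier code when the
 * host mappings backing guest memory change (unmap, PTE change, age,
 * test-age), keeping the GPA page tables in sync with the host's view.
 */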
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
        kvm_mips_flush_gpa_pt(kvm, range->start, range->end);
        return true;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
        gpa_t gpa = range->start << PAGE_SHIFT;
        pte_t hva_pte = range->pte;
        pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
        pte_t old_pte;

        if (!gpa_pte)
                return false;

        /* Mapping may need adjusting depending on memslot flags */
        old_pte = *gpa_pte;
        if (range->slot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
                hva_pte = pte_mkclean(hva_pte);
        else if (range->slot->flags & KVM_MEM_READONLY)
                hva_pte = pte_wrprotect(hva_pte);

        set_pte(gpa_pte, hva_pte);

        /* Replacing an absent or old page doesn't need flushes */
        if (!pte_present(old_pte) || !pte_young(old_pte))
                return false;

        /* Pages swapped, aged, moved, or cleaned require flushes */
        return !pte_present(hva_pte) ||
               !pte_young(hva_pte) ||
               pte_pfn(old_pte) != pte_pfn(hva_pte) ||
               (pte_dirty(old_pte) && !pte_dirty(hva_pte));
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
        return kvm_mips_mkold_gpa_pt(kvm, range->start, range->end);
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
        gpa_t gpa = range->start << PAGE_SHIFT;
        pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);

        if (!gpa_pte)
                return false;
        return pte_young(*gpa_pte);
}

/**
 * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
 * @vcpu:        VCPU pointer.
 * @gpa:         Guest physical address of fault.
 * @write_fault: Whether the fault was due to a write.
 * @out_entry:   New PTE for @gpa (written on success unless NULL).
 * @out_buddy:   New PTE for @gpa's buddy (written on success unless NULL).
 *
 * Perform fast path GPA fault handling, doing all that can be done without
 * calling into KVM. This handles marking old pages young (for idle page
 * tracking), and dirtying of clean pages (for dirty page logging).
 *
 * Returns: 0 on success, in which case we can update derived mappings and
 *          resume guest execution.
 *          -EFAULT on failure due to absent GPA mapping or write to
 *          read-only page, in which case KVM must be consulted.
 */
static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
                                   bool write_fault,
                                   pte_t *out_entry, pte_t *out_buddy)
{
        struct kvm *kvm = vcpu->kvm;
        gfn_t gfn = gpa >> PAGE_SHIFT;
        pte_t *ptep;
        kvm_pfn_t pfn = 0;      /* silence bogus GCC warning */
        bool pfn_valid = false;
        int ret = 0;

        spin_lock(&kvm->mmu_lock);

        /* Fast path - just check GPA page table for an existing entry */
        ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
        if (!ptep || !pte_present(*ptep)) {
                ret = -EFAULT;
                goto out;
        }

        /* Track access to pages marked old */
        if (!pte_young(*ptep)) {
                set_pte(ptep, pte_mkyoung(*ptep));
                pfn = pte_pfn(*ptep);
                pfn_valid = true;
                /* call kvm_set_pfn_accessed() after unlock */
        }

        if (write_fault && !pte_dirty(*ptep)) {
                if (!pte_write(*ptep)) {
                        ret = -EFAULT;
                        goto out;
                }

                /* Track dirtying of writeable pages */
                set_pte(ptep, pte_mkdirty(*ptep));
                pfn = pte_pfn(*ptep);
                mark_page_dirty(kvm, gfn);
                kvm_set_pfn_dirty(pfn);
        }
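
        /*
         * A MIPS TLB entry maps a pair of adjacent virtual pages, so the
         * caller is handed both the PTE and its buddy to fill the other half
         * of any TLB entry derived from this mapping.
         */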
        if (out_entry)
                *out_entry = *ptep;
        if (out_buddy)
                *out_buddy = *ptep_buddy(ptep);

out:
        spin_unlock(&kvm->mmu_lock);
        if (pfn_valid)
                kvm_set_pfn_accessed(pfn);
        return ret;
}

/**
 * kvm_mips_map_page() - Map a guest physical page.
 * @vcpu:        VCPU pointer.
 * @gpa:         Guest physical address of fault.
 * @write_fault: Whether the fault was due to a write.
 * @out_entry:   New PTE for @gpa (written on success unless NULL).
 * @out_buddy:   New PTE for @gpa's buddy (written on success unless NULL).
 *
 * Handle GPA faults by creating a new GPA mapping (or updating an existing
 * one).
 *
 * This takes care of marking pages young or dirty (idle/dirty page tracking),
 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
 * caller.
 *
 * Returns: 0 on success, in which case the caller may use the @out_entry
 *          and @out_buddy PTEs to update derived mappings and resume guest
 *          execution.
 *          -EFAULT if there is no memory region at @gpa or a write was
 *          attempted to a read-only memory region. This is usually handled
 *          as an MMIO access.
 */
static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
                             bool write_fault,
                             pte_t *out_entry, pte_t *out_buddy)
{
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
        gfn_t gfn = gpa >> PAGE_SHIFT;
        int srcu_idx, err;
        kvm_pfn_t pfn;
        pte_t *ptep, entry;
        bool writeable;
        unsigned long prot_bits;
        unsigned long mmu_seq;

        /* Try the fast path to handle old / clean pages */
        srcu_idx = srcu_read_lock(&kvm->srcu);
        err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
                                      out_buddy);
        if (!err)
                goto out;

        /* We need a minimum of cached pages ready for page table creation */
        err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
        if (err)
                goto out;

retry:
        /*
         * Used to check for invalidations in progress, of the pfn that is
         * returned by gfn_to_pfn_prot below.
         */
        mmu_seq = kvm->mmu_invalidate_seq;
        /*
         * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads
         * in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
         * risk the page we get a reference to getting unmapped before we have a
         * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
         *
         * This smp_rmb() pairs with the effective smp_wmb() of the combination
         * of the pte_unmap_unlock() after the PTE is zapped, and the
         * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
         * mmu_invalidate_seq is incremented.
         */
        smp_rmb();

        /* Slow path - ask KVM core whether we can access this GPA */
        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
        if (is_error_noslot_pfn(pfn)) {
                err = -EFAULT;
                goto out;
        }

        spin_lock(&kvm->mmu_lock);
        /* Check if an invalidation has taken place since we got pfn */
        if (mmu_invalidate_retry(kvm, mmu_seq)) {
                /*
                 * This can happen when mappings are changed asynchronously, but
                 * also synchronously if a COW is triggered by
                 * gfn_to_pfn_prot().
                 */
                spin_unlock(&kvm->mmu_lock);
                kvm_release_pfn_clean(pfn);
                goto retry;
        }

        /* Ensure page tables are allocated */
        ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);

        /* Set up the PTE */
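        /*
         * Every mapping is made readable and cacheable (per the platform
         * default); write permission is added only if the host mapping is
         * writeable, and the dirty/write-enable bits only on an actual write
         * fault, so that a later first write still faults and can be
         * dirty-logged.
         */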
        prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
        if (writeable) {
                prot_bits |= _PAGE_WRITE;
                if (write_fault) {
                        prot_bits |= __WRITEABLE;
                        mark_page_dirty(kvm, gfn);
                        kvm_set_pfn_dirty(pfn);
                }
        }
        entry = pfn_pte(pfn, __pgprot(prot_bits));

        /* Write the PTE */
        set_pte(ptep, entry);

        err = 0;
        if (out_entry)
                *out_entry = *ptep;
        if (out_buddy)
                *out_buddy = *ptep_buddy(ptep);

        spin_unlock(&kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        kvm_set_pfn_accessed(pfn);
out:
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        return err;
}

int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
                                      struct kvm_vcpu *vcpu,
                                      bool write_fault)
{
        int ret;

        ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
        if (ret)
                return ret;

        /* Invalidate this entry in the TLB */
        return kvm_vz_host_tlb_inv(vcpu, badvaddr);
}

/**
 * kvm_mips_migrate_count() - Migrate timer.
 * @vcpu: Virtual CPU.
 *
 * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
 * if it was running prior to being cancelled.
 *
 * Must be called when the VCPU is migrated to a different CPU to ensure that
 * timer expiry during guest execution interrupts the guest and causes the
 * interrupt to be delivered in a timely manner.
 */
static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
{
        if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
                hrtimer_restart(&vcpu->arch.comparecount_timer);
}

/* Restore ASID once we are scheduled back after preemption */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        unsigned long flags;

        kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);

        local_irq_save(flags);

        vcpu->cpu = cpu;
        if (vcpu->arch.last_sched_cpu != cpu) {
                kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
                          vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
                /*
                 * Migrate the timer interrupt to the current CPU so that it
                 * always interrupts the guest and synchronously triggers a
                 * guest timer interrupt.
                 */
                kvm_mips_migrate_count(vcpu);
        }

        /* restore guest state to registers */
        kvm_mips_callbacks->vcpu_load(vcpu, cpu);

        local_irq_restore(flags);
}

/* ASID can change if another task is scheduled during preemption */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        unsigned long flags;
        int cpu;

        local_irq_save(flags);

        cpu = smp_processor_id();
        vcpu->arch.last_sched_cpu = cpu;
        vcpu->cpu = -1;

        /* save guest state in registers */
        kvm_mips_callbacks->vcpu_put(vcpu, cpu);

        local_irq_restore(flags);
}