trans_pgd.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Transitional page tables for kexec and hibernate
 *
 * This file is derived from: arch/arm64/kernel/hibernate.c
 *
 * Copyright (c) 2021, Microsoft Corporation.
 * Pasha Tatashin <[email protected]>
 *
 */

/*
 * Transitional tables are used while the system transfers from one world to
 * another, such as during hibernate restore and kexec reboots. During these
 * phases one cannot rely on the current page tables not being overwritten,
 * because hibernate and kexec can overwrite them during the transition.
 */

#include <asm/trans_pgd.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/suspend.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>

static void *trans_alloc(struct trans_pgd_info *info)
{
        return info->trans_alloc_page(info->trans_alloc_arg);
}
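
/*
 * Example (illustrative sketch, not part of this file): a caller provides
 * the page allocator through struct trans_pgd_info. The callback name and
 * GFP argument below are hypothetical stand-ins, modelled on how the
 * hibernate resume path hands out "safe" pages via get_safe_page():
 *
 *      static void *example_alloc_page(void *arg)
 *      {
 *              return (void *)get_safe_page((gfp_t)(unsigned long)arg);
 *      }
 *
 *      static struct trans_pgd_info example_info = {
 *              .trans_alloc_page       = example_alloc_page,
 *              .trans_alloc_arg        = (void *)GFP_ATOMIC,
 *      };
 *
 * Every table level allocated below comes from this callback, so the copied
 * tables stay usable even while the original kernel's memory is rewritten.
 */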

static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
        pte_t pte = READ_ONCE(*src_ptep);

        if (pte_valid(pte)) {
                /*
                 * Resume will overwrite areas that may be marked
                 * read only (code, rodata). Clear the RDONLY bit from
                 * the temporary mappings we use during restore.
                 */
                set_pte(dst_ptep, pte_mkwrite(pte));
        } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
                /*
                 * debug_pagealloc will have removed the PTE_VALID bit if
                 * the page isn't in use by the resume kernel. It may have
                 * been in use by the original kernel, in which case we need
                 * to put it back in our copy to do the restore.
                 *
                 * Before marking this entry valid, check that its pfn is
                 * valid.
                 */
                BUG_ON(!pfn_valid(pte_pfn(pte)));

                set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
        }
}

static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
                    pmd_t *src_pmdp, unsigned long start, unsigned long end)
{
        pte_t *src_ptep;
        pte_t *dst_ptep;
        unsigned long addr = start;

        dst_ptep = trans_alloc(info);
        if (!dst_ptep)
                return -ENOMEM;
        pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
        dst_ptep = pte_offset_kernel(dst_pmdp, start);

        src_ptep = pte_offset_kernel(src_pmdp, start);
        do {
                _copy_pte(dst_ptep, src_ptep, addr);
        } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);

        return 0;
}

static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp,
                    pud_t *src_pudp, unsigned long start, unsigned long end)
{
        pmd_t *src_pmdp;
        pmd_t *dst_pmdp;
        unsigned long next;
        unsigned long addr = start;

        if (pud_none(READ_ONCE(*dst_pudp))) {
                dst_pmdp = trans_alloc(info);
                if (!dst_pmdp)
                        return -ENOMEM;
                pud_populate(NULL, dst_pudp, dst_pmdp);
        }
        dst_pmdp = pmd_offset(dst_pudp, start);

        src_pmdp = pmd_offset(src_pudp, start);
        do {
                pmd_t pmd = READ_ONCE(*src_pmdp);

                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd))
                        continue;
                if (pmd_table(pmd)) {
                        if (copy_pte(info, dst_pmdp, src_pmdp, addr, next))
                                return -ENOMEM;
                } else {
                        set_pmd(dst_pmdp,
                                __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
                }
        } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);

        return 0;
}

static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp,
                    p4d_t *src_p4dp, unsigned long start,
                    unsigned long end)
{
        pud_t *dst_pudp;
        pud_t *src_pudp;
        unsigned long next;
        unsigned long addr = start;

        if (p4d_none(READ_ONCE(*dst_p4dp))) {
                dst_pudp = trans_alloc(info);
                if (!dst_pudp)
                        return -ENOMEM;
                p4d_populate(NULL, dst_p4dp, dst_pudp);
        }
        dst_pudp = pud_offset(dst_p4dp, start);

        src_pudp = pud_offset(src_p4dp, start);
        do {
                pud_t pud = READ_ONCE(*src_pudp);

                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        continue;
                if (pud_table(pud)) {
                        if (copy_pmd(info, dst_pudp, src_pudp, addr, next))
                                return -ENOMEM;
                } else {
                        set_pud(dst_pudp,
                                __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
                }
        } while (dst_pudp++, src_pudp++, addr = next, addr != end);

        return 0;
}

static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
                    pgd_t *src_pgdp, unsigned long start,
                    unsigned long end)
{
        p4d_t *dst_p4dp;
        p4d_t *src_p4dp;
        unsigned long next;
        unsigned long addr = start;

        dst_p4dp = p4d_offset(dst_pgdp, start);
        src_p4dp = p4d_offset(src_pgdp, start);
        do {
                next = p4d_addr_end(addr, end);
                if (p4d_none(READ_ONCE(*src_p4dp)))
                        continue;
                if (copy_pud(info, dst_p4dp, src_p4dp, addr, next))
                        return -ENOMEM;
        } while (dst_p4dp++, src_p4dp++, addr = next, addr != end);

        return 0;
}

static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp,
                            unsigned long start, unsigned long end)
{
        unsigned long next;
        unsigned long addr = start;
        pgd_t *src_pgdp = pgd_offset_k(start);

        dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none(READ_ONCE(*src_pgdp)))
                        continue;
                if (copy_p4d(info, dst_pgdp, src_pgdp, addr, next))
                        return -ENOMEM;
        } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

        return 0;
}

/*
 * Create trans_pgd and copy linear map.
 * info:        contains allocator and its argument
 * dst_pgdp:    on success, set to the new page table into which the map is
 *              copied.
 * start:       Start of the interval (inclusive).
 * end:         End of the interval (exclusive).
 *
 * Returns 0 on success, and -ENOMEM on failure.
 */
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
                          unsigned long start, unsigned long end)
{
        int rc;
        pgd_t *trans_pgd = trans_alloc(info);

        if (!trans_pgd) {
                pr_err("Failed to allocate memory for temporary page tables.\n");
                return -ENOMEM;
        }

        rc = copy_page_tables(info, trans_pgd, start, end);
        if (!rc)
                *dst_pgdp = trans_pgd;

        return rc;
}
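
/*
 * Example (illustrative sketch, not part of this file): copying the linear
 * map into a temporary pgd before restore/relocation code starts rewriting
 * memory. example_info is the hypothetical allocator sketched above;
 * PAGE_OFFSET and PAGE_END bound the arm64 linear map:
 *
 *      pgd_t *tmp_pg_dir;
 *      int rc;
 *
 *      rc = trans_pgd_create_copy(&example_info, &tmp_pg_dir,
 *                                 PAGE_OFFSET, PAGE_END);
 *      if (rc)
 *              return rc;
 */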

/*
 * The page we want to idmap may be outside the range covered by VA_BITS that
 * can be built using the kernel's p?d_populate() helpers. As a one off, for a
 * single page, we build these page tables bottom up and just assume that we
 * will need the maximum T0SZ.
 *
 * Returns 0 on success, and -ENOMEM on failure.
 * On success, trans_ttbr0 contains the page table with the idmapped page, and
 * t0sz is set to the maximum T0SZ for this page.
 */
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
                         unsigned long *t0sz, void *page)
{
        phys_addr_t dst_addr = virt_to_phys(page);
        unsigned long pfn = __phys_to_pfn(dst_addr);
        int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47;
        int bits_mapped = PAGE_SHIFT - 4;
        unsigned long level_mask, prev_level_entry, *levels[4];
        int this_level, index, level_lsb, level_msb;

        dst_addr &= PAGE_MASK;
        prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX));

        for (this_level = 3; this_level >= 0; this_level--) {
                levels[this_level] = trans_alloc(info);
                if (!levels[this_level])
                        return -ENOMEM;

                level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level);
                level_msb = min(level_lsb + bits_mapped, max_msb);
                level_mask = GENMASK_ULL(level_msb, level_lsb);

                index = (dst_addr & level_mask) >> level_lsb;
                *(levels[this_level] + index) = prev_level_entry;

                pfn = virt_to_pfn(levels[this_level]);
                prev_level_entry = pte_val(pfn_pte(pfn,
                                                   __pgprot(PMD_TYPE_TABLE)));

                if (level_msb == max_msb)
                        break;
        }

        *trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn));
        *t0sz = TCR_T0SZ(max_msb + 1);

        return 0;
}
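
/*
 * Example (illustrative sketch, not part of this file): idmapping the page
 * that holds the relocation/exit routine and installing the result in TTBR0.
 * exit_code_page is a hypothetical name; cpu_install_ttbr0() is assumed to
 * be the <asm/mmu_context.h> helper used by the hibernate and kexec paths:
 *
 *      phys_addr_t trans_ttbr0;
 *      unsigned long t0sz;
 *
 *      rc = trans_pgd_idmap_page(&example_info, &trans_ttbr0, &t0sz,
 *                                exit_code_page);
 *      if (rc)
 *              return rc;
 *      cpu_install_ttbr0(trans_ttbr0, t0sz);
 */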

/*
 * Create a copy of the vector table so we can call HVC_SET_VECTORS or
 * HVC_SOFT_RESTART from contexts where the table may be overwritten.
 */
int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
                               phys_addr_t *el2_vectors)
{
        void *hyp_stub = trans_alloc(info);

        if (!hyp_stub)
                return -ENOMEM;
        *el2_vectors = virt_to_phys(hyp_stub);
        memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN);
        caches_clean_inval_pou((unsigned long)hyp_stub,
                               (unsigned long)hyp_stub + ARM64_VECTOR_TABLE_LEN);
        dcache_clean_inval_poc((unsigned long)hyp_stub,
                               (unsigned long)hyp_stub + ARM64_VECTOR_TABLE_LEN);

        return 0;
}
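
/*
 * Example (illustrative sketch, not part of this file): callers that may
 * need to reset EL2 copy the stub vectors first and later pass their
 * physical address to the exit code. el2_reset_needed() is assumed here to
 * be the helper the hibernate path uses to make that decision:
 *
 *      phys_addr_t el2_vectors = 0;
 *
 *      if (el2_reset_needed()) {
 *              rc = trans_pgd_copy_el2_vectors(&example_info, &el2_vectors);
 *              if (rc)
 *                      return rc;
 *      }
 */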