dump_pagetables.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/set_memory.h>
  3. #include <linux/ptdump.h>
  4. #include <linux/seq_file.h>
  5. #include <linux/debugfs.h>
  6. #include <linux/mm.h>
  7. #include <linux/kfence.h>
  8. #include <linux/kasan.h>
  9. #include <asm/ptdump.h>
  10. #include <asm/kasan.h>
  11. #include <asm/abs_lowcore.h>
  12. #include <asm/nospec-branch.h>
  13. #include <asm/sections.h>
  14. #include <asm/maccess.h>
  15. static unsigned long max_addr;
  16. struct addr_marker {
  17. unsigned long start_address;
  18. const char *name;
  19. };
  20. enum address_markers_idx {
  21. IDENTITY_BEFORE_NR = 0,
  22. IDENTITY_BEFORE_END_NR,
  23. AMODE31_START_NR,
  24. AMODE31_END_NR,
  25. KERNEL_START_NR,
  26. KERNEL_END_NR,
  27. #ifdef CONFIG_KFENCE
  28. KFENCE_START_NR,
  29. KFENCE_END_NR,
  30. #endif
  31. IDENTITY_AFTER_NR,
  32. IDENTITY_AFTER_END_NR,
  33. VMEMMAP_NR,
  34. VMEMMAP_END_NR,
  35. VMALLOC_NR,
  36. VMALLOC_END_NR,
  37. MODULES_NR,
  38. MODULES_END_NR,
  39. ABS_LOWCORE_NR,
  40. ABS_LOWCORE_END_NR,
  41. MEMCPY_REAL_NR,
  42. MEMCPY_REAL_END_NR,
  43. #ifdef CONFIG_KASAN
  44. KASAN_SHADOW_START_NR,
  45. KASAN_SHADOW_END_NR,
  46. #endif
  47. };
  48. static struct addr_marker address_markers[] = {
  49. [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
  50. [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
  51. [AMODE31_START_NR] = {0, "Amode31 Area Start"},
  52. [AMODE31_END_NR] = {0, "Amode31 Area End"},
  53. [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
  54. [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
  55. #ifdef CONFIG_KFENCE
  56. [KFENCE_START_NR] = {0, "KFence Pool Start"},
  57. [KFENCE_END_NR] = {0, "KFence Pool End"},
  58. #endif
  59. [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
  60. [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
  61. [VMEMMAP_NR] = {0, "vmemmap Area Start"},
  62. [VMEMMAP_END_NR] = {0, "vmemmap Area End"},
  63. [VMALLOC_NR] = {0, "vmalloc Area Start"},
  64. [VMALLOC_END_NR] = {0, "vmalloc Area End"},
  65. [MODULES_NR] = {0, "Modules Area Start"},
  66. [MODULES_END_NR] = {0, "Modules Area End"},
  67. [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
  68. [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
  69. [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
  70. [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
  71. #ifdef CONFIG_KASAN
  72. [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
  73. [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
  74. #endif
  75. { -1, NULL }
  76. };
  77. struct pg_state {
  78. struct ptdump_state ptdump;
  79. struct seq_file *seq;
  80. int level;
  81. unsigned int current_prot;
  82. bool check_wx;
  83. unsigned long wx_pages;
  84. unsigned long start_address;
  85. const struct addr_marker *marker;
  86. };
  87. #define pt_dump_seq_printf(m, fmt, args...) \
  88. ({ \
  89. struct seq_file *__m = (m); \
  90. \
  91. if (__m) \
  92. seq_printf(__m, fmt, ##args); \
  93. })
  94. #define pt_dump_seq_puts(m, fmt) \
  95. ({ \
  96. struct seq_file *__m = (m); \
  97. \
  98. if (__m) \
  99. seq_printf(__m, fmt); \
  100. })
  101. static void print_prot(struct seq_file *m, unsigned int pr, int level)
  102. {
  103. static const char * const level_name[] =
  104. { "ASCE", "PGD", "PUD", "PMD", "PTE" };
  105. pt_dump_seq_printf(m, "%s ", level_name[level]);
  106. if (pr & _PAGE_INVALID) {
  107. pt_dump_seq_printf(m, "I\n");
  108. return;
  109. }
  110. pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
  111. pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
  112. }
  113. static void note_prot_wx(struct pg_state *st, unsigned long addr)
  114. {
  115. #ifdef CONFIG_DEBUG_WX
  116. if (!st->check_wx)
  117. return;
  118. if (st->current_prot & _PAGE_INVALID)
  119. return;
  120. if (st->current_prot & _PAGE_PROTECT)
  121. return;
  122. if (st->current_prot & _PAGE_NOEXEC)
  123. return;
  124. /*
  125. * The first lowcore page is W+X if spectre mitigations are using
  126. * trampolines or the BEAR enhancements facility is not installed,
  127. * in which case we have two lpswe instructions in lowcore that need
  128. * to be executable.
  129. */
  130. if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
  131. return;
  132. WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
  133. (void *)st->start_address);
  134. st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
  135. #endif /* CONFIG_DEBUG_WX */
  136. }
  137. static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
  138. {
  139. int width = sizeof(unsigned long) * 2;
  140. static const char units[] = "KMGTPE";
  141. const char *unit = units;
  142. unsigned long delta;
  143. struct pg_state *st;
  144. struct seq_file *m;
  145. unsigned int prot;
  146. st = container_of(pt_st, struct pg_state, ptdump);
  147. m = st->seq;
  148. prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC);
  149. if (level == 4 && (val & _PAGE_INVALID))
  150. prot = _PAGE_INVALID;
  151. /* For pmd_none() & friends val gets passed as zero. */
  152. if (level != 4 && !val)
  153. prot = _PAGE_INVALID;
  154. /* Final flush from generic code. */
  155. if (level == -1)
  156. addr = max_addr;
  157. if (st->level == -1) {
  158. pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
  159. st->start_address = addr;
  160. st->current_prot = prot;
  161. st->level = level;
  162. } else if (prot != st->current_prot || level != st->level ||
  163. addr >= st->marker[1].start_address) {
  164. note_prot_wx(st, addr);
  165. pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ",
  166. width, st->start_address,
  167. width, addr);
  168. delta = (addr - st->start_address) >> 10;
  169. while (!(delta & 0x3ff) && unit[1]) {
  170. delta >>= 10;
  171. unit++;
  172. }
  173. pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
  174. print_prot(m, st->current_prot, st->level);
  175. while (addr >= st->marker[1].start_address) {
  176. st->marker++;
  177. pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
  178. }
  179. st->start_address = addr;
  180. st->current_prot = prot;
  181. st->level = level;
  182. }
  183. }
  184. #ifdef CONFIG_DEBUG_WX
  185. void ptdump_check_wx(void)
  186. {
  187. struct pg_state st = {
  188. .ptdump = {
  189. .note_page = note_page,
  190. .range = (struct ptdump_range[]) {
  191. {.start = 0, .end = max_addr},
  192. {.start = 0, .end = 0},
  193. }
  194. },
  195. .seq = NULL,
  196. .level = -1,
  197. .current_prot = 0,
  198. .check_wx = true,
  199. .wx_pages = 0,
  200. .start_address = 0,
  201. .marker = (struct addr_marker[]) {
  202. { .start_address = 0, .name = NULL},
  203. { .start_address = -1, .name = NULL},
  204. },
  205. };
  206. if (!MACHINE_HAS_NX)
  207. return;
  208. ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
  209. if (st.wx_pages)
  210. pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
  211. else
  212. pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
  213. (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
  214. "unexpected " : "");
  215. }
  216. #endif /* CONFIG_DEBUG_WX */
  217. #ifdef CONFIG_PTDUMP_DEBUGFS
  218. static int ptdump_show(struct seq_file *m, void *v)
  219. {
  220. struct pg_state st = {
  221. .ptdump = {
  222. .note_page = note_page,
  223. .range = (struct ptdump_range[]) {
  224. {.start = 0, .end = max_addr},
  225. {.start = 0, .end = 0},
  226. }
  227. },
  228. .seq = m,
  229. .level = -1,
  230. .current_prot = 0,
  231. .check_wx = false,
  232. .wx_pages = 0,
  233. .start_address = 0,
  234. .marker = address_markers,
  235. };
  236. get_online_mems();
  237. mutex_lock(&cpa_mutex);
  238. ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
  239. mutex_unlock(&cpa_mutex);
  240. put_online_mems();
  241. return 0;
  242. }
  243. DEFINE_SHOW_ATTRIBUTE(ptdump);
  244. #endif /* CONFIG_PTDUMP_DEBUGFS */
  245. /*
  246. * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
  247. * insertion sort to preserve the original order of markers with the same
  248. * start address.
  249. */
  250. static void sort_address_markers(void)
  251. {
  252. struct addr_marker tmp;
  253. int i, j;
  254. for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
  255. tmp = address_markers[i];
  256. for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
  257. address_markers[j + 1] = address_markers[j];
  258. address_markers[j + 1] = tmp;
  259. }
  260. }
  261. static int pt_dump_init(void)
  262. {
  263. #ifdef CONFIG_KFENCE
  264. unsigned long kfence_start = (unsigned long)__kfence_pool;
  265. #endif
  266. /*
  267. * Figure out the maximum virtual address being accessible with the
  268. * kernel ASCE. We need this to keep the page table walker functions
  269. * from accessing non-existent entries.
  270. */
  271. max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
  272. max_addr = 1UL << (max_addr * 11 + 31);
  273. address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
  274. address_markers[AMODE31_START_NR].start_address = __samode31;
  275. address_markers[AMODE31_END_NR].start_address = __eamode31;
  276. address_markers[MODULES_NR].start_address = MODULES_VADDR;
  277. address_markers[MODULES_END_NR].start_address = MODULES_END;
  278. address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
  279. address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
  280. address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
  281. address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
  282. address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
  283. address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
  284. address_markers[VMALLOC_NR].start_address = VMALLOC_START;
  285. address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
  286. #ifdef CONFIG_KFENCE
  287. address_markers[KFENCE_START_NR].start_address = kfence_start;
  288. address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
  289. #endif
  290. sort_address_markers();
  291. #ifdef CONFIG_PTDUMP_DEBUGFS
  292. debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
  293. #endif /* CONFIG_PTDUMP_DEBUGFS */
  294. return 0;
  295. }
  296. device_initcall(pt_dump_init);