/* page.c */
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/memblock.h>
  3. #include <linux/compiler.h>
  4. #include <linux/fs.h>
  5. #include <linux/init.h>
  6. #include <linux/ksm.h>
  7. #include <linux/mm.h>
  8. #include <linux/mmzone.h>
  9. #include <linux/huge_mm.h>
  10. #include <linux/proc_fs.h>
  11. #include <linux/seq_file.h>
  12. #include <linux/hugetlb.h>
  13. #include <linux/memremap.h>
  14. #include <linux/memcontrol.h>
  15. #include <linux/mmu_notifier.h>
  16. #include <linux/page_idle.h>
  17. #include <linux/kernel-page-flags.h>
  18. #include <linux/uaccess.h>
  19. #include "internal.h"
  20. #define KPMSIZE sizeof(u64)
  21. #define KPMMASK (KPMSIZE - 1)
  22. #define KPMBITS (KPMSIZE * BITS_PER_BYTE)
  23. static inline unsigned long get_max_dump_pfn(void)
  24. {
  25. #ifdef CONFIG_SPARSEMEM
  26. /*
  27. * The memmap of early sections is completely populated and marked
  28. * online even if max_pfn does not fall on a section boundary -
  29. * pfn_to_online_page() will succeed on all pages. Allow inspecting
  30. * these memmaps.
  31. */
  32. return round_up(max_pfn, PAGES_PER_SECTION);
  33. #else
  34. return max_pfn;
  35. #endif
  36. }
  37. /* /proc/kpagecount - an array exposing page counts
  38. *
  39. * Each entry is a u64 representing the corresponding
  40. * physical page count.
  41. */
  42. static ssize_t kpagecount_read(struct file *file, char __user *buf,
  43. size_t count, loff_t *ppos)
  44. {
  45. const unsigned long max_dump_pfn = get_max_dump_pfn();
  46. u64 __user *out = (u64 __user *)buf;
  47. struct page *ppage;
  48. unsigned long src = *ppos;
  49. unsigned long pfn;
  50. ssize_t ret = 0;
  51. u64 pcount;
  52. pfn = src / KPMSIZE;
  53. if (src & KPMMASK || count & KPMMASK)
  54. return -EINVAL;
  55. if (src >= max_dump_pfn * KPMSIZE)
  56. return 0;
  57. count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
  58. while (count > 0) {
  59. /*
  60. * TODO: ZONE_DEVICE support requires to identify
  61. * memmaps that were actually initialized.
  62. */
  63. ppage = pfn_to_online_page(pfn);
  64. if (!ppage || PageSlab(ppage) || page_has_type(ppage))
  65. pcount = 0;
  66. else
  67. pcount = page_mapcount(ppage);
  68. if (put_user(pcount, out)) {
  69. ret = -EFAULT;
  70. break;
  71. }
  72. pfn++;
  73. out++;
  74. count -= KPMSIZE;
  75. cond_resched();
  76. }
  77. *ppos += (char __user *)out - buf;
  78. if (!ret)
  79. ret = (char __user *)out - buf;
  80. return ret;
  81. }
  82. static const struct proc_ops kpagecount_proc_ops = {
  83. .proc_flags = PROC_ENTRY_PERMANENT,
  84. .proc_lseek = mem_lseek,
  85. .proc_read = kpagecount_read,
  86. };
  87. /* /proc/kpageflags - an array exposing page flags
  88. *
  89. * Each entry is a u64 representing the corresponding
  90. * physical page flags.
  91. */
  92. static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
  93. {
  94. return ((kflags >> kbit) & 1) << ubit;
  95. }
  96. u64 stable_page_flags(struct page *page)
  97. {
  98. u64 k;
  99. u64 u;
  100. /*
  101. * pseudo flag: KPF_NOPAGE
  102. * it differentiates a memory hole from a page with no flags
  103. */
  104. if (!page)
  105. return 1 << KPF_NOPAGE;
  106. k = page->flags;
  107. u = 0;
  108. /*
  109. * pseudo flags for the well known (anonymous) memory mapped pages
  110. *
  111. * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
  112. * simple test in page_mapped() is not enough.
  113. */
  114. if (!PageSlab(page) && page_mapped(page))
  115. u |= 1 << KPF_MMAP;
  116. if (PageAnon(page))
  117. u |= 1 << KPF_ANON;
  118. if (PageKsm(page))
  119. u |= 1 << KPF_KSM;
  120. /*
  121. * compound pages: export both head/tail info
  122. * they together define a compound page's start/end pos and order
  123. */
  124. if (PageHead(page))
  125. u |= 1 << KPF_COMPOUND_HEAD;
  126. if (PageTail(page))
  127. u |= 1 << KPF_COMPOUND_TAIL;
  128. if (PageHuge(page))
  129. u |= 1 << KPF_HUGE;
  130. /*
  131. * PageTransCompound can be true for non-huge compound pages (slab
  132. * pages or pages allocated by drivers with __GFP_COMP) because it
  133. * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
  134. * to make sure a given page is a thp, not a non-huge compound page.
  135. */
  136. else if (PageTransCompound(page)) {
  137. struct page *head = compound_head(page);
  138. if (PageLRU(head) || PageAnon(head))
  139. u |= 1 << KPF_THP;
  140. else if (is_huge_zero_page(head)) {
  141. u |= 1 << KPF_ZERO_PAGE;
  142. u |= 1 << KPF_THP;
  143. }
  144. } else if (is_zero_pfn(page_to_pfn(page)))
  145. u |= 1 << KPF_ZERO_PAGE;
  146. /*
  147. * Caveats on high order pages: page->_refcount will only be set
  148. * -1 on the head page; SLUB/SLQB do the same for PG_slab;
  149. * SLOB won't set PG_slab at all on compound pages.
  150. */
  151. if (PageBuddy(page))
  152. u |= 1 << KPF_BUDDY;
  153. else if (page_count(page) == 0 && is_free_buddy_page(page))
  154. u |= 1 << KPF_BUDDY;
  155. if (PageOffline(page))
  156. u |= 1 << KPF_OFFLINE;
  157. if (PageTable(page))
  158. u |= 1 << KPF_PGTABLE;
  159. if (page_is_idle(page))
  160. u |= 1 << KPF_IDLE;
  161. u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
  162. u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
  163. if (PageTail(page) && PageSlab(compound_head(page)))
  164. u |= 1 << KPF_SLAB;
  165. u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
  166. u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
  167. u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
  168. u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);
  169. u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
  170. u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
  171. u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
  172. u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);
  173. if (PageSwapCache(page))
  174. u |= 1 << KPF_SWAPCACHE;
  175. u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);
  176. u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
  177. u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);
  178. #ifdef CONFIG_MEMORY_FAILURE
  179. u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
  180. #endif
  181. #ifdef CONFIG_ARCH_USES_PG_UNCACHED
  182. u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
  183. #endif
  184. u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
  185. u |= kpf_copy_bit(k, KPF_MAPPEDTODISK, PG_mappedtodisk);
  186. u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
  187. u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
  188. u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
  189. u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
  190. #ifdef CONFIG_64BIT
  191. u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
  192. #endif
  193. return u;
  194. };
  195. static ssize_t kpageflags_read(struct file *file, char __user *buf,
  196. size_t count, loff_t *ppos)
  197. {
  198. const unsigned long max_dump_pfn = get_max_dump_pfn();
  199. u64 __user *out = (u64 __user *)buf;
  200. struct page *ppage;
  201. unsigned long src = *ppos;
  202. unsigned long pfn;
  203. ssize_t ret = 0;
  204. pfn = src / KPMSIZE;
  205. if (src & KPMMASK || count & KPMMASK)
  206. return -EINVAL;
  207. if (src >= max_dump_pfn * KPMSIZE)
  208. return 0;
  209. count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
  210. while (count > 0) {
  211. /*
  212. * TODO: ZONE_DEVICE support requires to identify
  213. * memmaps that were actually initialized.
  214. */
  215. ppage = pfn_to_online_page(pfn);
  216. if (put_user(stable_page_flags(ppage), out)) {
  217. ret = -EFAULT;
  218. break;
  219. }
  220. pfn++;
  221. out++;
  222. count -= KPMSIZE;
  223. cond_resched();
  224. }
  225. *ppos += (char __user *)out - buf;
  226. if (!ret)
  227. ret = (char __user *)out - buf;
  228. return ret;
  229. }
  230. static const struct proc_ops kpageflags_proc_ops = {
  231. .proc_flags = PROC_ENTRY_PERMANENT,
  232. .proc_lseek = mem_lseek,
  233. .proc_read = kpageflags_read,
  234. };
  235. #ifdef CONFIG_MEMCG
  236. static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
  237. size_t count, loff_t *ppos)
  238. {
  239. const unsigned long max_dump_pfn = get_max_dump_pfn();
  240. u64 __user *out = (u64 __user *)buf;
  241. struct page *ppage;
  242. unsigned long src = *ppos;
  243. unsigned long pfn;
  244. ssize_t ret = 0;
  245. u64 ino;
  246. pfn = src / KPMSIZE;
  247. if (src & KPMMASK || count & KPMMASK)
  248. return -EINVAL;
  249. if (src >= max_dump_pfn * KPMSIZE)
  250. return 0;
  251. count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
  252. while (count > 0) {
  253. /*
  254. * TODO: ZONE_DEVICE support requires to identify
  255. * memmaps that were actually initialized.
  256. */
  257. ppage = pfn_to_online_page(pfn);
  258. if (ppage)
  259. ino = page_cgroup_ino(ppage);
  260. else
  261. ino = 0;
  262. if (put_user(ino, out)) {
  263. ret = -EFAULT;
  264. break;
  265. }
  266. pfn++;
  267. out++;
  268. count -= KPMSIZE;
  269. cond_resched();
  270. }
  271. *ppos += (char __user *)out - buf;
  272. if (!ret)
  273. ret = (char __user *)out - buf;
  274. return ret;
  275. }
  276. static const struct proc_ops kpagecgroup_proc_ops = {
  277. .proc_flags = PROC_ENTRY_PERMANENT,
  278. .proc_lseek = mem_lseek,
  279. .proc_read = kpagecgroup_read,
  280. };
  281. #endif /* CONFIG_MEMCG */
  282. static int __init proc_page_init(void)
  283. {
  284. proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
  285. proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
  286. #ifdef CONFIG_MEMCG
  287. proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
  288. #endif
  289. return 0;
  290. }
  291. fs_initcall(proc_page_init);