/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGE_MM_H
#define _LINUX_HUGE_MM_H

#include <linux/sched/coredump.h>
#include <linux/mm_types.h>

#include <linux/fs.h> /* only for vma_is_dax() */

vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
                  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
void huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
                  struct vm_area_struct *vma);

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif

vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                                   unsigned long addr, pmd_t *pmd,
                                   unsigned int flags);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                           pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr);
int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
                 unsigned long addr);
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                   unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                    pmd_t *pmd, unsigned long addr, pgprot_t newprot,
                    unsigned long cp_flags);

vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
                                   pgprot_t pgprot, bool write);
/**
 * vmf_insert_pfn_pmd - insert a pmd size pfn
 * @vmf: Structure describing the fault
 * @pfn: pfn to insert
 * @write: whether it's a write fault
 *
 * Insert a pmd size pfn. See vmf_insert_pfn() for additional info.
 * The page protection is taken from vmf->vma->vm_page_prot; use
 * vmf_insert_pfn_pmd_prot() to supply an explicit pgprot.
 *
 * Return: vm_fault_t value.
 */
static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn,
                                            bool write)
{
        return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
}
vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
                                   pgprot_t pgprot, bool write);

/**
 * vmf_insert_pfn_pud - insert a pud size pfn
 * @vmf: Structure describing the fault
 * @pfn: pfn to insert
 * @write: whether it's a write fault
 *
 * Insert a pud size pfn. See vmf_insert_pfn() for additional info.
 * The page protection is taken from vmf->vma->vm_page_prot; use
 * vmf_insert_pfn_pud_prot() to supply an explicit pgprot.
 *
 * Return: vm_fault_t value.
 */
static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
                                            bool write)
{
        return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
}
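
/*
 * Illustrative sketch (not part of this header): a driver mapping device
 * memory at PMD/PUD granularity would typically call the helpers above from
 * its ->huge_fault() handler. The handler name and pfn lookup below are
 * hypothetical; the pe_size dispatch follows the vm_operations_struct
 * ->huge_fault() signature of this kernel era:
 *
 *      static vm_fault_t my_huge_fault(struct vm_fault *vmf,
 *                                      enum page_entry_size pe_size)
 *      {
 *              pfn_t pfn = my_addr_to_pfn(vmf->vma, vmf->address);
 *
 *              switch (pe_size) {
 *              case PE_SIZE_PMD:
 *                      return vmf_insert_pfn_pmd(vmf, pfn,
 *                                      vmf->flags & FAULT_FLAG_WRITE);
 *              case PE_SIZE_PUD:
 *                      return vmf_insert_pfn_pud(vmf, pfn,
 *                                      vmf->flags & FAULT_FLAG_WRITE);
 *              default:
 *                      return VM_FAULT_FALLBACK;
 *              }
 *      }
 */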
enum transparent_hugepage_flag {
        TRANSPARENT_HUGEPAGE_NEVER_DAX,
        TRANSPARENT_HUGEPAGE_FLAG,
        TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
        TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
};

struct kobject;
struct kobj_attribute;

ssize_t single_hugepage_flag_store(struct kobject *kobj,
                                   struct kobj_attribute *attr,
                                   const char *buf, size_t count,
                                   enum transparent_hugepage_flag flag);
ssize_t single_hugepage_flag_show(struct kobject *kobj,
                                  struct kobj_attribute *attr, char *buf,
                                  enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;

#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE  ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK  (~(HPAGE_PMD_SIZE - 1))

#define HPAGE_PUD_SHIFT PUD_SHIFT
#define HPAGE_PUD_SIZE  ((1UL) << HPAGE_PUD_SHIFT)
#define HPAGE_PUD_MASK  (~(HPAGE_PUD_SIZE - 1))
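
/*
 * Worked example (illustrative): on x86-64 with 4K pages, PAGE_SHIFT = 12
 * and PMD_SHIFT = 21, so HPAGE_PMD_ORDER = 9, HPAGE_PMD_NR = 512 and
 * HPAGE_PMD_SIZE = 2MB; with PUD_SHIFT = 30, HPAGE_PUD_SIZE = 1GB.
 */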
extern unsigned long transparent_hugepage_flags;

#define hugepage_flags_enabled()                                        \
        (transparent_hugepage_flags &                                   \
         ((1<<TRANSPARENT_HUGEPAGE_FLAG) |                              \
          (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
#define hugepage_flags_always()                         \
        (transparent_hugepage_flags &                   \
         (1<<TRANSPARENT_HUGEPAGE_FLAG))

/*
 * Do the below checks:
 *   - For file vma, check if the linear page offset of vma is
 *     HPAGE_PMD_NR aligned within the file.  The hugepage is
 *     guaranteed to be hugepage-aligned within the file, but we must
 *     check that the PMD-aligned addresses in the VMA map to
 *     PMD-aligned offsets within the file, else the hugepage will
 *     not be PMD-mappable.
 *   - For all vmas, check if the haddr is in an aligned HPAGE_PMD_SIZE
 *     area.
 */
static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
                unsigned long addr)
{
        unsigned long haddr;

        /* Don't have to check pgoff for anonymous vma */
        if (!vma_is_anonymous(vma)) {
                if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
                                HPAGE_PMD_NR))
                        return false;
        }

        haddr = addr & HPAGE_PMD_MASK;

        if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
                return false;
        return true;
}
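
/*
 * Illustrative sketch: fault paths use this check before trying to install
 * a huge PMD, falling back to small pages when the address or file offset
 * is not suitably aligned, e.g.:
 *
 *      if (!transhuge_vma_suitable(vma, vmf->address))
 *              return VM_FAULT_FALLBACK;
 */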
static inline bool file_thp_enabled(struct vm_area_struct *vma)
{
        struct inode *inode;

        if (!vma->vm_file)
                return false;

        inode = vma->vm_file->f_inode;

        return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) &&
               (vma->vm_flags & VM_EXEC) &&
               !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
}

bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
                        bool smaps, bool in_pf, bool enforce_sysfs);

#define transparent_hugepage_use_zero_page()                            \
        (transparent_hugepage_flags &                                   \
         (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))

unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags);

void prep_transhuge_page(struct page *page);
void free_transhuge_page(struct page *page);

bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list(struct page *page, struct list_head *list);
static inline int split_huge_page(struct page *page)
{
        return split_huge_page_to_list(page, NULL);
}
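
/*
 * Illustrative sketch: split_huge_page() expects @page to be locked and
 * returns 0 on success. A nonzero return means the split failed (e.g.
 * because of extra pins) and the page remains huge:
 *
 *      lock_page(page);
 *      ret = split_huge_page(page);
 *      unlock_page(page);
 */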
void deferred_split_huge_page(struct page *page);

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze, struct folio *folio);

#define split_huge_pmd(__vma, __pmd, __address)                         \
        do {                                                            \
                pmd_t *____pmd = (__pmd);                               \
                if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)   \
                                        || pmd_devmap(*____pmd))        \
                        __split_huge_pmd(__vma, __pmd, __address,       \
                                                false, NULL);           \
        }  while (0)
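
/*
 * Illustrative sketch: split_huge_pmd() demotes one huge PMD mapping back
 * to page-table granularity; the compound page itself is not split (see
 * split_huge_page() for that). Typical use, with the vma suitably locked
 * (e.g. mmap_lock held):
 *
 *      split_huge_pmd(vma, pmd, address);
 */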
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                bool freeze, struct folio *folio);

void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
                unsigned long address);

#define split_huge_pud(__vma, __pud, __address)                         \
        do {                                                            \
                pud_t *____pud = (__pud);                               \
                if (pud_trans_huge(*____pud)                            \
                                        || pud_devmap(*____pud))        \
                        __split_huge_pud(__vma, __pud, __address);      \
        }  while (0)

int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
                     int advice);
int madvise_collapse(struct vm_area_struct *vma,
                     struct vm_area_struct **prev,
                     unsigned long start, unsigned long end);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
                           unsigned long end, long adjust_next);
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);

static inline int is_swap_pmd(pmd_t pmd)
{
        return !pmd_none(pmd) && !pmd_present(pmd);
}

/* mmap_lock must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
                struct vm_area_struct *vma)
{
        if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
                return __pmd_trans_huge_lock(pmd, vma);
        else
                return NULL;
}
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
                struct vm_area_struct *vma)
{
        if (pud_trans_huge(*pud) || pud_devmap(*pud))
                return __pud_trans_huge_lock(pud, vma);
        else
                return NULL;
}
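
/*
 * Illustrative sketch: the *_trans_huge_lock() helpers return the held ptl
 * only when the entry is huge (or, for the pmd variant, a swap/devmap pmd);
 * otherwise they return NULL without taking the lock. The usual pattern:
 *
 *      ptl = pmd_trans_huge_lock(pmd, vma);
 *      if (ptl) {
 *              ... operate on the huge pmd ...
 *              spin_unlock(ptl);
 *      } else {
 *              ... fall back to the pte-level path ...
 *      }
 */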
/**
 * folio_test_pmd_mappable - Can we map this folio with a PMD?
 * @folio: The folio to test
 */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
        return folio_order(folio) >= HPAGE_PMD_ORDER;
}

struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
                pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
                pud_t *pud, int flags, struct dev_pagemap **pgmap);

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);

extern struct page *huge_zero_page;
extern unsigned long huge_zero_pfn;

static inline bool is_huge_zero_page(struct page *page)
{
        return READ_ONCE(huge_zero_page) == page;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
        return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
}

static inline bool is_huge_zero_pud(pud_t pud)
{
        return false;
}

struct page *mm_get_huge_zero_page(struct mm_struct *mm);
void mm_put_huge_zero_page(struct mm_struct *mm);
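
/*
 * Illustrative sketch: users of the huge zero page take a per-mm reference
 * that is dropped again when the mm is torn down:
 *
 *      struct page *zero_page = mm_get_huge_zero_page(mm);
 *      if (!zero_page)
 *              ... fall back, e.g. allocate a real huge page ...
 *
 * The matching mm_put_huge_zero_page() is called from the mm teardown path
 * rather than by the fault handler itself.
 */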
#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))

static inline bool thp_migration_supported(void)
{
        return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}

static inline struct list_head *page_deferred_list(struct page *page)
{
        /*
         * See organization of tail pages of compound page in
         * "struct page" definition.
         */
        return &page[2].deferred_list;
}

#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })

#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })

static inline bool folio_test_pmd_mappable(struct folio *folio)
{
        return false;
}

static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
                unsigned long addr)
{
        return false;
}

static inline bool hugepage_vma_check(struct vm_area_struct *vma,
                                      unsigned long vm_flags, bool smaps,
                                      bool in_pf, bool enforce_sysfs)
{
        return false;
}

static inline void prep_transhuge_page(struct page *page) {}

#define transparent_hugepage_flags 0UL

#define thp_get_unmapped_area   NULL

static inline bool
can_split_folio(struct folio *folio, int *pextra_pins)
{
        return false;
}
static inline int
split_huge_page_to_list(struct page *page, struct list_head *list)
{
        return 0;
}
static inline int split_huge_page(struct page *page)
{
        return 0;
}
static inline void deferred_split_huge_page(struct page *page) {}
#define split_huge_pmd(__vma, __pmd, __address) \
        do { } while (0)

static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
                unsigned long address, bool freeze, struct folio *folio) {}

#define split_huge_pud(__vma, __pmd, __address) \
        do { } while (0)

static inline int hugepage_madvise(struct vm_area_struct *vma,
                                   unsigned long *vm_flags, int advice)
{
        return -EINVAL;
}

static inline int madvise_collapse(struct vm_area_struct *vma,
                                   struct vm_area_struct **prev,
                                   unsigned long start, unsigned long end)
{
        return -EINVAL;
}

static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         unsigned long start,
                                         unsigned long end,
                                         long adjust_next)
{
}
static inline int is_swap_pmd(pmd_t pmd)
{
        return 0;
}
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
                struct vm_area_struct *vma)
{
        return NULL;
}
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
                struct vm_area_struct *vma)
{
        return NULL;
}

static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
        return 0;
}

static inline bool is_huge_zero_page(struct page *page)
{
        return false;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
        return false;
}

static inline bool is_huge_zero_pud(pud_t pud)
{
        return false;
}

static inline void mm_put_huge_zero_page(struct mm_struct *mm)
{
        return;
}

static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
        unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
        return NULL;
}

static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
        unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
{
        return NULL;
}

static inline bool thp_migration_supported(void)
{
        return false;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline int split_folio_to_list(struct folio *folio,
                struct list_head *list)
{
        return split_huge_page_to_list(&folio->page, list);
}

static inline int split_folio(struct folio *folio)
{
        return split_folio_to_list(folio, NULL);
}
/*
 * Archs that select ARCH_WANTS_THP_SWAP but cannot support THP_SWP due to
 * limitations in the implementation (e.g. arm64 MTE) can override this to
 * false.
 */
#ifndef arch_thp_swp_supported
static inline bool arch_thp_swp_supported(void)
{
        return true;
}
#endif
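
/*
 * Illustrative sketch (modelled on arm64's MTE restriction): an architecture
 * opts out by defining the macro in its own headers before this point, e.g.:
 *
 *      #define arch_thp_swp_supported arch_thp_swp_supported
 *      static inline bool arch_thp_swp_supported(void)
 *      {
 *              return !system_supports_mte();
 *      }
 */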
#endif /* _LINUX_HUGE_MM_H */