internal.h 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. /* internal.h: mm/ internal definitions
  3. *
  4. * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #ifndef __MM_INTERNAL_H
  8. #define __MM_INTERNAL_H
  9. #include <linux/fs.h>
  10. #include <linux/mm.h>
  11. #include <linux/pagemap.h>
  12. #include <linux/rmap.h>
  13. #include <linux/tracepoint-defs.h>
  14. struct folio_batch;
  /*
   * Flags that influence only watermark checking and reclaim behaviour.
   * The MM uses this mask to honour the caller's constraints about IO, FS
   * and watermark checking while dropping placement hints such as
   * HIGHMEM usage.
   */
  #define GFP_RECLAIM_MASK \
          (__GFP_RECLAIM | __GFP_HIGH | __GFP_IO | __GFP_FS | \
           __GFP_NOWARN | __GFP_RETRY_MAYFAIL | __GFP_NOFAIL | \
           __GFP_NORETRY | __GFP_MEMALLOC | __GFP_NOMEMALLOC | \
           __GFP_ATOMIC | __GFP_NOLOCKDEP)

  /* GFP flags allowed during early boot: all bits except reclaim, IO and FS */
  #define GFP_BOOT_MASK \
          (__GFP_BITS_MASK & ~(__GFP_RECLAIM | __GFP_IO | __GFP_FS))

  /* Flags that control cpuset and node placement constraints */
  #define GFP_CONSTRAINT_MASK \
          (__GFP_HARDWALL | __GFP_THISNODE)

  /* Do not use these with a slab allocator */
  #define GFP_SLAB_BUG_MASK \
          (__GFP_DMA32 | __GFP_HIGHMEM | ~__GFP_BITS_MASK)
  /*
   * WARN_ON_ONCE_GFP - one-shot warning that honours __GFP_NOWARN.
   * @cond: condition to test; evaluated exactly once.
   * @gfp:  allocation flags of the caller; may be any expression.
   *
   * Different from WARN_ON_ONCE(), no warning is issued when @gfp contains
   * __GFP_NOWARN.  The per-call-site once-flag is only set when a warning
   * is actually emitted, so a suppressed hit does not use it up.
   *
   * @gfp is parenthesised before masking so that compound arguments such
   * as "a | b" are tested as a whole rather than as "a | (b & flag)".
   *
   * Evaluates to the truth value of @cond, whether or not a warning was
   * printed.
   */
  #define WARN_ON_ONCE_GFP(cond, gfp) ({ \
          static bool __section(".data.once") __warned; \
          int __ret_warn_once = !!(cond); \
          \
          if (unlikely(!((gfp) & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
                  __warned = true; \
                  WARN_ON(1); \
          } \
          unlikely(__ret_warn_once); \
  })
  45. void page_writeback_init(void);
  46. static inline void *folio_raw_mapping(struct folio *folio)
  47. {
  48. unsigned long mapping = (unsigned long)folio->mapping;
  49. return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
  50. }
  51. void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
  52. int nr_throttled);
  53. static inline void acct_reclaim_writeback(struct folio *folio)
  54. {
  55. pg_data_t *pgdat = folio_pgdat(folio);
  56. int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);
  57. if (nr_throttled)
  58. __acct_reclaim_writeback(pgdat, folio, nr_throttled);
  59. }
  60. static inline void wake_throttle_isolated(pg_data_t *pgdat)
  61. {
  62. wait_queue_head_t *wqh;
  63. wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
  64. if (waitqueue_active(wqh))
  65. wake_up(wqh);
  66. }
  /*
   * Prototypes only: none of the functions below is defined in this header.
   */
  67. vm_fault_t do_swap_page(struct vm_fault *vmf);
  68. void folio_rotate_reclaimable(struct folio *folio);
  69. bool __folio_end_writeback(struct folio *folio);
  70. void deactivate_file_folio(struct folio *folio);
  71. void folio_activate(struct folio *folio);
  72. void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
  73. struct vm_area_struct *start_vma, unsigned long floor,
  74. unsigned long ceiling, unsigned long start_t,
  75. bool mm_wr_locked);
  76. void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
  /* Forward declaration: only pointers to struct zap_details are used here. */
  77. struct zap_details;
  78. void unmap_page_range(struct mmu_gather *tlb,
  79. struct vm_area_struct *vma,
  80. unsigned long addr, unsigned long end,
  81. struct zap_details *details);
  82. void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
  83. unsigned int order);
  84. void force_page_cache_ra(struct readahead_control *, unsigned long nr);
  85. static inline void force_page_cache_readahead(struct address_space *mapping,
  86. struct file *file, pgoff_t index, unsigned long nr_to_read)
  87. {
  88. DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
  89. force_page_cache_ra(&ractl, nr_to_read);
  90. }
  /*
   * Page cache lookup, truncation and invalidation prototypes; the
   * definitions are not part of this header.
   */
  91. unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
  92. pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
  93. unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
  94. pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
  95. void filemap_free_folio(struct address_space *mapping, struct folio *folio);
  96. int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
  97. bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
  98. loff_t end);
  99. long invalidate_inode_page(struct page *page);
  100. unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
  101. pgoff_t start, pgoff_t end, unsigned long *nr_pagevec);
  /**
   * folio_evictable - Test whether a folio is evictable.
   * @folio: The folio to test.
   *
   * An evictable folio belongs on the active/inactive lists, a
   * non-evictable one on the unevictable list.
   *
   * A folio is not evictable when:
   * 1. its mapping is marked unevictable, or
   * 2. one of its pages is part of an mlocked VMA.
   *
   * Return: true if @folio is evictable.
   */
  static inline bool folio_evictable(struct folio *folio)
  {
          bool evictable = false;

          /* Prevent address_space of inode and swap cache from being freed */
          rcu_read_lock();
          if (!mapping_unevictable(folio_mapping(folio)))
                  evictable = !folio_test_mlocked(folio);
          rcu_read_unlock();

          return evictable;
  }
  /*
   * page_evictable - page flavour of the evictability test: true unless the
   * page's mapping is marked unevictable or the page is mlocked.
   */
  static inline bool page_evictable(struct page *page)
  {
          bool evictable;

          /* Prevent address_space of inode and swap cache from being freed */
          rcu_read_lock();
          evictable = !(mapping_unevictable(page_mapping(page)) ||
                        PageMlocked(page));
          rcu_read_unlock();

          return evictable;
  }
  /*
   * set_page_refcounted - give a page that currently has ->_refcount == 0
   * a reference count of one.
   *
   * Both preconditions are checked with VM_BUG_ON_PAGE(): @page must not be
   * a tail page and its reference count must be zero on entry.
   */
  static inline void set_page_refcounted(struct page *page)
  {
          VM_BUG_ON_PAGE(PageTail(page), page);
          VM_BUG_ON_PAGE(page_ref_count(page), page);

          set_page_count(page, 1);
  }
  142. extern unsigned long highest_memmap_pfn;
  143. /*
  144. * Maximum number of reclaim retries without progress before the OOM
  145. * killer is considered the only way forward.
  146. */
  147. #define MAX_RECLAIM_RETRIES 16
  148. /*
  149. * in mm/early_ioremap.c
  150. */
  151. pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
  152. unsigned long size, pgprot_t prot);
  153. /*
  154. * in mm/vmscan.c:
  155. */
  156. int isolate_lru_page(struct page *page);
  157. int folio_isolate_lru(struct folio *folio);
  158. void putback_lru_page(struct page *page);
  159. void folio_putback_lru(struct folio *folio);
  160. extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
  161. /*
  162. * in mm/rmap.c:
  163. */
  164. pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
  165. /*
  166. * in mm/page_alloc.c
  167. */
  168. /*
  169. * Structure for holding the mostly immutable allocation parameters passed
  170. * between functions involved in allocations, including the alloc_pages*
  171. * family of functions.
  172. *
  173. * nodemask, migratetype and highest_zoneidx are initialized only once in
  174. * __alloc_pages() and then never change.
  175. *
  176. * zonelist, preferred_zone and highest_zoneidx are set first in
  177. * __alloc_pages() for the fast path, and might be later changed
  178. * in __alloc_pages_slowpath(). All other functions pass the whole structure
  179. * by a const pointer.
  180. */
  181. struct alloc_context {
  182. struct zonelist *zonelist;
  183. nodemask_t *nodemask;
  184. struct zoneref *preferred_zoneref;
  185. int migratetype;
  186. /*
  187. * highest_zoneidx represents highest usable zone index of
  188. * the allocation request. Due to the nature of the zone,
  189. * memory on lower zone than the highest_zoneidx will be
  190. * protected by lowmem_reserve[highest_zoneidx].
  191. *
  192. * highest_zoneidx is also used by reclaim/compaction to limit
  193. * the target zone since higher zone than this index cannot be
  194. * usable for this allocation request.
  195. */
  196. enum zone_type highest_zoneidx;
  197. bool spread_dirty_pages;
  198. };
  /*
   * buddy_order - order of a free page in the buddy system.
   *
   * The caller generally has to hold page_zone(page)->lock so that the page
   * cannot be allocated in parallel, which would make the returned order
   * garbage.  A caller that does not hold the lock must either guarantee
   * that the page cannot be allocated or merged in parallel, or cope with
   * invalid values and use buddy_order_unsafe() instead.
   */
  static inline unsigned int buddy_order(struct page *page)
  {
          /* PageBuddy() must be checked by the caller */
          unsigned int order = page_private(page);

          return order;
  }
  /*
   * buddy_order_unsafe - buddy_order() for callers that cannot afford to
   * hold the zone lock.
   *
   * The caller should test PageBuddy() first to keep the race window small
   * and must handle invalid values gracefully.
   *
   * READ_ONCE() makes sure that a caller which stores the result in a local
   * variable (for example to range-check it before use) really gets one
   * load: the compiler may not drop the variable and re-read
   * page_private(page) several times, which could yield different values
   * for the check and for the actual use.
   */
  #define buddy_order_unsafe(page) READ_ONCE(page_private(page))
  /*
   * page_is_buddy - check whether @buddy is free and is the buddy of @page.
   *
   * A page and its buddy can be coalesced if
   * (a) the buddy is not in a hole (check before calling!) &&
   * (b) the buddy is in the buddy system &&
   * (c) a page and its buddy have the same order &&
   * (d) a page and its buddy are in the same zone.
   *
   * Membership in the buddy system is recorded with PageBuddy; setting,
   * clearing and testing PageBuddy is serialized by zone->lock.  The order
   * of a page is recorded in page_private(page).
   */
  static inline bool page_is_buddy(struct page *page, struct page *buddy,
                                   unsigned int order)
  {
          bool in_buddy_system = page_is_guard(buddy) || PageBuddy(buddy);

          if (!in_buddy_system || buddy_order(buddy) != order)
                  return false;

          /*
           * The zone check comes last so that zone/node ids are not
           * computed for pages that could never merge anyway.
           */
          if (page_zone_id(page) != page_zone_id(buddy))
                  return false;

          VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);

          return true;
  }
  /*
   * Locate the struct page for both the matching buddy in our
   * pair (buddy1) and the combined O(n+1) page they form (page).
   *
   * 1) Any buddy B1 will have an order O twin B2 which satisfies
   * the following equation:
   * B2 = B1 ^ (1 << O)
   * For example, if the starting buddy (buddy2) is #8 its order
   * 1 buddy is #10:
   * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
   *
   * 2) Any buddy B will have an order O+1 parent P which
   * satisfies the following equation:
   * P = B & ~(1 << O)
   *
   * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
   *
   * @page_pfn: pfn of the page whose buddy is wanted.
   * @order:    order of that page.
   *
   * Return: the pfn of the order-@order buddy of @page_pfn.
   */
  static inline unsigned long
  __find_buddy_pfn(unsigned long page_pfn, unsigned int order)
  {
          /*
           * Shift an unsigned long, not an int: "1 << order" is undefined
           * for order >= 31 and would sign-extend into the pfn.
           */
          return page_pfn ^ (1UL << order);
  }
  275. /*
  276. * Find the buddy of @page and validate it.
  277. * @page: The input page
  278. * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the
  279. * function is used in the performance-critical __free_one_page().
  280. * @order: The order of the page
  281. * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to
  282. * page_to_pfn().
  283. *
  284. * The found buddy can be a non PageBuddy, out of @page's zone, or its order is
  285. * not the same as @page. The validation is necessary before use it.
  286. *
  287. * Return: the found buddy page or NULL if not found.
  288. */
  289. static inline struct page *find_buddy_page_pfn(struct page *page,
  290. unsigned long pfn, unsigned int order, unsigned long *buddy_pfn)
  291. {
  292. unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order);
  293. struct page *buddy;
  294. buddy = page + (__buddy_pfn - pfn);
  295. if (buddy_pfn)
  296. *buddy_pfn = __buddy_pfn;
  297. if (page_is_buddy(page, buddy, order))
  298. return buddy;
  299. return NULL;
  300. }
  301. extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
  302. unsigned long end_pfn, struct zone *zone);
  303. static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
  304. unsigned long end_pfn, struct zone *zone)
  305. {
  306. if (zone->contiguous)
  307. return pfn_to_page(start_pfn);
  308. return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
  309. }
  /*
   * Declarations only: the functions and the variable below are defined
   * outside this header.
   */
  310. extern int __isolate_free_page(struct page *page, unsigned int order);
  311. extern void __putback_isolated_page(struct page *page, unsigned int order,
  312. int mt);
  313. extern void memblock_free_pages(struct page *page, unsigned long pfn,
  314. unsigned int order);
  315. extern void __free_pages_core(struct page *page, unsigned int order);
  316. extern void prep_compound_page(struct page *page, unsigned int order);
  317. extern void post_alloc_hook(struct page *page, unsigned int order,
  318. gfp_t gfp_flags);
  319. extern int user_min_free_kbytes;
  320. extern void free_unref_page(struct page *page, unsigned int order);
  321. extern void free_unref_page_list(struct list_head *list);
  322. extern void zone_pcp_reset(struct zone *zone);
  323. extern void zone_pcp_disable(struct zone *zone);
  324. extern void zone_pcp_enable(struct zone *zone);
  325. extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
  326. phys_addr_t min_addr,
  327. int nid, bool exact_nid);
  328. int split_free_page(struct page *free_page,
  329. unsigned int order, unsigned long split_pfn_offset);
  /* Everything up to the matching #endif needs CONFIG_COMPACTION or CONFIG_CMA. */
  330. #if defined CONFIG_COMPACTION || defined CONFIG_CMA
  331. /*
  332. * in mm/compaction.c
  333. */
  334. /*
  335. * compact_control is used to track pages being migrated and the free pages
  336. * they are being migrated to during memory compaction. The free_pfn starts
  337. * at the end of a zone and migrate_pfn begins at the start. Movable pages
  338. * are moved to the end of a zone during a compaction run and the run
  339. * completes when free_pfn <= migrate_pfn
  340. */
  341. struct compact_control {
  342. struct list_head freepages; /* List of free pages to migrate to */
  343. struct list_head migratepages; /* List of pages being migrated */
  344. unsigned int nr_freepages; /* Number of isolated free pages */
  345. unsigned int nr_migratepages; /* Number of pages to migrate */
  346. unsigned long free_pfn; /* isolate_freepages search base */
  347. /*
  348. * Acts as an in/out parameter to page isolation for migration.
  349. * isolate_migratepages uses it as a search base.
  350. * isolate_migratepages_block will update the value to the next pfn
  351. * after the last isolated one.
  352. */
  353. unsigned long migrate_pfn;
  354. unsigned long fast_start_pfn; /* a pfn to start linear scan from */
  /* NOTE(review): presumably the zone being compacted - confirm against users */
  355. struct zone *zone;
  /* NOTE(review): running scan totals, judging by the names - confirm units */
  356. unsigned long total_migrate_scanned;
  357. unsigned long total_free_scanned;
  358. unsigned short fast_search_fail;/* failures to use free list searches */
  359. short search_order; /* order to start a fast search at */
  360. const gfp_t gfp_mask; /* gfp mask of a direct compactor */
  361. int order; /* order a direct compactor needs */
  362. int migratetype; /* migratetype of direct compactor */
  363. const unsigned int alloc_flags; /* alloc flags of a direct compactor */
  364. const int highest_zoneidx; /* zone index of a direct compactor */
  365. enum migrate_mode mode; /* Async or sync migration mode */
  366. bool ignore_skip_hint; /* Scan blocks even if marked skip */
  367. bool no_set_skip_hint; /* Don't mark blocks for skipping */
  368. bool ignore_block_suitable; /* Scan blocks considered unsuitable */
  369. bool direct_compaction; /* False from kcompactd or /proc/... */
  370. bool proactive_compaction; /* kcompactd proactive compaction */
  371. bool whole_zone; /* Whole zone should/has been scanned */
  372. bool contended; /* Signal lock contention */
  373. bool rescan; /* Rescanning the same pageblock */
  374. bool alloc_contig; /* alloc_contig_range allocation */
  375. };
  376. /*
  377. * Used in direct compaction when a page should be taken from the freelists
  378. * immediately when one is created during the free path.
  379. */
  380. struct capture_control {
  381. struct compact_control *cc;
  382. struct page *page;
  383. };
  /* Prototypes only; each takes the compact_control describing the run. */
  384. unsigned long
  385. isolate_freepages_range(struct compact_control *cc,
  386. unsigned long start_pfn, unsigned long end_pfn);
  387. int
  388. isolate_migratepages_range(struct compact_control *cc,
  389. unsigned long low_pfn, unsigned long end_pfn);
  390. int __alloc_contig_migrate_range(struct compact_control *cc,
  391. unsigned long start, unsigned long end,
  392. int migratetype);
  393. #endif
  394. int find_suitable_fallback(struct free_area *area, unsigned int order,
  395. int migratetype, bool only_stealable, bool *can_steal);
  396. /*
  397. * These three helpers classify VMAs for virtual memory accounting.
  398. */
  399. /*
  400. * Executable code area - executable, not writable, not stack
  401. */
  402. static inline bool is_exec_mapping(vm_flags_t flags)
  403. {
  404. return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
  405. }
  406. /*
  407. * Stack area - automatically grows in one direction
  408. *
  409. * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
  410. * do_mmap() forbids all other combinations.
  411. */
  412. static inline bool is_stack_mapping(vm_flags_t flags)
  413. {
  414. return (flags & VM_STACK) == VM_STACK;
  415. }
  416. /*
  417. * Data area - private, writable, not stack
  418. */
  419. static inline bool is_data_mapping(vm_flags_t flags)
  420. {
  421. return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
  422. }
  423. /* mm/util.c */
  424. struct anon_vma *folio_anon_vma(struct folio *folio);
  /* The declarations from here to the matching #else are for CONFIG_MMU builds. */
  425. #ifdef CONFIG_MMU
  426. void unmap_mapping_folio(struct folio *folio);
  427. extern long populate_vma_page_range(struct vm_area_struct *vma,
  428. unsigned long start, unsigned long end, int *locked);
  429. extern long faultin_vma_page_range(struct vm_area_struct *vma,
  430. unsigned long start, unsigned long end,
  431. bool write, int *locked);
  432. extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
  433. unsigned long len);
  434. /*
  435. * mlock_vma_page() and munlock_vma_page():
  436. * should be called with vma's mmap_lock held for read or write,
  437. * under page table lock for the pte/pmd being added or removed.
  438. *
  439. * mlock is usually called at the end of page_add_*_rmap(),
  440. * munlock at the end of page_remove_rmap(); but new anon
  441. * pages are managed by lru_cache_add_inactive_or_unevictable()
  442. * calling mlock_new_page().
  443. *
  444. * @compound is used to include pmd mappings of THPs, but filter out
  445. * pte mappings of THPs, which cannot be consistently counted: a pte
  446. * mapping of the THP head cannot be distinguished by the page alone.
  447. */
  448. void mlock_folio(struct folio *folio);
  449. static inline void mlock_vma_folio(struct folio *folio,
  450. struct vm_area_struct *vma, bool compound)
  451. {
  452. /*
  453. * The VM_SPECIAL check here serves two purposes.
  454. * 1) VM_IO check prevents migration from double-counting during mlock.
  455. * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED
  456. * is never left set on a VM_SPECIAL vma, there is an interval while
  457. * file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
  458. * still be set while VM_SPECIAL bits are added: so ignore it then.
  459. */
  460. if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
  461. (compound || !folio_test_large(folio)))
  462. mlock_folio(folio);
  463. }
  /*
   * mlock_vma_page - page flavour of mlock_vma_folio(): converts @page to
   * its folio with page_folio() and forwards @vma and @compound unchanged.
   */
  static inline void mlock_vma_page(struct page *page,
                  struct vm_area_struct *vma, bool compound)
  {
          struct folio *folio = page_folio(page);

          mlock_vma_folio(folio, vma, compound);
  }
  469. void munlock_page(struct page *page);
  470. static inline void munlock_vma_page(struct page *page,
  471. struct vm_area_struct *vma, bool compound)
  472. {
  473. if (unlikely(vma->vm_flags & VM_LOCKED) &&
  474. (compound || !PageTransCompound(page)))
  475. munlock_page(page);
  476. }
  /* Prototypes only; no-op versions of the mlock_* ones exist for !CONFIG_MMU. */
  477. void mlock_new_page(struct page *page);
  478. bool need_mlock_page_drain(int cpu);
  479. void mlock_page_drain_local(void);
  480. void mlock_page_drain_remote(int cpu);
  481. extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
  482. /*
  483. * Return the start of user virtual address at the specific offset within
  484. * a vma.
  485. */
  486. static inline unsigned long
  487. vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
  488. struct vm_area_struct *vma)
  489. {
  490. unsigned long address;
  491. if (pgoff >= vma->vm_pgoff) {
  492. address = vma->vm_start +
  493. ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
  494. /* Check for address beyond vma (or wrapped through 0?) */
  495. if (address < vma->vm_start || address >= vma->vm_end)
  496. address = -EFAULT;
  497. } else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) {
  498. /* Test above avoids possibility of wrap to 0 on 32-bit */
  499. address = vma->vm_start;
  500. } else {
  501. address = -EFAULT;
  502. }
  503. return address;
  504. }
  505. /*
  506. * Return the start of user virtual address of a page within a vma.
  507. * Returns -EFAULT if all of the page is outside the range of vma.
  508. * If page is a compound head, the entire compound page is considered.
  509. */
  510. static inline unsigned long
  511. vma_address(struct page *page, struct vm_area_struct *vma)
  512. {
  513. VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
  514. return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma);
  515. }
  516. /*
  517. * Then at what user virtual address will none of the range be found in vma?
  518. * Assumes that vma_address() already returned a good starting address.
  519. */
  520. static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw)
  521. {
  522. struct vm_area_struct *vma = pvmw->vma;
  523. pgoff_t pgoff;
  524. unsigned long address;
  525. /* Common case, plus ->pgoff is invalid for KSM */
  526. if (pvmw->nr_pages == 1)
  527. return pvmw->address + PAGE_SIZE;
  528. pgoff = pvmw->pgoff + pvmw->nr_pages;
  529. address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
  530. /* Check for address beyond vma (or wrapped through 0?) */
  531. if (address < vma->vm_start || address > vma->vm_end)
  532. address = vma->vm_end;
  533. return address;
  534. }
  535. static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
  536. struct file *fpin)
  537. {
  538. int flags = vmf->flags;
  539. if (fpin)
  540. return fpin;
  541. /*
  542. * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
  543. * anything, so we only pin the file and drop the mmap_lock if only
  544. * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
  545. */
  546. if (fault_flag_allow_retry_first(flags) &&
  547. !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
  548. fpin = get_file(vmf->vma->vm_file);
  549. release_fault_lock(vmf);
  550. }
  551. return fpin;
  552. }
  553. #else /* !CONFIG_MMU */
  /*
   * !CONFIG_MMU branch: empty stand-ins so callers need no #ifdefs.
   * need_mlock_page_drain() always reports false here.
   */
  554. static inline void unmap_mapping_folio(struct folio *folio) { }
  555. static inline void mlock_vma_page(struct page *page,
  556. struct vm_area_struct *vma, bool compound) { }
  557. static inline void munlock_vma_page(struct page *page,
  558. struct vm_area_struct *vma, bool compound) { }
  559. static inline void mlock_new_page(struct page *page) { }
  560. static inline bool need_mlock_page_drain(int cpu) { return false; }
  561. static inline void mlock_page_drain_local(void) { }
  562. static inline void mlock_page_drain_remote(int cpu) { }
  563. static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
  564. {
  565. }
  566. #endif /* !CONFIG_MMU */
  /* Memory initialisation debug and verification */
  enum mminit_level {
          MMINIT_WARNING,
          MMINIT_VERIFY,
          MMINIT_TRACE
  };

  #ifdef CONFIG_DEBUG_MEMORY_INIT

  extern int mminit_loglevel;

  /*
   * mminit_dprintk - print a memory-init debug message.
   * @level:  enum mminit_level of the message; evaluated exactly once.
   * @prefix: string literal placed after the "mminit::" tag.
   * @fmt:    printk-style format string literal, followed by its arguments.
   *
   * Nothing is printed unless @level is below mminit_loglevel.  Messages at
   * MMINIT_WARNING go through pr_warn(), everything else through
   * printk(KERN_DEBUG ...).
   */
  #define mminit_dprintk(level, prefix, fmt, arg...) \
  do { \
          const int __mminit_lvl = (level); \
          \
          if (__mminit_lvl < mminit_loglevel) { \
                  if (__mminit_lvl <= MMINIT_WARNING) \
                          pr_warn("mminit::" prefix " " fmt, ##arg); \
                  else \
                          printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \
          } \
  } while (0)

  extern void mminit_verify_pageflags_layout(void);
  extern void mminit_verify_zonelist(void);

  #else

  /* Without CONFIG_DEBUG_MEMORY_INIT all of these compile to nothing. */
  static inline void mminit_dprintk(enum mminit_level level,
                                    const char *prefix, const char *fmt, ...)
  {
  }

  static inline void mminit_verify_pageflags_layout(void)
  {
  }

  static inline void mminit_verify_zonelist(void)
  {
  }

  #endif /* CONFIG_DEBUG_MEMORY_INIT */
  598. #define NODE_RECLAIM_NOSCAN -2
  599. #define NODE_RECLAIM_FULL -1
  600. #define NODE_RECLAIM_SOME 0
  601. #define NODE_RECLAIM_SUCCESS 1
  602. #ifdef CONFIG_NUMA
  603. extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
  604. extern int find_next_best_node(int node, nodemask_t *used_node_mask);
  605. #else
  606. static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
  607. unsigned int order)
  608. {
  609. return NODE_RECLAIM_NOSCAN;
  610. }
  611. static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
  612. {
  613. return NUMA_NO_NODE;
  614. }
  615. #endif
  616. /*
  617. * mm/memory-failure.c
  618. */
  619. extern int hwpoison_filter(struct page *p);
  620. extern u32 hwpoison_filter_dev_major;
  621. extern u32 hwpoison_filter_dev_minor;
  622. extern u64 hwpoison_filter_flags_mask;
  623. extern u64 hwpoison_filter_flags_value;
  624. extern u64 hwpoison_filter_memcg;
  625. extern u32 hwpoison_filter_enable;
  626. #ifdef CONFIG_MEMORY_FAILURE
  627. void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
  628. #else
  /* No-op when CONFIG_MEMORY_FAILURE is not set. */
  629. static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
  630. {
  631. }
  632. #endif
  /* Prototypes only. vm_mmap_pgoff() is __must_check: its result may not be ignored. */
  633. extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long,
  634. unsigned long, unsigned long,
  635. unsigned long, unsigned long);
  636. extern void set_pageblock_order(void);
  637. unsigned int reclaim_clean_pages_from_list(struct zone *zone,
  638. struct list_head *page_list);
  /* The ALLOC_WMARK bits are used as an index to zone->watermark */
  #define ALLOC_WMARK_MIN         WMARK_MIN
  #define ALLOC_WMARK_LOW         WMARK_LOW
  #define ALLOC_WMARK_HIGH        WMARK_HIGH

  /* don't check watermarks at all */
  #define ALLOC_NO_WATERMARKS     0x04

  /* Mask to get the watermark bits */
  #define ALLOC_WMARK_MASK        (ALLOC_NO_WATERMARKS-1)

  /*
   * Only MMU archs have async oom victim reclaim - aka oom_reaper so we
   * cannot assume a reduced access to memory reserves is sufficient for
   * !MMU
   */
  #ifdef CONFIG_MMU
  #define ALLOC_OOM               0x08
  #else
  #define ALLOC_OOM               ALLOC_NO_WATERMARKS
  #endif

  /* try to alloc harder */
  #define ALLOC_HARDER            0x10
  /* __GFP_HIGH set */
  #define ALLOC_HIGH              0x20
  /* check for correct cpuset */
  #define ALLOC_CPUSET            0x40
  /* allow allocations from CMA areas */
  #define ALLOC_CMA               0x80

  /* avoid mixing pageblock types; compiled out without CONFIG_ZONE_DMA32 */
  #ifdef CONFIG_ZONE_DMA32
  #define ALLOC_NOFRAGMENT        0x100
  #else
  #define ALLOC_NOFRAGMENT        0x0
  #endif

  /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
  #define ALLOC_KSWAPD            0x800
  /* Forward declarations: only the names are needed in this header. */
  666. enum ttu_flags;
  667. struct tlbflush_unmap_batch;
  668. /*
  669. * only for MM internal work items which do not depend on
  670. * any allocations or locks which might depend on allocations
  671. */
  672. extern struct workqueue_struct *mm_percpu_wq;
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH

  void try_to_unmap_flush(void);
  void try_to_unmap_flush_dirty(void);
  void flush_tlb_batched_pending(struct mm_struct *mm);

  #else

  /* Without batched unmap TLB flushing all three calls are no-ops. */
  static inline void try_to_unmap_flush(void) { }
  static inline void try_to_unmap_flush_dirty(void) { }
  static inline void flush_tlb_batched_pending(struct mm_struct *mm) { }

  #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
  /* Tables of struct trace_print_flags entries; defined outside this header. */
  688. extern const struct trace_print_flags pageflag_names[];
  689. extern const struct trace_print_flags vmaflag_names[];
  690. extern const struct trace_print_flags gfpflag_names[];
  691. static inline bool is_migrate_highatomic(enum migratetype migratetype)
  692. {
  693. return migratetype == MIGRATE_HIGHATOMIC;
  694. }
  695. static inline bool is_migrate_highatomic_page(struct page *page)
  696. {
  697. return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
  698. }
  699. void setup_zone_pageset(struct zone *zone);
  /*
   * Parameters describing where migrated pages may be placed.
   * NOTE(review): nmask is presumably the set of allowed nodes and gfp_mask
   * the flags for allocating the target page - confirm against the users.
   */
  700. struct migration_target_control {
  701. int nid; /* preferred node id */
  702. nodemask_t *nmask;
  703. gfp_t gfp_mask;
  704. };
  705. /*
  706. * mm/vmalloc.c
  707. */
  708. #ifdef CONFIG_MMU
  709. int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
  710. pgprot_t prot, struct page **pages, unsigned int page_shift);
  711. #else
  712. static inline
  713. int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
  714. pgprot_t prot, struct page **pages, unsigned int page_shift)
  715. {
  716. return -EINVAL;
  717. }
  718. #endif
  /*
   * Prototypes only.
   * NOTE(review): vunmap_range_noflush() also has a static inline stub in the
   * !CONFIG_MMU branch earlier in this header - confirm the redeclaration
   * below is intended for such builds.
   */
  719. int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
  720. pgprot_t prot, struct page **pages,
  721. unsigned int page_shift);
  722. void vunmap_range_noflush(unsigned long start, unsigned long end);
  723. void __vunmap_range_noflush(unsigned long start, unsigned long end);
  724. int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
  725. unsigned long addr, int page_nid, int *flags);
  726. void free_zone_device_page(struct page *page);
  727. int migrate_device_coherent_page(struct page *page);
  728. /*
  729. * mm/gup.c
  730. */
  731. struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
  732. extern bool mirrored_kernelcore;
  733. static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
  734. {
  735. /*
  736. * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty
  737. * enablements, because when without soft-dirty being compiled in,
  738. * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
  739. * will be constantly true.
  740. */
  741. if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
  742. return false;
  743. /*
  744. * Soft-dirty is kind of special: its tracking is enabled when the
  745. * vma flags not set.
  746. */
  747. return !(vma->vm_flags & VM_SOFTDIRTY);
  748. }
  749. #endif /* __MM_INTERNAL_H */