/* include/linux/pagemap.h — page cache / address_space helpers */
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef _LINUX_PAGEMAP_H
  3. #define _LINUX_PAGEMAP_H
  4. /*
  5. * Copyright 1995 Linus Torvalds
  6. */
  7. #include <linux/mm.h>
  8. #include <linux/fs.h>
  9. #include <linux/list.h>
  10. #include <linux/highmem.h>
  11. #include <linux/compiler.h>
  12. #include <linux/uaccess.h>
  13. #include <linux/gfp.h>
  14. #include <linux/bitops.h>
  15. #include <linux/hardirq.h> /* for in_interrupt() */
  16. #include <linux/hugetlb_inline.h>
  17. struct folio_batch;
  18. unsigned long invalidate_mapping_pages(struct address_space *mapping,
  19. pgoff_t start, pgoff_t end);
  20. static inline void invalidate_remote_inode(struct inode *inode)
  21. {
  22. if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
  23. S_ISLNK(inode->i_mode))
  24. invalidate_mapping_pages(inode->i_mapping, 0, -1);
  25. }
  26. int invalidate_inode_pages2(struct address_space *mapping);
  27. int invalidate_inode_pages2_range(struct address_space *mapping,
  28. pgoff_t start, pgoff_t end);
  29. int write_inode_now(struct inode *, int sync);
  30. int filemap_fdatawrite(struct address_space *);
  31. int filemap_flush(struct address_space *);
  32. int filemap_fdatawait_keep_errors(struct address_space *mapping);
  33. int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
  34. int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
  35. loff_t start_byte, loff_t end_byte);
  36. static inline int filemap_fdatawait(struct address_space *mapping)
  37. {
  38. return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
  39. }
  40. bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
  41. int filemap_write_and_wait_range(struct address_space *mapping,
  42. loff_t lstart, loff_t lend);
  43. int __filemap_fdatawrite_range(struct address_space *mapping,
  44. loff_t start, loff_t end, int sync_mode);
  45. int filemap_fdatawrite_range(struct address_space *mapping,
  46. loff_t start, loff_t end);
  47. int filemap_check_errors(struct address_space *mapping);
  48. void __filemap_set_wb_err(struct address_space *mapping, int err);
  49. int filemap_fdatawrite_wbc(struct address_space *mapping,
  50. struct writeback_control *wbc);
  51. static inline int filemap_write_and_wait(struct address_space *mapping)
  52. {
  53. return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
  54. }
  55. /**
  56. * filemap_set_wb_err - set a writeback error on an address_space
  57. * @mapping: mapping in which to set writeback error
  58. * @err: error to be set in mapping
  59. *
  60. * When writeback fails in some way, we must record that error so that
  61. * userspace can be informed when fsync and the like are called. We endeavor
  62. * to report errors on any file that was open at the time of the error. Some
  63. * internal callers also need to know when writeback errors have occurred.
  64. *
  65. * When a writeback error occurs, most filesystems will want to call
  66. * filemap_set_wb_err to record the error in the mapping so that it will be
  67. * automatically reported whenever fsync is called on the file.
  68. */
  69. static inline void filemap_set_wb_err(struct address_space *mapping, int err)
  70. {
  71. /* Fastpath for common case of no error */
  72. if (unlikely(err))
  73. __filemap_set_wb_err(mapping, err);
  74. }
  75. /**
  76. * filemap_check_wb_err - has an error occurred since the mark was sampled?
  77. * @mapping: mapping to check for writeback errors
  78. * @since: previously-sampled errseq_t
  79. *
  80. * Grab the errseq_t value from the mapping, and see if it has changed "since"
  81. * the given value was sampled.
  82. *
  83. * If it has then report the latest error set, otherwise return 0.
  84. */
  85. static inline int filemap_check_wb_err(struct address_space *mapping,
  86. errseq_t since)
  87. {
  88. return errseq_check(&mapping->wb_err, since);
  89. }
  90. /**
  91. * filemap_sample_wb_err - sample the current errseq_t to test for later errors
  92. * @mapping: mapping to be sampled
  93. *
  94. * Writeback errors are always reported relative to a particular sample point
  95. * in the past. This function provides those sample points.
  96. */
  97. static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
  98. {
  99. return errseq_sample(&mapping->wb_err);
  100. }
  101. /**
  102. * file_sample_sb_err - sample the current errseq_t to test for later errors
  103. * @file: file pointer to be sampled
  104. *
  105. * Grab the most current superblock-level errseq_t value for the given
  106. * struct file.
  107. */
  108. static inline errseq_t file_sample_sb_err(struct file *file)
  109. {
  110. return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
  111. }
  112. /*
  113. * Flush file data before changing attributes. Caller must hold any locks
  114. * required to prevent further writes to this file until we're done setting
  115. * flags.
  116. */
  117. static inline int inode_drain_writes(struct inode *inode)
  118. {
  119. inode_dio_wait(inode);
  120. return filemap_write_and_wait(inode->i_mapping);
  121. }
  122. static inline bool mapping_empty(struct address_space *mapping)
  123. {
  124. return xa_empty(&mapping->i_pages);
  125. }
  126. /*
  127. * mapping_shrinkable - test if page cache state allows inode reclaim
  128. * @mapping: the page cache mapping
  129. *
  130. * This checks the mapping's cache state for the pupose of inode
  131. * reclaim and LRU management.
  132. *
  133. * The caller is expected to hold the i_lock, but is not required to
  134. * hold the i_pages lock, which usually protects cache state. That's
  135. * because the i_lock and the list_lru lock that protect the inode and
  136. * its LRU state don't nest inside the irq-safe i_pages lock.
  137. *
  138. * Cache deletions are performed under the i_lock, which ensures that
  139. * when an inode goes empty, it will reliably get queued on the LRU.
  140. *
  141. * Cache additions do not acquire the i_lock and may race with this
  142. * check, in which case we'll report the inode as shrinkable when it
  143. * has cache pages. This is okay: the shrinker also checks the
  144. * refcount and the referenced bit, which will be elevated or set in
  145. * the process of adding new cache pages to an inode.
  146. */
  147. static inline bool mapping_shrinkable(struct address_space *mapping)
  148. {
  149. void *head;
  150. /*
  151. * On highmem systems, there could be lowmem pressure from the
  152. * inodes before there is highmem pressure from the page
  153. * cache. Make inodes shrinkable regardless of cache state.
  154. */
  155. if (IS_ENABLED(CONFIG_HIGHMEM))
  156. return true;
  157. /* Cache completely empty? Shrink away. */
  158. head = rcu_access_pointer(mapping->i_pages.xa_head);
  159. if (!head)
  160. return true;
  161. /*
  162. * The xarray stores single offset-0 entries directly in the
  163. * head pointer, which allows non-resident page cache entries
  164. * to escape the shadow shrinker's list of xarray nodes. The
  165. * inode shrinker needs to pick them up under memory pressure.
  166. */
  167. if (!xa_is_node(head) && xa_is_value(head))
  168. return true;
  169. return false;
  170. }
  171. /*
  172. * Bits in mapping->flags.
  173. */
  174. enum mapping_flags {
  175. AS_EIO = 0, /* IO error on async write */
  176. AS_ENOSPC = 1, /* ENOSPC on async write */
  177. AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */
  178. AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */
  179. AS_EXITING = 4, /* final truncate in progress */
  180. /* writeback related tags are not used */
  181. AS_NO_WRITEBACK_TAGS = 5,
  182. AS_LARGE_FOLIO_SUPPORT = 6,
  183. };
  184. /**
  185. * mapping_set_error - record a writeback error in the address_space
  186. * @mapping: the mapping in which an error should be set
  187. * @error: the error to set in the mapping
  188. *
  189. * When writeback fails in some way, we must record that error so that
  190. * userspace can be informed when fsync and the like are called. We endeavor
  191. * to report errors on any file that was open at the time of the error. Some
  192. * internal callers also need to know when writeback errors have occurred.
  193. *
  194. * When a writeback error occurs, most filesystems will want to call
  195. * mapping_set_error to record the error in the mapping so that it can be
  196. * reported when the application calls fsync(2).
  197. */
  198. static inline void mapping_set_error(struct address_space *mapping, int error)
  199. {
  200. if (likely(!error))
  201. return;
  202. /* Record in wb_err for checkers using errseq_t based tracking */
  203. __filemap_set_wb_err(mapping, error);
  204. /* Record it in superblock */
  205. if (mapping->host)
  206. errseq_set(&mapping->host->i_sb->s_wb_err, error);
  207. /* Record it in flags for now, for legacy callers */
  208. if (error == -ENOSPC)
  209. set_bit(AS_ENOSPC, &mapping->flags);
  210. else
  211. set_bit(AS_EIO, &mapping->flags);
  212. }
  213. static inline void mapping_set_unevictable(struct address_space *mapping)
  214. {
  215. set_bit(AS_UNEVICTABLE, &mapping->flags);
  216. }
  217. static inline void mapping_clear_unevictable(struct address_space *mapping)
  218. {
  219. clear_bit(AS_UNEVICTABLE, &mapping->flags);
  220. }
  221. static inline bool mapping_unevictable(struct address_space *mapping)
  222. {
  223. return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
  224. }
  225. static inline void mapping_set_exiting(struct address_space *mapping)
  226. {
  227. set_bit(AS_EXITING, &mapping->flags);
  228. }
  229. static inline int mapping_exiting(struct address_space *mapping)
  230. {
  231. return test_bit(AS_EXITING, &mapping->flags);
  232. }
  233. static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
  234. {
  235. set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
  236. }
  237. static inline int mapping_use_writeback_tags(struct address_space *mapping)
  238. {
  239. return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
  240. }
  241. static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
  242. {
  243. return mapping->gfp_mask;
  244. }
  245. /* Restricts the given gfp_mask to what the mapping allows. */
  246. static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
  247. gfp_t gfp_mask)
  248. {
  249. return mapping_gfp_mask(mapping) & gfp_mask;
  250. }
  251. /*
  252. * This is non-atomic. Only to be used before the mapping is activated.
  253. * Probably needs a barrier...
  254. */
  255. static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  256. {
  257. m->gfp_mask = mask;
  258. }
  259. /**
  260. * mapping_set_large_folios() - Indicate the file supports large folios.
  261. * @mapping: The file.
  262. *
  263. * The filesystem should call this function in its inode constructor to
  264. * indicate that the VFS can use large folios to cache the contents of
  265. * the file.
  266. *
  267. * Context: This should not be called while the inode is active as it
  268. * is non-atomic.
  269. */
  270. static inline void mapping_set_large_folios(struct address_space *mapping)
  271. {
  272. __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
  273. }
  274. /*
  275. * Large folio support currently depends on THP. These dependencies are
  276. * being worked on but are not yet fixed.
  277. */
  278. static inline bool mapping_large_folio_support(struct address_space *mapping)
  279. {
  280. return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
  281. test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
  282. }
  283. static inline int filemap_nr_thps(struct address_space *mapping)
  284. {
  285. #ifdef CONFIG_READ_ONLY_THP_FOR_FS
  286. return atomic_read(&mapping->nr_thps);
  287. #else
  288. return 0;
  289. #endif
  290. }
  291. static inline void filemap_nr_thps_inc(struct address_space *mapping)
  292. {
  293. #ifdef CONFIG_READ_ONLY_THP_FOR_FS
  294. if (!mapping_large_folio_support(mapping))
  295. atomic_inc(&mapping->nr_thps);
  296. #else
  297. WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
  298. #endif
  299. }
  300. static inline void filemap_nr_thps_dec(struct address_space *mapping)
  301. {
  302. #ifdef CONFIG_READ_ONLY_THP_FOR_FS
  303. if (!mapping_large_folio_support(mapping))
  304. atomic_dec(&mapping->nr_thps);
  305. #else
  306. WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
  307. #endif
  308. }
  309. struct address_space *page_mapping(struct page *);
  310. struct address_space *folio_mapping(struct folio *);
  311. struct address_space *swapcache_mapping(struct folio *);
  312. /**
  313. * folio_file_mapping - Find the mapping this folio belongs to.
  314. * @folio: The folio.
  315. *
  316. * For folios which are in the page cache, return the mapping that this
  317. * page belongs to. Folios in the swap cache return the mapping of the
  318. * swap file or swap device where the data is stored. This is different
  319. * from the mapping returned by folio_mapping(). The only reason to
  320. * use it is if, like NFS, you return 0 from ->activate_swapfile.
  321. *
  322. * Do not call this for folios which aren't in the page cache or swap cache.
  323. */
  324. static inline struct address_space *folio_file_mapping(struct folio *folio)
  325. {
  326. if (unlikely(folio_test_swapcache(folio)))
  327. return swapcache_mapping(folio);
  328. return folio->mapping;
  329. }
  330. static inline struct address_space *page_file_mapping(struct page *page)
  331. {
  332. return folio_file_mapping(page_folio(page));
  333. }
  334. /*
  335. * For file cache pages, return the address_space, otherwise return NULL
  336. */
  337. static inline struct address_space *page_mapping_file(struct page *page)
  338. {
  339. struct folio *folio = page_folio(page);
  340. if (unlikely(folio_test_swapcache(folio)))
  341. return NULL;
  342. return folio_mapping(folio);
  343. }
  344. /**
  345. * folio_inode - Get the host inode for this folio.
  346. * @folio: The folio.
  347. *
  348. * For folios which are in the page cache, return the inode that this folio
  349. * belongs to.
  350. *
  351. * Do not call this for folios which aren't in the page cache.
  352. */
  353. static inline struct inode *folio_inode(struct folio *folio)
  354. {
  355. return folio->mapping->host;
  356. }
  357. /**
  358. * folio_attach_private - Attach private data to a folio.
  359. * @folio: Folio to attach data to.
  360. * @data: Data to attach to folio.
  361. *
  362. * Attaching private data to a folio increments the page's reference count.
  363. * The data must be detached before the folio will be freed.
  364. */
  365. static inline void folio_attach_private(struct folio *folio, void *data)
  366. {
  367. folio_get(folio);
  368. folio->private = data;
  369. folio_set_private(folio);
  370. }
  371. /**
  372. * folio_change_private - Change private data on a folio.
  373. * @folio: Folio to change the data on.
  374. * @data: Data to set on the folio.
  375. *
  376. * Change the private data attached to a folio and return the old
  377. * data. The page must previously have had data attached and the data
  378. * must be detached before the folio will be freed.
  379. *
  380. * Return: Data that was previously attached to the folio.
  381. */
  382. static inline void *folio_change_private(struct folio *folio, void *data)
  383. {
  384. void *old = folio_get_private(folio);
  385. folio->private = data;
  386. return old;
  387. }
  388. /**
  389. * folio_detach_private - Detach private data from a folio.
  390. * @folio: Folio to detach data from.
  391. *
  392. * Removes the data that was previously attached to the folio and decrements
  393. * the refcount on the page.
  394. *
  395. * Return: Data that was attached to the folio.
  396. */
  397. static inline void *folio_detach_private(struct folio *folio)
  398. {
  399. void *data = folio_get_private(folio);
  400. if (!folio_test_private(folio))
  401. return NULL;
  402. folio_clear_private(folio);
  403. folio->private = NULL;
  404. folio_put(folio);
  405. return data;
  406. }
  407. static inline void attach_page_private(struct page *page, void *data)
  408. {
  409. folio_attach_private(page_folio(page), data);
  410. }
  411. static inline void *detach_page_private(struct page *page)
  412. {
  413. return folio_detach_private(page_folio(page));
  414. }
  415. #ifdef CONFIG_NUMA
  416. struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
  417. #else
  418. static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
  419. {
  420. return folio_alloc(gfp, order);
  421. }
  422. #endif
  423. static inline struct page *__page_cache_alloc(gfp_t gfp)
  424. {
  425. return &filemap_alloc_folio(gfp, 0)->page;
  426. }
  427. static inline struct page *page_cache_alloc(struct address_space *x)
  428. {
  429. return __page_cache_alloc(mapping_gfp_mask(x));
  430. }
  431. static inline gfp_t __readahead_gfp_mask(struct address_space *x)
  432. {
  433. return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
  434. }
  435. gfp_t readahead_gfp_mask(struct address_space *x);
  436. typedef int filler_t(struct file *, struct folio *);
  437. pgoff_t page_cache_next_miss(struct address_space *mapping,
  438. pgoff_t index, unsigned long max_scan);
  439. pgoff_t page_cache_prev_miss(struct address_space *mapping,
  440. pgoff_t index, unsigned long max_scan);
  441. #define FGP_ACCESSED 0x00000001
  442. #define FGP_LOCK 0x00000002
  443. #define FGP_CREAT 0x00000004
  444. #define FGP_WRITE 0x00000008
  445. #define FGP_NOFS 0x00000010
  446. #define FGP_NOWAIT 0x00000020
  447. #define FGP_FOR_MMAP 0x00000040
  448. #define FGP_HEAD 0x00000080
  449. #define FGP_ENTRY 0x00000100
  450. #define FGP_STABLE 0x00000200
  451. struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
  452. int fgp_flags, gfp_t gfp);
  453. struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
  454. int fgp_flags, gfp_t gfp);
  455. /**
  456. * filemap_get_folio - Find and get a folio.
  457. * @mapping: The address_space to search.
  458. * @index: The page index.
  459. *
  460. * Looks up the page cache entry at @mapping & @index. If a folio is
  461. * present, it is returned with an increased refcount.
  462. *
  463. * Otherwise, %NULL is returned.
  464. */
  465. static inline struct folio *filemap_get_folio(struct address_space *mapping,
  466. pgoff_t index)
  467. {
  468. return __filemap_get_folio(mapping, index, 0, 0);
  469. }
  470. /**
  471. * filemap_lock_folio - Find and lock a folio.
  472. * @mapping: The address_space to search.
  473. * @index: The page index.
  474. *
  475. * Looks up the page cache entry at @mapping & @index. If a folio is
  476. * present, it is returned locked with an increased refcount.
  477. *
  478. * Context: May sleep.
  479. * Return: A folio or %NULL if there is no folio in the cache for this
  480. * index. Will not return a shadow, swap or DAX entry.
  481. */
  482. static inline struct folio *filemap_lock_folio(struct address_space *mapping,
  483. pgoff_t index)
  484. {
  485. return __filemap_get_folio(mapping, index, FGP_LOCK, 0);
  486. }
  487. /**
  488. * find_get_page - find and get a page reference
  489. * @mapping: the address_space to search
  490. * @offset: the page index
  491. *
  492. * Looks up the page cache slot at @mapping & @offset. If there is a
  493. * page cache page, it is returned with an increased refcount.
  494. *
  495. * Otherwise, %NULL is returned.
  496. */
  497. static inline struct page *find_get_page(struct address_space *mapping,
  498. pgoff_t offset)
  499. {
  500. return pagecache_get_page(mapping, offset, 0, 0);
  501. }
  502. static inline struct page *find_get_page_flags(struct address_space *mapping,
  503. pgoff_t offset, int fgp_flags)
  504. {
  505. return pagecache_get_page(mapping, offset, fgp_flags, 0);
  506. }
  507. /**
  508. * find_lock_page - locate, pin and lock a pagecache page
  509. * @mapping: the address_space to search
  510. * @index: the page index
  511. *
  512. * Looks up the page cache entry at @mapping & @index. If there is a
  513. * page cache page, it is returned locked and with an increased
  514. * refcount.
  515. *
  516. * Context: May sleep.
  517. * Return: A struct page or %NULL if there is no page in the cache for this
  518. * index.
  519. */
  520. static inline struct page *find_lock_page(struct address_space *mapping,
  521. pgoff_t index)
  522. {
  523. return pagecache_get_page(mapping, index, FGP_LOCK, 0);
  524. }
  525. /**
  526. * find_or_create_page - locate or add a pagecache page
  527. * @mapping: the page's address_space
  528. * @index: the page's index into the mapping
  529. * @gfp_mask: page allocation mode
  530. *
  531. * Looks up the page cache slot at @mapping & @offset. If there is a
  532. * page cache page, it is returned locked and with an increased
  533. * refcount.
  534. *
  535. * If the page is not present, a new page is allocated using @gfp_mask
  536. * and added to the page cache and the VM's LRU list. The page is
  537. * returned locked and with an increased refcount.
  538. *
  539. * On memory exhaustion, %NULL is returned.
  540. *
  541. * find_or_create_page() may sleep, even if @gfp_flags specifies an
  542. * atomic allocation!
  543. */
  544. static inline struct page *find_or_create_page(struct address_space *mapping,
  545. pgoff_t index, gfp_t gfp_mask)
  546. {
  547. return pagecache_get_page(mapping, index,
  548. FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
  549. gfp_mask);
  550. }
  551. /**
  552. * grab_cache_page_nowait - returns locked page at given index in given cache
  553. * @mapping: target address_space
  554. * @index: the page index
  555. *
  556. * Same as grab_cache_page(), but do not wait if the page is unavailable.
  557. * This is intended for speculative data generators, where the data can
  558. * be regenerated if the page couldn't be grabbed. This routine should
  559. * be safe to call while holding the lock for another page.
  560. *
  561. * Clear __GFP_FS when allocating the page to avoid recursion into the fs
  562. * and deadlock against the caller's locked page.
  563. */
  564. static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
  565. pgoff_t index)
  566. {
  567. return pagecache_get_page(mapping, index,
  568. FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
  569. mapping_gfp_mask(mapping));
  570. }
  571. #define swapcache_index(folio) __page_file_index(&(folio)->page)
  572. /**
  573. * folio_index - File index of a folio.
  574. * @folio: The folio.
  575. *
  576. * For a folio which is either in the page cache or the swap cache,
  577. * return its index within the address_space it belongs to. If you know
  578. * the page is definitely in the page cache, you can look at the folio's
  579. * index directly.
  580. *
  581. * Return: The index (offset in units of pages) of a folio in its file.
  582. */
  583. static inline pgoff_t folio_index(struct folio *folio)
  584. {
  585. if (unlikely(folio_test_swapcache(folio)))
  586. return swapcache_index(folio);
  587. return folio->index;
  588. }
  589. /**
  590. * folio_next_index - Get the index of the next folio.
  591. * @folio: The current folio.
  592. *
  593. * Return: The index of the folio which follows this folio in the file.
  594. */
  595. static inline pgoff_t folio_next_index(struct folio *folio)
  596. {
  597. return folio->index + folio_nr_pages(folio);
  598. }
  599. /**
  600. * folio_file_page - The page for a particular index.
  601. * @folio: The folio which contains this index.
  602. * @index: The index we want to look up.
  603. *
  604. * Sometimes after looking up a folio in the page cache, we need to
  605. * obtain the specific page for an index (eg a page fault).
  606. *
  607. * Return: The page containing the file data for this index.
  608. */
  609. static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
  610. {
  611. /* HugeTLBfs indexes the page cache in units of hpage_size */
  612. if (folio_test_hugetlb(folio))
  613. return &folio->page;
  614. return folio_page(folio, index & (folio_nr_pages(folio) - 1));
  615. }
  616. /**
  617. * folio_contains - Does this folio contain this index?
  618. * @folio: The folio.
  619. * @index: The page index within the file.
  620. *
  621. * Context: The caller should have the page locked in order to prevent
  622. * (eg) shmem from moving the page between the page cache and swap cache
  623. * and changing its index in the middle of the operation.
  624. * Return: true or false.
  625. */
  626. static inline bool folio_contains(struct folio *folio, pgoff_t index)
  627. {
  628. /* HugeTLBfs indexes the page cache in units of hpage_size */
  629. if (folio_test_hugetlb(folio))
  630. return folio->index == index;
  631. return index - folio_index(folio) < folio_nr_pages(folio);
  632. }
  633. /*
  634. * Given the page we found in the page cache, return the page corresponding
  635. * to this index in the file
  636. */
  637. static inline struct page *find_subpage(struct page *head, pgoff_t index)
  638. {
  639. /* HugeTLBfs wants the head page regardless */
  640. if (PageHuge(head))
  641. return head;
  642. return head + (index & (thp_nr_pages(head) - 1));
  643. }
  644. unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
  645. pgoff_t end, struct folio_batch *fbatch);
  646. unsigned filemap_get_folios_contig(struct address_space *mapping,
  647. pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
  648. unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
  649. pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
  650. unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
  651. pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
  652. struct page **pages);
  653. static inline unsigned find_get_pages_tag(struct address_space *mapping,
  654. pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
  655. struct page **pages)
  656. {
  657. return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
  658. nr_pages, pages);
  659. }
  660. struct page *grab_cache_page_write_begin(struct address_space *mapping,
  661. pgoff_t index);
  662. /*
  663. * Returns locked page at given index in given cache, creating it if needed.
  664. */
  665. static inline struct page *grab_cache_page(struct address_space *mapping,
  666. pgoff_t index)
  667. {
  668. return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
  669. }
/* Read a folio/page into the cache, using @filler (or, if NULL, the
 * mapping's own read method) to bring it uptodate. */
struct folio *read_cache_folio(struct address_space *, pgoff_t index,
		filler_t *filler, struct file *file);
struct page *read_cache_page(struct address_space *, pgoff_t index,
		filler_t *filler, struct file *file);
extern struct page * read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
  676. static inline struct page *read_mapping_page(struct address_space *mapping,
  677. pgoff_t index, struct file *file)
  678. {
  679. return read_cache_page(mapping, index, NULL, file);
  680. }
  681. static inline struct folio *read_mapping_folio(struct address_space *mapping,
  682. pgoff_t index, struct file *file)
  683. {
  684. return read_cache_folio(mapping, index, NULL, file);
  685. }
/*
 * Get index of the page within radix-tree (but not for hugetlb pages).
 * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
 */
static inline pgoff_t page_to_index(struct page *page)
{
	struct page *head;

	/* Head or order-0 page: ->index is valid as-is. */
	if (likely(!PageTransTail(page)))
		return page->index;

	head = compound_head(page);
	/*
	 * We don't initialize ->index for tail pages: calculate based on
	 * head page
	 */
	/* (page - head) is the tail's offset within the compound page. */
	return head->index + page - head;
}
extern pgoff_t hugetlb_basepage_index(struct page *page);

/*
 * Get the offset in PAGE_SIZE (even for hugetlb pages).
 * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
 */
static inline pgoff_t page_to_pgoff(struct page *page)
{
	/* hugetlb keeps ->index in units of the huge page size; convert. */
	if (unlikely(PageHuge(page)))
		return hugetlb_basepage_index(page);
	return page_to_index(page);
}
/*
 * Return byte-offset into filesystem object for page.
 * Uses ->index directly; for swap-backed pages see page_file_offset().
 */
static inline loff_t page_offset(struct page *page)
{
	return ((loff_t)page->index) << PAGE_SHIFT;
}
/*
 * Byte offset for the page, going through page_index() so the result is
 * also valid for pages in the swap cache.
 */
static inline loff_t page_file_offset(struct page *page)
{
	return ((loff_t)page_index(page)) << PAGE_SHIFT;
}
/**
 * folio_pos - Returns the byte position of this folio in its file.
 * @folio: The folio.
 */
static inline loff_t folio_pos(struct folio *folio)
{
	/* Delegate to the page helper; ->page aliases the folio itself. */
	return page_offset(&folio->page);
}
/**
 * folio_file_pos - Returns the byte position of this folio in its file.
 * @folio: The folio.
 *
 * This differs from folio_pos() for folios which belong to a swap file.
 * NFS is the only filesystem today which needs to use folio_file_pos().
 */
static inline loff_t folio_file_pos(struct folio *folio)
{
	return page_file_offset(&folio->page);
}
/*
 * Get the offset in PAGE_SIZE (even for hugetlb folios).
 * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
 */
static inline pgoff_t folio_pgoff(struct folio *folio)
{
	/* hugetlb ->index is in huge-page units; convert to base pages. */
	if (unlikely(folio_test_hugetlb(folio)))
		return hugetlb_basepage_index(&folio->page);
	return folio->index;
}
  753. extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
  754. unsigned long address);
  755. static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
  756. unsigned long address)
  757. {
  758. pgoff_t pgoff;
  759. if (unlikely(is_vm_hugetlb_page(vma)))
  760. return linear_hugepage_index(vma, address);
  761. pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
  762. pgoff += vma->vm_pgoff;
  763. return pgoff;
  764. }
/* Wake-up key: which folio/bit a wake event is for. */
struct wait_page_key {
	struct folio *folio;
	int bit_nr;
	int page_match;		/* set by wake_page_match() on folio match */
};

/* Per-waiter queue entry: the folio/bit this waiter sleeps on. */
struct wait_page_queue {
	struct folio *folio;
	int bit_nr;
	wait_queue_entry_t wait;
};
  775. static inline bool wake_page_match(struct wait_page_queue *wait_page,
  776. struct wait_page_key *key)
  777. {
  778. if (wait_page->folio != key->folio)
  779. return false;
  780. key->page_match = 1;
  781. if (wait_page->bit_nr != key->bit_nr)
  782. return false;
  783. return true;
  784. }
/* Slow-path lock/unlock primitives, implemented in mm/filemap.c. */
void __folio_lock(struct folio *folio);
int __folio_lock_killable(struct folio *folio);
vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
void unlock_page(struct page *page);
void folio_unlock(struct folio *folio);
/**
 * folio_trylock() - Attempt to lock a folio.
 * @folio: The folio to attempt to lock.
 *
 * Sometimes it is undesirable to wait for a folio to be unlocked (eg
 * when the locks are being taken in the wrong order, or if making
 * progress through a batch of folios is more important than processing
 * them in order).  Usually folio_lock() is the correct function to call.
 *
 * Context: Any context.
 * Return: Whether the lock was successfully acquired.
 */
static inline bool folio_trylock(struct folio *folio)
{
	/* Atomic test-and-set with acquire semantics on PG_locked. */
	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
}
  806. /*
  807. * Return true if the page was successfully locked
  808. */
  809. static inline int trylock_page(struct page *page)
  810. {
  811. return folio_trylock(page_folio(page));
  812. }
/**
 * folio_lock() - Lock this folio.
 * @folio: The folio to lock.
 *
 * The folio lock protects against many things, probably more than it
 * should.  It is primarily held while a folio is being brought uptodate,
 * either from its backing file or from swap.  It is also held while a
 * folio is being truncated from its address_space, so holding the lock
 * is sufficient to keep folio->mapping stable.
 *
 * The folio lock is also held while write() is modifying the page to
 * provide POSIX atomicity guarantees (as long as the write does not
 * cross a page boundary).  Other modifications to the data in the folio
 * do not hold the folio lock and can race with writes, eg DMA and stores
 * to mapped pages.
 *
 * Context: May sleep.  If you need to acquire the locks of two or
 * more folios, they must be in order of ascending index, if they are
 * in the same address_space.  If they are in different address_spaces,
 * acquire the lock of the folio which belongs to the address_space which
 * has the lowest address in memory first.
 */
static inline void folio_lock(struct folio *folio)
{
	might_sleep();
	/* Fast path: uncontended trylock; fall back to the sleeping path. */
	if (!folio_trylock(folio))
		__folio_lock(folio);
}
  841. /**
  842. * lock_page() - Lock the folio containing this page.
  843. * @page: The page to lock.
  844. *
  845. * See folio_lock() for a description of what the lock protects.
  846. * This is a legacy function and new code should probably use folio_lock()
  847. * instead.
  848. *
  849. * Context: May sleep. Pages in the same folio share a lock, so do not
  850. * attempt to lock two pages which share a folio.
  851. */
  852. static inline void lock_page(struct page *page)
  853. {
  854. struct folio *folio;
  855. might_sleep();
  856. folio = page_folio(page);
  857. if (!folio_trylock(folio))
  858. __folio_lock(folio);
  859. }
/**
 * folio_lock_killable() - Lock this folio, interruptible by a fatal signal.
 * @folio: The folio to lock.
 *
 * Attempts to lock the folio, like folio_lock(), except that the sleep
 * to acquire the lock is interruptible by a fatal signal.
 *
 * Context: May sleep; see folio_lock().
 * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received.
 */
static inline int folio_lock_killable(struct folio *folio)
{
	might_sleep();
	/* Uncontended fast path first; only the slow path can sleep/fail. */
	if (!folio_trylock(folio))
		return __folio_lock_killable(folio);
	return 0;
}
  877. /*
  878. * lock_page_killable is like lock_page but can be interrupted by fatal
  879. * signals. It returns 0 if it locked the page and -EINTR if it was
  880. * killed while waiting.
  881. */
  882. static inline int lock_page_killable(struct page *page)
  883. {
  884. return folio_lock_killable(page_folio(page));
  885. }
/*
 * folio_lock_or_retry - Lock the folio, unless this would block and the
 * caller indicated that it can handle a retry.
 *
 * Return value and mmap_lock implications depend on flags; see
 * __folio_lock_or_retry().
 */
static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
					     struct vm_fault *vmf)
{
	might_sleep();
	/* 0 == lock acquired without blocking. */
	if (!folio_trylock(folio))
		return __folio_lock_or_retry(folio, vmf);
	return 0;
}
/*
 * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
 * and should not be used directly.
 */
void folio_wait_bit(struct folio *folio, int bit_nr);
int folio_wait_bit_killable(struct folio *folio, int bit_nr);
/*
 * Wait for a folio to be unlocked.
 *
 * This must be called with the caller "holding" the folio,
 * ie with increased folio reference count so that the folio won't
 * go away during the wait.
 */
static inline void folio_wait_locked(struct folio *folio)
{
	/* Skip the wait-queue machinery entirely if already unlocked. */
	if (folio_test_locked(folio))
		folio_wait_bit(folio, PG_locked);
}
  919. static inline int folio_wait_locked_killable(struct folio *folio)
  920. {
  921. if (!folio_test_locked(folio))
  922. return 0;
  923. return folio_wait_bit_killable(folio, PG_locked);
  924. }
  925. static inline void wait_on_page_locked(struct page *page)
  926. {
  927. folio_wait_locked(page_folio(page));
  928. }
  929. static inline int wait_on_page_locked_killable(struct page *page)
  930. {
  931. return folio_wait_locked_killable(page_folio(page));
  932. }
/* Writeback wait/completion helpers (page and folio flavours). */
void wait_on_page_writeback(struct page *page);
void folio_wait_writeback(struct folio *folio);
int folio_wait_writeback_killable(struct folio *folio);
void end_page_writeback(struct page *page);
void folio_end_writeback(struct folio *folio);
void wait_for_stable_page(struct page *page);
void folio_wait_stable(struct folio *folio);
void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
  941. static inline void __set_page_dirty(struct page *page,
  942. struct address_space *mapping, int warn)
  943. {
  944. __folio_mark_dirty(page_folio(page), mapping, warn);
  945. }
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
void __folio_cancel_dirty(struct folio *folio);

/* Clear the dirty flag without writing the folio back. */
static inline void folio_cancel_dirty(struct folio *folio)
{
	/* Avoid atomic ops, locking, etc. when not actually needed. */
	if (folio_test_dirty(folio))
		__folio_cancel_dirty(folio);
}
/* Dirty-handling and invalidation primitives (see mm/page-writeback.c). */
bool folio_clear_dirty_for_io(struct folio *folio);
bool clear_page_dirty_for_io(struct page *page);
void folio_invalidate(struct folio *folio, size_t offset, size_t length);
int __must_check folio_write_one(struct folio *folio);
  958. static inline int __must_check write_one_page(struct page *page)
  959. {
  960. return folio_write_one(page_folio(page));
  961. }
int __set_page_dirty_nobuffers(struct page *page);
bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);

#ifdef CONFIG_MIGRATION
int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
		struct folio *src, enum migrate_mode mode);
#else
/* No migration support: filesystems can set ->migrate_folio to this NULL. */
#define filemap_migrate_folio NULL
#endif

void page_endio(struct page *page, bool is_write, int err);

/* PG_private_2 completion/wait helpers (presumably for netfs/fscache —
 * confirm against mm/filemap.c). */
void folio_end_private_2(struct folio *folio);
void folio_wait_private_2(struct folio *folio);
int folio_wait_private_2_killable(struct folio *folio);
/*
 * Add an arbitrary waiter to a page's wait queue
 */
void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);

/*
 * Fault in userspace address range.
 */
size_t fault_in_writeable(char __user *uaddr, size_t size);
size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
size_t fault_in_readable(const char __user *uaddr, size_t size);

/* Page-cache insertion/removal primitives (see mm/filemap.c). */
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
		pgoff_t index, gfp_t gfp);
int filemap_add_folio(struct address_space *mapping, struct folio *folio,
		pgoff_t index, gfp_t gfp);
void filemap_remove_folio(struct folio *folio);
void delete_from_page_cache(struct page *page);
void __filemap_remove_folio(struct folio *folio, void *shadow);
void replace_page_cache_page(struct page *old, struct page *new);
void delete_from_page_cache_batch(struct address_space *mapping,
				  struct folio_batch *fbatch);
int try_to_release_page(struct page *page, gfp_t gfp);
bool filemap_release_folio(struct folio *folio, gfp_t gfp);
loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
		int whence);

/* Must be non-static for BPF error injection */
int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
		pgoff_t index, gfp_t gfp, void **shadowp);
bool filemap_range_has_writeback(struct address_space *mapping,
				 loff_t start_byte, loff_t end_byte);
/**
 * filemap_range_needs_writeback - check if range potentially needs writeback
 * @mapping:	address space within which to check
 * @start_byte:	offset in bytes where the range starts
 * @end_byte:	offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback. Used by O_DIRECT
 * read/write with IOCB_NOWAIT, to see if the caller needs to do
 * filemap_write_and_wait_range() before proceeding.
 *
 * Return: %true if the caller should do filemap_write_and_wait_range() before
 * doing O_DIRECT to a page in this range, %false otherwise.
 */
static inline bool filemap_range_needs_writeback(struct address_space *mapping,
						 loff_t start_byte,
						 loff_t end_byte)
{
	/* Nothing cached at all: certainly nothing to write back. */
	if (!mapping->nrpages)
		return false;
	/* Cheap whole-mapping tag checks before walking the range. */
	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
	    !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
		return false;
	return filemap_range_has_writeback(mapping, start_byte, end_byte);
}
/**
 * struct readahead_control - Describes a readahead request.
 *
 * A readahead request is for consecutive pages.  Filesystems which
 * implement the ->readahead method should call readahead_page() or
 * readahead_page_batch() in a loop and attempt to start I/O against
 * each page in the request.
 *
 * Most of the fields in this struct are private and should be accessed
 * by the functions below.
 *
 * @file: The file, used primarily by network filesystems for authentication.
 *	  May be NULL if invoked internally by the filesystem.
 * @mapping: Readahead this filesystem object.
 * @ra: File readahead state.  May be NULL.
 */
struct readahead_control {
	struct file *file;
	struct address_space *mapping;
	struct file_ra_state *ra;
/* private: use the readahead_* accessors instead */
	pgoff_t _index;			/* index of the next page to hand out */
	unsigned int _nr_pages;		/* pages remaining in the request */
	unsigned int _batch_count;	/* pages handed out in the last batch */
	bool _workingset;		/* NOTE(review): presumably PSI/workingset
					 * accounting state — confirm in readahead.c */
	unsigned long _pflags;
};
/*
 * Initialise an on-stack readahead_control.  Note the argument order:
 * (name, file, ra_state, mapping, start index).
 */
#define DEFINE_READAHEAD(ractl, f, r, m, i)				\
	struct readahead_control ractl = {				\
		.file = f,						\
		.mapping = m,						\
		.ra = r,						\
		._index = i,						\
	}

/* Default readahead window: 128KB worth of pages. */
#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
/* Readahead entry points, implemented in mm/readahead.c. */
void page_cache_ra_unbounded(struct readahead_control *,
		unsigned long nr_to_read, unsigned long lookahead_count);
void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
void page_cache_async_ra(struct readahead_control *, struct folio *,
		unsigned long req_count);
void readahead_expand(struct readahead_control *ractl,
		      loff_t new_start, size_t new_len);
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @file: Used by the filesystem for authentication.
 * @index: Index of first page to be read.
 * @req_count: Total number of pages being read by the caller.
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */
static inline
void page_cache_sync_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file, pgoff_t index,
		unsigned long req_count)
{
	/* Build a temporary ractl on the stack and delegate. */
	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
	page_cache_sync_ra(&ractl, req_count);
}
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @file: Used by the filesystem for authentication.
 * @folio: The folio at @index which triggered the readahead call.
 * @index: Index of first page to be read.
 * @req_count: Total number of pages being read by the caller.
 *
 * page_cache_async_readahead() should be called when a page is used which
 * is marked as PageReadahead; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
static inline
void page_cache_async_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file,
		struct folio *folio, pgoff_t index, unsigned long req_count)
{
	/* Build a temporary ractl on the stack and delegate. */
	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
	page_cache_async_ra(&ractl, folio, req_count);
}
/*
 * Advance the readahead request past the previously-returned batch and
 * return the next folio, or NULL when the request is exhausted.
 * Common helper for readahead_page() and readahead_folio().
 */
static inline struct folio *__readahead_folio(struct readahead_control *ractl)
{
	struct folio *folio;

	/* Consume the batch handed out by the previous call. */
	BUG_ON(ractl->_batch_count > ractl->_nr_pages);
	ractl->_nr_pages -= ractl->_batch_count;
	ractl->_index += ractl->_batch_count;

	if (!ractl->_nr_pages) {
		ractl->_batch_count = 0;
		return NULL;
	}

	folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	/* A large folio accounts for folio_nr_pages() slots of the request. */
	ractl->_batch_count = folio_nr_pages(folio);

	return folio;
}
/**
 * readahead_page - Get the next page to read.
 * @ractl: The current readahead request.
 *
 * Context: The page is locked and has an elevated refcount.  The caller
 * should decrease the refcount once the page has been submitted for I/O
 * and unlock the page once all I/O to that page has completed.
 * Return: A pointer to the next page, or %NULL if we are done.
 */
static inline struct page *readahead_page(struct readahead_control *ractl)
{
	struct folio *folio = __readahead_folio(ractl);

	/*
	 * NOTE(review): when folio is NULL this relies on ->page being the
	 * first member of struct folio, so &folio->page is also NULL.
	 */
	return &folio->page;
}
/**
 * readahead_folio - Get the next folio to read.
 * @ractl: The current readahead request.
 *
 * Context: The folio is locked.  The caller should unlock the folio once
 * all I/O to that folio has completed.
 * Return: A pointer to the next folio, or %NULL if we are done.
 */
static inline struct folio *readahead_folio(struct readahead_control *ractl)
{
	struct folio *folio = __readahead_folio(ractl);

	/* Unlike readahead_page(), drop the reference here for the caller. */
	if (folio)
		folio_put(folio);
	return folio;
}
/*
 * Fill @array with up to @array_sz pages from the request, starting after
 * the previously-returned batch.  Returns the number of pages stored.
 * Backing helper for readahead_page_batch().
 */
static inline unsigned int __readahead_batch(struct readahead_control *rac,
		struct page **array, unsigned int array_sz)
{
	unsigned int i = 0;
	XA_STATE(xas, &rac->mapping->i_pages, 0);
	struct page *page;

	/* Consume whatever the previous batch handed out. */
	BUG_ON(rac->_batch_count > rac->_nr_pages);
	rac->_nr_pages -= rac->_batch_count;
	rac->_index += rac->_batch_count;
	rac->_batch_count = 0;

	xas_set(&xas, rac->_index);
	rcu_read_lock();
	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
		if (xas_retry(&xas, page))
			continue;
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		VM_BUG_ON_PAGE(PageTail(page), page);
		array[i++] = page;
		/* Compound pages consume several slots of the request. */
		rac->_batch_count += thp_nr_pages(page);
		if (i == array_sz)
			break;
	}
	rcu_read_unlock();
	return i;
}
/**
 * readahead_page_batch - Get a batch of pages to read.
 * @rac: The current readahead request.
 * @array: An array of pointers to struct page.
 *
 * Context: The pages are locked and have an elevated refcount.  The caller
 * should decrease the refcount once the page has been submitted for I/O
 * and unlock the page once all I/O to that page has completed.
 * Return: The number of pages placed in the array.  0 indicates the request
 * is complete.
 */
#define readahead_page_batch(rac, array)				\
	__readahead_batch(rac, array, ARRAY_SIZE(array))
/**
 * readahead_pos - The byte offset into the file of this readahead request.
 * @rac: The readahead request.
 */
static inline loff_t readahead_pos(struct readahead_control *rac)
{
	/* Widen before multiplying to avoid pgoff_t overflow. */
	return (loff_t)rac->_index * PAGE_SIZE;
}

/**
 * readahead_length - The number of bytes in this readahead request.
 * @rac: The readahead request.
 */
static inline size_t readahead_length(struct readahead_control *rac)
{
	return rac->_nr_pages * PAGE_SIZE;
}

/**
 * readahead_index - The index of the first page in this readahead request.
 * @rac: The readahead request.
 */
static inline pgoff_t readahead_index(struct readahead_control *rac)
{
	return rac->_index;
}

/**
 * readahead_count - The number of pages in this readahead request.
 * @rac: The readahead request.
 */
static inline unsigned int readahead_count(struct readahead_control *rac)
{
	return rac->_nr_pages;
}

/**
 * readahead_batch_length - The number of bytes in the current batch.
 * @rac: The readahead request.
 */
static inline size_t readahead_batch_length(struct readahead_control *rac)
{
	return rac->_batch_count * PAGE_SIZE;
}
/* Number of pages needed to hold the inode's contents, rounding up. */
static inline unsigned long dir_pages(struct inode *inode)
{
	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
			       PAGE_SHIFT;
}
/**
 * folio_mkwrite_check_truncate - check if folio was truncated
 * @folio: the folio to check
 * @inode: the inode to check the folio against
 *
 * Return: the number of bytes in the folio up to EOF,
 * or -EFAULT if the folio was truncated.
 */
static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;	/* page index containing EOF */
	size_t offset = offset_in_folio(folio, size);

	/* Truncation clears ->mapping, so NULL here means truncated. */
	if (!folio->mapping)
		return -EFAULT;

	/* folio is wholly inside EOF */
	if (folio_next_index(folio) - 1 < index)
		return folio_size(folio);
	/* folio is wholly past EOF */
	if (folio->index > index || !offset)
		return -EFAULT;
	/* folio is partially inside EOF */
	return offset;
}
/**
 * page_mkwrite_check_truncate - check if page was truncated
 * @page: the page to check
 * @inode: the inode to check the page against
 *
 * Returns the number of bytes in the page up to EOF,
 * or -EFAULT if the page was truncated.
 */
static inline int page_mkwrite_check_truncate(struct page *page,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;	/* page index containing EOF */
	int offset = offset_in_page(size);

	/*
	 * NOTE(review): this compares against inode->i_mapping while the
	 * folio variant above only checks for a NULL ->mapping — confirm
	 * the asymmetry is intentional for callers of this legacy helper.
	 */
	if (page->mapping != inode->i_mapping)
		return -EFAULT;

	/* page is wholly inside EOF */
	if (page->index < index)
		return PAGE_SIZE;
	/* page is wholly past EOF */
	if (page->index > index || !offset)
		return -EFAULT;
	/* page is partially inside EOF */
	return offset;
}
/**
 * i_blocks_per_folio - How many blocks fit in this folio.
 * @inode: The inode which contains the blocks.
 * @folio: The folio.
 *
 * If the block size is larger than the size of this folio, return zero.
 *
 * Context: The caller should hold a refcount on the folio to prevent it
 * from being split.
 * Return: The number of filesystem blocks covered by this folio.
 */
static inline
unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
{
	/* i_blkbits is log2 of the filesystem block size. */
	return folio_size(folio) >> inode->i_blkbits;
}
  1307. static inline
  1308. unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
  1309. {
  1310. return i_blocks_per_folio(inode, page_folio(page));
  1311. }
  1312. #endif /* _LINUX_PAGEMAP_H */