/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_MM_H
#define _LINUX_SCHED_MM_H

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
#include <linux/ioasid.h>

/*
 * Routines for handling mm_structs
 */
extern struct mm_struct *mm_alloc(void);

/**
 * mmgrab() - Pin a &struct mm_struct.
 * @mm: The &struct mm_struct to pin.
 *
 * Make sure that @mm will not get freed even after the owning task
 * exits. This doesn't guarantee that the associated address space
 * will still exist later on and mmget_not_zero() has to be used before
 * accessing it.
 *
 * This is a preferred way to pin @mm for a longer/unbounded amount
 * of time.
 *
 * Use mmdrop() to release the reference acquired by mmgrab().
 *
 * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmgrab(struct mm_struct *mm)
{
        atomic_inc(&mm->mm_count);
}

extern void __mmdrop(struct mm_struct *mm);

static inline void mmdrop(struct mm_struct *mm)
{
        /*
         * The implicit full barrier implied by atomic_dec_and_test() is
         * required by the membarrier system call before returning to
         * user-space, after storing to rq->curr.
         */
        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
                __mmdrop(mm);
}
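
/*
 * Illustrative usage sketch (not part of the original header): a caller
 * that must keep the mm_struct itself alive across a sleep, without
 * pinning the address space, pairs mmgrab() with mmdrop(). The helper
 * names example_keep_mm() and do_something_that_may_sleep() are
 * hypothetical.
 *
 *        static void example_keep_mm(struct mm_struct *mm)
 *        {
 *                mmgrab(mm);        // mm_count reference: struct won't be freed
 *                do_something_that_may_sleep();
 *                mmdrop(mm);        // release; may free mm if this was the last reference
 *        }
 */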
#ifdef CONFIG_PREEMPT_RT
/*
 * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
 * by far the least expensive way to do that.
 */
static inline void __mmdrop_delayed(struct rcu_head *rhp)
{
        struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);

        __mmdrop(mm);
}

/*
 * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
 * kernels via RCU.
 */
static inline void mmdrop_sched(struct mm_struct *mm)
{
        /* Provides a full memory barrier. See mmdrop() */
        if (atomic_dec_and_test(&mm->mm_count))
                call_rcu(&mm->delayed_drop, __mmdrop_delayed);
}
#else
static inline void mmdrop_sched(struct mm_struct *mm)
{
        mmdrop(mm);
}
#endif
/**
 * mmget() - Pin the address space associated with a &struct mm_struct.
 * @mm: The address space to pin.
 *
 * Make sure that the address space of the given &struct mm_struct doesn't
 * go away. This does not protect against parts of the address space being
 * modified or freed, however.
 *
 * Never use this function to pin this address space for an
 * unbounded/indefinite amount of time.
 *
 * Use mmput() to release the reference acquired by mmget().
 *
 * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
 * of &mm_struct.mm_count vs &mm_struct.mm_users.
 */
static inline void mmget(struct mm_struct *mm)
{
        atomic_inc(&mm->mm_users);
}

static inline bool mmget_not_zero(struct mm_struct *mm)
{
        return atomic_inc_not_zero(&mm->mm_users);
}

/* mmput gets rid of the mappings and all user-space */
extern void mmput(struct mm_struct *);
#ifdef CONFIG_MMU
/* Same as above, but performs the slow path from async context. Can also
 * be called from atomic context.
 */
void mmput_async(struct mm_struct *);
#endif
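
/*
 * Illustrative usage sketch (not part of the original header): code that
 * holds only an mm_count reference (via mmgrab()) and later needs to touch
 * the address space must upgrade with mmget_not_zero() and drop the user
 * reference with mmput(). The helper name example_touch_mm() is
 * hypothetical.
 *
 *        static void example_touch_mm(struct mm_struct *mm)
 *        {
 *                if (mmget_not_zero(mm)) {        // address space still alive?
 *                        // ... walk or fault in pages under the usual locks ...
 *                        mmput(mm);               // release the mm_users reference
 *                }
 *        }
 */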
/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
/*
 * Grab a reference to a task's mm, if it is not already going away
 * and ptrace_may_access() with the given mode succeeds.
 */
extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
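
/*
 * Illustrative usage sketch (not part of the original header): both
 * helpers return an mm with mm_users elevated (or NULL), so the result is
 * released with mmput(). The helper name example_inspect_task() is
 * hypothetical.
 *
 *        static void example_inspect_task(struct task_struct *task)
 *        {
 *                struct mm_struct *mm = get_task_mm(task);
 *
 *                if (!mm)
 *                        return;        // kernel thread, or the mm is already gone
 *                // ... inspect the address space ...
 *                mmput(mm);
 *        }
 */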
/* Remove the current task's stale references to the old mm_struct on exit() */
extern void exit_mm_release(struct task_struct *, struct mm_struct *);
/* Remove the current task's stale references to the old mm_struct on exec() */
extern void exec_mm_release(struct task_struct *, struct mm_struct *);
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
static inline void mm_update_next_owner(struct mm_struct *mm)
{
}
#endif /* CONFIG_MEMCG */

#ifdef CONFIG_MMU
#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr, len, flags) (TASK_SIZE)
#endif

#ifndef arch_get_mmap_base
#define arch_get_mmap_base(addr, base) (base)
#endif

extern void arch_pick_mmap_layout(struct mm_struct *mm,
                                  struct rlimit *rlim_stack);
extern unsigned long
arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
                       unsigned long, unsigned long);
extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
                               unsigned long len, unsigned long pgoff,
                               unsigned long flags);

unsigned long
generic_get_unmapped_area(struct file *filp, unsigned long addr,
                          unsigned long len, unsigned long pgoff,
                          unsigned long flags);
unsigned long
generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
                                  unsigned long len, unsigned long pgoff,
                                  unsigned long flags);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm,
                                         struct rlimit *rlim_stack) {}
#endif
static inline bool in_vfork(struct task_struct *tsk)
{
        bool ret;

        /*
         * need RCU to access ->real_parent if CLONE_VM was used along with
         * CLONE_PARENT.
         *
         * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
         * imply CLONE_VM.
         *
         * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
         * ->real_parent is not necessarily the task doing vfork(), so in
         * theory we can't rely on task_lock() if we want to dereference it.
         *
         * And in this case we can't trust the real_parent->mm == tsk->mm
         * check, it can be a false negative. But we do not care, if init or
         * another oom-unkillable task does this it should blame itself.
         */
        rcu_read_lock();
        ret = tsk->vfork_done &&
                        rcu_dereference(tsk->real_parent)->mm == tsk->mm;
        rcu_read_unlock();

        return ret;
}
/*
 * Applies per-task gfp context to the given allocation flags.
 * PF_MEMALLOC_NOIO implies GFP_NOIO
 * PF_MEMALLOC_NOFS implies GFP_NOFS
 * PF_MEMALLOC_PIN  implies !GFP_MOVABLE
 */
static inline gfp_t current_gfp_context(gfp_t flags)
{
        unsigned int pflags = READ_ONCE(current->flags);

        if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
                /*
                 * NOIO implies both NOIO and NOFS and it is the weaker context,
                 * so always make sure it takes precedence.
                 */
                if (pflags & PF_MEMALLOC_NOIO)
                        flags &= ~(__GFP_IO | __GFP_FS);
                else if (pflags & PF_MEMALLOC_NOFS)
                        flags &= ~__GFP_FS;

                if (pflags & PF_MEMALLOC_PIN)
                        flags &= ~__GFP_MOVABLE;
        }
        return flags;
}
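
/*
 * Illustrative sketch (not part of the original header): inside a
 * memalloc_noio_save()/memalloc_noio_restore() section (see below),
 * current_gfp_context() strips __GFP_IO and __GFP_FS from whatever the
 * caller asked for, e.g.
 *
 *        unsigned int noio_flags = memalloc_noio_save();
 *        gfp_t effective = current_gfp_context(GFP_KERNEL);
 *        // effective now behaves like GFP_NOIO for reclaim purposes
 *        memalloc_noio_restore(noio_flags);
 */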
#ifdef CONFIG_LOCKDEP
extern void __fs_reclaim_acquire(unsigned long ip);
extern void __fs_reclaim_release(unsigned long ip);
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void __fs_reclaim_acquire(unsigned long ip) { }
static inline void __fs_reclaim_release(unsigned long ip) { }
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
/* Any memory-allocation retry loop should use
 * memalloc_retry_wait(), and pass the flags for the most
 * constrained allocation attempt that might have failed.
 * This provides useful documentation of where loops are,
 * and a central place to fine tune the waiting as the MM
 * implementation changes.
 */
static inline void memalloc_retry_wait(gfp_t gfp_flags)
{
        /* We use io_schedule_timeout because waiting for memory
         * typically includes waiting for dirty pages to be
         * written out, which requires IO.
         */
        __set_current_state(TASK_UNINTERRUPTIBLE);
        gfp_flags = current_gfp_context(gfp_flags);
        if (gfpflags_allow_blocking(gfp_flags) &&
            !(gfp_flags & __GFP_NORETRY))
                /* Probably waited already, no need for much more */
                io_schedule_timeout(1);
        else
                /* Probably didn't wait, and has now released a lock,
                 * so now is a good time to wait
                 */
                io_schedule_timeout(HZ/50);
}
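
/*
 * Illustrative usage sketch (not part of the original header): a typical
 * retry loop passes the most constrained gfp mask it just tried before
 * waiting. The helper name example_alloc_page() is hypothetical.
 *
 *        static struct page *example_alloc_page(void)
 *        {
 *                struct page *page;
 *
 *                while (!(page = alloc_page(GFP_NOFS))) {
 *                        // back off before retrying the constrained allocation
 *                        memalloc_retry_wait(GFP_NOFS);
 *                }
 *                return page;
 *        }
 */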
/**
 * might_alloc - Mark possible allocation sites
 * @gfp_mask: gfp_t flags that would be used to allocate
 *
 * Similar to might_sleep() and other annotations, this can be used in functions
 * that might allocate, but often don't. Compiles to nothing without
 * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp_mask allows blocking.
 */
static inline void might_alloc(gfp_t gfp_mask)
{
        fs_reclaim_acquire(gfp_mask);
        fs_reclaim_release(gfp_mask);

        might_sleep_if(gfpflags_allow_blocking(gfp_mask));
}
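
/*
 * Illustrative usage sketch (not part of the original header): a helper
 * that only allocates on its slow path can still annotate the allocation so
 * lockdep catches unsafe callers on every invocation, not just when the
 * slow path happens to run. The function name example_get_buffer() is
 * hypothetical.
 *
 *        static void *example_get_buffer(gfp_t gfp, void *cached)
 *        {
 *                might_alloc(gfp);        // complain even when the cache hits
 *                if (cached)
 *                        return cached;
 *                return kmalloc(64, gfp);
 *        }
 */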
/**
 * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
 *
 * This function marks the beginning of the GFP_NOIO allocation scope.
 * All further allocations will implicitly drop __GFP_IO flag and so
 * they are safe for the IO critical section from the allocation recursion
 * point of view. Use memalloc_noio_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_noio_save(void)
{
        unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
        current->flags |= PF_MEMALLOC_NOIO;
        return flags;
}

/**
 * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function.
 * Always make sure that @flags is the return value from the pairing
 * memalloc_noio_save call.
 */
static inline void memalloc_noio_restore(unsigned int flags)
{
        current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
}
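
/*
 * Illustrative usage sketch (not part of the original header): a block
 * driver resume path that must not recurse into the IO layer wraps its
 * work in a NOIO scope. The function names example_resume() and
 * do_resume_work() are hypothetical.
 *
 *        static void example_resume(void)
 *        {
 *                unsigned int noio_flags = memalloc_noio_save();
 *
 *                // every allocation here behaves as if __GFP_IO were cleared
 *                do_resume_work();
 *                memalloc_noio_restore(noio_flags);
 *        }
 */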
/**
 * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
 *
 * This function marks the beginning of the GFP_NOFS allocation scope.
 * All further allocations will implicitly drop __GFP_FS flag and so
 * they are safe for the FS critical section from the allocation recursion
 * point of view. Use memalloc_nofs_restore to end the scope with flags
 * returned by this function.
 *
 * This function is safe to be used from any context.
 */
static inline unsigned int memalloc_nofs_save(void)
{
        unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
        current->flags |= PF_MEMALLOC_NOFS;
        return flags;
}

/**
 * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
 * @flags: Flags to restore.
 *
 * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function.
 * Always make sure that @flags is the return value from the pairing
 * memalloc_nofs_save call.
 */
static inline void memalloc_nofs_restore(unsigned int flags)
{
        current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
}
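
/*
 * Illustrative usage sketch (not part of the original header): a
 * filesystem holding transaction state opens a NOFS scope so that any
 * allocation done by the code it calls cannot recurse back into the FS.
 * The function names example_fs_transaction() and do_transaction_work()
 * are hypothetical.
 *
 *        static void example_fs_transaction(void)
 *        {
 *                unsigned int nofs_flags = memalloc_nofs_save();
 *
 *                // allocations here implicitly drop __GFP_FS
 *                do_transaction_work();
 *                memalloc_nofs_restore(nofs_flags);
 *        }
 */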
static inline unsigned int memalloc_noreclaim_save(void)
{
        unsigned int flags = current->flags & PF_MEMALLOC;
        current->flags |= PF_MEMALLOC;
        return flags;
}

static inline void memalloc_noreclaim_restore(unsigned int flags)
{
        current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}

static inline unsigned int memalloc_pin_save(void)
{
        unsigned int flags = current->flags & PF_MEMALLOC_PIN;

        current->flags |= PF_MEMALLOC_PIN;
        return flags;
}

static inline void memalloc_pin_restore(unsigned int flags)
{
        current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
}
#ifdef CONFIG_MEMCG
DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
/**
 * set_active_memcg - Starts the remote memcg charging scope.
 * @memcg: memcg to charge.
 *
 * This function marks the beginning of the remote memcg charging scope. All the
 * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
 * given memcg.
 *
 * NOTE: This function can nest. Users must save the return value and
 * reset the previous value after their own charging scope is over.
 */
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
        struct mem_cgroup *old;

        if (!in_task()) {
                old = this_cpu_read(int_active_memcg);
                this_cpu_write(int_active_memcg, memcg);
        } else {
                old = current->active_memcg;
                current->active_memcg = memcg;
        }

        return old;
}
#else
static inline struct mem_cgroup *
set_active_memcg(struct mem_cgroup *memcg)
{
        return NULL;
}
#endif
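
/*
 * Illustrative usage sketch (not part of the original header): because
 * set_active_memcg() nests, callers stash the previous value and restore
 * it when their charging scope ends. The function name
 * example_charge_to_memcg() is hypothetical.
 *
 *        static void *example_charge_to_memcg(struct mem_cgroup *memcg)
 *        {
 *                struct mem_cgroup *old = set_active_memcg(memcg);
 *                void *obj = kmalloc(128, GFP_KERNEL | __GFP_ACCOUNT);
 *
 *                set_active_memcg(old);        // restore the previous scope
 *                return obj;
 *        }
 */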
#ifdef CONFIG_MEMBARRIER
enum {
        MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
        MEMBARRIER_STATE_PRIVATE_EXPEDITED = (1U << 1),
        MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY = (1U << 2),
        MEMBARRIER_STATE_GLOBAL_EXPEDITED = (1U << 3),
        MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 4),
        MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE = (1U << 5),
        MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY = (1U << 6),
        MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ = (1U << 7),
};

enum {
        MEMBARRIER_FLAG_SYNC_CORE = (1U << 0),
        MEMBARRIER_FLAG_RSEQ = (1U << 1),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
#include <asm/membarrier.h>
#endif

static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
        if (current->mm != mm)
                return;
        if (likely(!(atomic_read(&mm->membarrier_state) &
                     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
                return;
        sync_core_before_usermode();
}

extern void membarrier_exec_mmap(struct mm_struct *mm);

extern void membarrier_update_current_mm(struct mm_struct *next_mm);

#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
                                             struct mm_struct *next,
                                             struct task_struct *tsk)
{
}
#endif
static inline void membarrier_exec_mmap(struct mm_struct *mm)
{
}
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
{
}
#endif
#ifdef CONFIG_IOMMU_SVA
static inline void mm_pasid_init(struct mm_struct *mm)
{
        mm->pasid = INVALID_IOASID;
}

/* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
        mm->pasid = pasid;
}

static inline void mm_pasid_drop(struct mm_struct *mm)
{
        if (pasid_valid(mm->pasid)) {
                ioasid_free(mm->pasid);
                mm->pasid = INVALID_IOASID;
        }
}
#else
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif
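
/*
 * Illustrative lifecycle sketch (not part of the original header): the
 * PASID helpers bracket an mm's use with the IOMMU: init when the mm is
 * created, set when an SVA-capable device binds, drop when the mm goes
 * away. The identifiers example_bind_device() and allocated_pasid are
 * hypothetical.
 *
 *        static void example_bind_device(struct mm_struct *mm, u32 allocated_pasid)
 *        {
 *                mm_pasid_set(mm, allocated_pasid);        // record the PASID on the mm
 *        }
 *        // ... later, on teardown:
 *        //        mm_pasid_drop(mm);        // frees the IOASID if it was valid
 */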
#endif /* _LINUX_SCHED_MM_H */