From f4f2c619d5fda58b96c6a9eeb790f56969d64f16 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 19 Nov 2021 14:51:26 -0800 Subject: [PATCH] FROMLIST: mm/oom_kill: allow process_mrelease to run under mmap_lock protection With exit_mmap holding mmap_write_lock during free_pgtables call, process_mrelease does not need to elevate mm->mm_users in order to prevent exit_mmap from destrying pagetables while __oom_reap_task_mm is walking the VMA tree. The change prevents process_mrelease from calling the last mmput, which can lead to waiting for IO completion in exit_aio. Fixes: 337546e83fc7 ("mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap") Signed-off-by: Suren Baghdasaryan Link: https://lore.kernel.org/all/20211124235906.14437-2-surenb@google.com/ Bug: 130172058 Bug: 189803002 Signed-off-by: Suren Baghdasaryan Change-Id: I1e2728e0c477af9cc20e9e0b715ee67dee760618 --- mm/oom_kill.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 764a3a2aad6b..226940116bd5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1230,15 +1230,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) goto put_task; } - if (mmget_not_zero(p->mm)) { - mm = p->mm; - if (task_will_free_mem(p)) - reap = true; - else { - /* Error only if the work has not been done already */ - if (!test_bit(MMF_OOM_SKIP, &mm->flags)) - ret = -EINVAL; - } + mm = p->mm; + mmgrab(mm); + + if (task_will_free_mem(p)) + reap = true; + else { + /* Error only if the work has not been done already */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags)) + ret = -EINVAL; } task_unlock(p); @@ -1249,13 +1249,16 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) ret = -EINTR; goto drop_mm; } - if (!__oom_reap_task_mm(mm)) + /* + * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure + * possible change in exit_mmap is seen + */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm)) ret = -EAGAIN; mmap_read_unlock(mm); drop_mm: - if (mm) - mmput(mm); + mmdrop(mm); put_task: put_task_struct(task); put_pid: