hugetlbfs: revert "Use i_mmap_rwsem to fix page fault/truncate race"
This reverts commit c86aa7bbfd.
The reverted commit caused ABBA deadlocks when file migration raced with
file eviction for specific hugetlbfs files. This was discovered with a
modified version of the LTP move_pages12 test.
The purpose of the reverted patch was to close a long existing race
between hugetlbfs file truncation and page faults. After more analysis
of the patch and impacted code, it was determined that i_mmap_rwsem can
not be used for all required synchronization. Therefore, revert this
patch while working on another approach to the underlying issue.
Link: http://lkml.kernel.org/r/20190103235452.29335-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Prakash Sangappa <prakash.sangappa@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
8ab88c7169
commit
e7c5809779
21
mm/hugetlb.c
21
mm/hugetlb.c
@@ -3755,16 +3755,16 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
|
||||
}
|
||||
|
||||
/*
|
||||
* We can not race with truncation due to holding i_mmap_rwsem.
|
||||
* Check once here for faults beyond end of file.
|
||||
* Use page lock to guard against racing truncation
|
||||
* before we get page_table_lock.
|
||||
*/
|
||||
size = i_size_read(mapping->host) >> huge_page_shift(h);
|
||||
if (idx >= size)
|
||||
goto out;
|
||||
|
||||
retry:
|
||||
page = find_lock_page(mapping, idx);
|
||||
if (!page) {
|
||||
size = i_size_read(mapping->host) >> huge_page_shift(h);
|
||||
if (idx >= size)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Check for page in userfault range
|
||||
*/
|
||||
@@ -3854,6 +3854,9 @@ retry:
|
||||
}
|
||||
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
size = i_size_read(mapping->host) >> huge_page_shift(h);
|
||||
if (idx >= size)
|
||||
goto backout;
|
||||
|
||||
ret = 0;
|
||||
if (!huge_pte_none(huge_ptep_get(ptep)))
|
||||
@@ -3956,10 +3959,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
|
||||
/*
|
||||
* Acquire i_mmap_rwsem before calling huge_pte_alloc and hold
|
||||
* until finished with ptep. This serves two purposes:
|
||||
* 1) It prevents huge_pmd_unshare from being called elsewhere
|
||||
* and making the ptep no longer valid.
|
||||
* 2) It synchronizes us with file truncation.
|
||||
* until finished with ptep. This prevents huge_pmd_unshare from
|
||||
* being called elsewhere and making the ptep no longer valid.
|
||||
*
|
||||
* ptep could have already been assigned via huge_pte_offset. That
|
||||
* is OK, as huge_pte_alloc will return the same value unless
|
||||
|
Reference in New Issue
Block a user