mm/gup: track FOLL_PIN pages
Add tracking of pages that were pinned via FOLL_PIN. This tracking is implemented via overloading of page->_refcount: pins are added by adding GUP_PIN_COUNTING_BIAS (1024) to the refcount. This provides a fuzzy indication of pinning, and it can have false positives (and that's OK). Please see the pre-existing Documentation/core-api/pin_user_pages.rst for details. As mentioned in pin_user_pages.rst, callers who effectively set FOLL_PIN (typically via pin_user_pages*()) are required to ultimately free such pages via unpin_user_page(). Please also note the limitation, discussed in pin_user_pages.rst under the "TODO: for 1GB and larger huge pages" section. (That limitation will be removed in a following patch.) The effect of a FOLL_PIN flag is similar to that of FOLL_GET, and may be thought of as "FOLL_GET for DIO and/or RDMA use". Pages that have been pinned via FOLL_PIN are identifiable via a new function call: bool page_maybe_dma_pinned(struct page *page); What to do in response to encountering such a page, is left to later patchsets. There is discussion about this in [1], [2], [3], and [4]. This also changes a BUG_ON(), to a WARN_ON(), in follow_page_mask(). [1] Some slow progress on get_user_pages() (Apr 2, 2019): https://lwn.net/Articles/784574/ [2] DMA and get_user_pages() (LPC: Dec 12, 2018): https://lwn.net/Articles/774411/ [3] The trouble with get_user_pages() (Apr 30, 2018): https://lwn.net/Articles/753027/ [4] LWN kernel index: get_user_pages(): https://lwn.net/Kernel/Index/#Memory_management-get_user_pages [jhubbard@nvidia.com: add kerneldoc] Link: http://lkml.kernel.org/r/20200307021157.235726-1-jhubbard@nvidia.com [imbrenda@linux.ibm.com: if pin fails, we need to unpin, a simple put_page will not be enough] Link: http://lkml.kernel.org/r/20200306132537.783769-2-imbrenda@linux.ibm.com [akpm@linux-foundation.org: fix put_compound_head defined but not used] Suggested-by: Jan Kara <jack@suse.cz> Suggested-by: Jérôme Glisse <jglisse@redhat.com> Signed-off-by: John Hubbard <jhubbard@nvidia.com> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Jan Kara <jack@suse.cz> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@infradead.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Link: http://lkml.kernel.org/r/20200211001536.1027652-7-jhubbard@nvidia.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
94202f126f
commit
3faa52c03f
54
mm/hugetlb.c
54
mm/hugetlb.c
@@ -4375,19 +4375,6 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
|
||||
page = pte_page(huge_ptep_get(pte));
|
||||
|
||||
/*
|
||||
* Instead of doing 'try_get_page()' below in the same_page
|
||||
* loop, just check the count once here.
|
||||
*/
|
||||
if (unlikely(page_count(page) <= 0)) {
|
||||
if (pages) {
|
||||
spin_unlock(ptl);
|
||||
remainder = 0;
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If subpage information not requested, update counters
|
||||
* and skip the same_page loop below.
|
||||
@@ -4405,7 +4392,22 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
same_page:
|
||||
if (pages) {
|
||||
pages[i] = mem_map_offset(page, pfn_offset);
|
||||
get_page(pages[i]);
|
||||
/*
|
||||
* try_grab_page() should always succeed here, because:
|
||||
* a) we hold the ptl lock, and b) we've just checked
|
||||
* that the huge page is present in the page tables. If
|
||||
* the huge page is present, then the tail pages must
|
||||
* also be present. The ptl prevents the head page and
|
||||
* tail pages from being rearranged in any way. So this
|
||||
* page must be available at this point, unless the page
|
||||
* refcount overflowed:
|
||||
*/
|
||||
if (WARN_ON_ONCE(!try_grab_page(pages[i], flags))) {
|
||||
spin_unlock(ptl);
|
||||
remainder = 0;
|
||||
err = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (vmas)
|
||||
@@ -4965,6 +4967,12 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
||||
struct page *page = NULL;
|
||||
spinlock_t *ptl;
|
||||
pte_t pte;
|
||||
|
||||
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
|
||||
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
|
||||
(FOLL_PIN | FOLL_GET)))
|
||||
return NULL;
|
||||
|
||||
retry:
|
||||
ptl = pmd_lockptr(mm, pmd);
|
||||
spin_lock(ptl);
|
||||
@@ -4977,8 +4985,18 @@ retry:
|
||||
pte = huge_ptep_get((pte_t *)pmd);
|
||||
if (pte_present(pte)) {
|
||||
page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
if (flags & FOLL_GET)
|
||||
get_page(page);
|
||||
/*
|
||||
* try_grab_page() should always succeed here, because: a) we
|
||||
* hold the pmd (ptl) lock, and b) we've just checked that the
|
||||
* huge pmd (head) page is present in the page tables. The ptl
|
||||
* prevents the head page and tail pages from being rearranged
|
||||
* in any way. So this page must be available at this point,
|
||||
* unless the page refcount overflowed:
|
||||
*/
|
||||
if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
|
||||
page = NULL;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
if (is_hugetlb_entry_migration(pte)) {
|
||||
spin_unlock(ptl);
|
||||
@@ -4999,7 +5017,7 @@ struct page * __weak
|
||||
follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
pud_t *pud, int flags)
|
||||
{
|
||||
if (flags & FOLL_GET)
|
||||
if (flags & (FOLL_GET | FOLL_PIN))
|
||||
return NULL;
|
||||
|
||||
return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
@@ -5008,7 +5026,7 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
struct page * __weak
|
||||
follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
|
||||
{
|
||||
if (flags & FOLL_GET)
|
||||
if (flags & (FOLL_GET | FOLL_PIN))
|
||||
return NULL;
|
||||
|
||||
return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
|
||||
|
Reference in New Issue
Block a user