Merge branch 'page-refs' (page ref overflow)
Merge page ref overflow branch.

Jann Horn reported that he can overflow the page ref count with
sufficient memory (and a filesystem that is intentionally extremely
slow).

Admittedly it's not exactly easy.  To have more than four billion
references to a page requires a minimum of 32GB of kernel memory just
for the pointers to the pages, much less any metadata to keep track of
those pointers.  Jann needed a total of 140GB of memory and a specially
crafted filesystem that leaves all reads pending (in order to not ever
free the page references and just keep adding more).

Still, we have a fairly straightforward way to limit the two obvious
user-controllable sources of page references: direct-IO like page
references gotten through get_user_pages(), and the splice pipe page
duplication.  So let's just do that.

* branch page-refs:
  fs: prevent page refcount overflow in pipe_buf_get
  mm: prevent get_user_pages() from overflowing page refcount
  mm: add 'try_get_page()' helper function
  mm: make page ref count overflow check tighter and more explicit
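The branch works by never taking a page reference blindly once the count could wrap: on the user-controllable paths, get_page() becomes try_get_page() or try_get_compound_head(), which refuse the reference so the caller can return -ENOMEM (or make the fast path fall back) instead of overflowing. A rough userspace sketch of that shape, assuming nothing beyond C11 atomics; struct fake_page, try_get_ref() and the 1024-reference headroom are invented names for illustration, not the kernel helpers in the diff below:

/*
 * Illustrative userspace sketch of the guard, not kernel code:
 * refuse a new reference when the counter is bad or near wrapping.
 */
#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
        atomic_int refcount;    /* stands in for struct page's _refcount */
};

static bool try_get_ref(struct fake_page *p)
{
        int old = atomic_load(&p->refcount);

        /* Zero or negative means freed or already overflowed: leave it alone. */
        if (old <= 0)
                return false;
        /* Keep headroom so a burst of concurrent takers cannot wrap the count. */
        if (old > INT_MAX - 1024)
                return false;
        atomic_fetch_add(&p->refcount, 1);
        return true;
}

int main(void)
{
        struct fake_page p;

        atomic_init(&p.refcount, 1);
        printf("normal get: %s\n", try_get_ref(&p) ? "ok" : "refused");

        atomic_store(&p.refcount, INT_MAX - 2);        /* counter about to wrap */
        printf("near-overflow get: %s\n", try_get_ref(&p) ? "ok" : "refused");
        return 0;
}

The kernel helpers in the diff do the analogous check against struct page's reference count and WARN once if it has already gone negative.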
mm/gup.c | 48 ++++++++++++++++++++++++++++++++++++------------
@@ -160,8 +160,12 @@ retry:
                 goto retry;
         }
 
-        if (flags & FOLL_GET)
-                get_page(page);
+        if (flags & FOLL_GET) {
+                if (unlikely(!try_get_page(page))) {
+                        page = ERR_PTR(-ENOMEM);
+                        goto out;
+                }
+        }
         if (flags & FOLL_TOUCH) {
                 if ((flags & FOLL_WRITE) &&
                     !pte_dirty(pte) && !PageDirty(page))
@@ -298,7 +302,10 @@ retry_locked:
                         if (pmd_trans_unstable(pmd))
                                 ret = -EBUSY;
                 } else {
-                        get_page(page);
+                        if (unlikely(!try_get_page(page))) {
+                                spin_unlock(ptl);
+                                return ERR_PTR(-ENOMEM);
+                        }
                         spin_unlock(ptl);
                         lock_page(page);
                         ret = split_huge_page(page);
@@ -500,7 +507,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
                 if (is_device_public_page(*page))
                         goto unmap;
         }
-        get_page(*page);
+        if (unlikely(!try_get_page(*page))) {
+                ret = -ENOMEM;
+                goto unmap;
+        }
 out:
         ret = 0;
 unmap:
@@ -1545,6 +1555,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
         }
 }
 
+/*
+ * Return the compound head page with ref appropriately incremented,
+ * or NULL if that failed.
+ */
+static inline struct page *try_get_compound_head(struct page *page, int refs)
+{
+        struct page *head = compound_head(page);
+        if (WARN_ON_ONCE(page_ref_count(head) < 0))
+                return NULL;
+        if (unlikely(!page_cache_add_speculative(head, refs)))
+                return NULL;
+        return head;
+}
+
 #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                          int write, struct page **pages, int *nr)
@@ -1579,9 +1603,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
                 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                 page = pte_page(pte);
-                head = compound_head(page);
 
-                if (!page_cache_get_speculative(head))
+                head = try_get_compound_head(page, 1);
+                if (!head)
                         goto pte_unmap;
 
                 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
@@ -1720,8 +1744,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                 refs++;
         } while (addr += PAGE_SIZE, addr != end);
 
-        head = compound_head(pmd_page(orig));
-        if (!page_cache_add_speculative(head, refs)) {
+        head = try_get_compound_head(pmd_page(orig), refs);
+        if (!head) {
                 *nr -= refs;
                 return 0;
         }
@@ -1758,8 +1782,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                 refs++;
         } while (addr += PAGE_SIZE, addr != end);
 
-        head = compound_head(pud_page(orig));
-        if (!page_cache_add_speculative(head, refs)) {
+        head = try_get_compound_head(pud_page(orig), refs);
+        if (!head) {
                 *nr -= refs;
                 return 0;
         }
@@ -1795,8 +1819,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
                 refs++;
         } while (addr += PAGE_SIZE, addr != end);
 
-        head = compound_head(pgd_page(orig));
-        if (!page_cache_add_speculative(head, refs)) {
+        head = try_get_compound_head(pgd_page(orig), refs);
+        if (!head) {
                 *nr -= refs;
                 return 0;
         }
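The gup_huge_pmd()/gup_huge_pud()/gup_huge_pgd() hunks above batch the same check: instead of compound_head() followed by page_cache_add_speculative(), they take all `refs` references through try_get_compound_head() and, if it refuses, back the whole batch out with *nr -= refs. A hedged sketch of that batched form, again with invented userspace names rather than the kernel API:

/*
 * Illustrative sketch of the batched variant: take `refs` references in
 * one step, or none at all if the counter is suspect.  Userspace toy
 * types only, not the helpers used in the diff above.
 */
#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
        atomic_int refcount;
};

static bool try_get_refs(struct fake_page *p, int refs)
{
        int old = atomic_load(&p->refcount);

        if (old <= 0)                   /* freed or already overflowed */
                return false;
        if (old > INT_MAX - refs)      /* adding refs would wrap */
                return false;
        atomic_fetch_add(&p->refcount, refs);
        return true;
}

int main(void)
{
        struct fake_page p;

        atomic_init(&p.refcount, 1);
        /* e.g. one huge page contributing 512 small-page references at once */
        printf("batched get of 512 refs: %s\n",
               try_get_refs(&p, 512) ? "ok" : "refused");
        return 0;
}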
mm/hugetlb.c | 13 +++++++++++++
@@ -4299,6 +4299,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
                 pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
                 page = pte_page(huge_ptep_get(pte));
+
+                /*
+                 * Instead of doing 'try_get_page()' below in the same_page
+                 * loop, just check the count once here.
+                 */
+                if (unlikely(page_count(page) <= 0)) {
+                        if (pages) {
+                                spin_unlock(ptl);
+                                remainder = 0;
+                                err = -ENOMEM;
+                                break;
+                        }
+                }
 same_page:
                 if (pages) {
                         pages[i] = mem_map_offset(page, pfn_offset);