mm: track vma changes with VM_SOFTDIRTY bit

Pavel reported that if a vma area gets unmapped and then mapped (or
expanded) in place, the soft dirty tracker won't be able to recognize this
situation, since it works at the pte level and ptes get zapped on unmap,
losing the soft dirty bit of course.

So to resolve this situation we need to track actions at the vma level;
this is where the VM_SOFTDIRTY flag comes in.  When a new vma area is
created (or an old one is expanded) we set this bit, and keep it there
until the application asks to clear the soft dirty bit.

Thus when a user space application tracks memory changes, it can now
detect if a vma area has been renewed.

Reported-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Rob Landley <rob@landley.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Cyrill Gorcunov
2013-09-11 14:22:24 -07:00
کامیت شده توسط Linus Torvalds
والد 3b11f0aaae
کامیت d9104d1ca9
5فایلهای تغییر یافته به همراه61 افزوده شده و 12 حذف شده

مشاهده پرونده

@@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_file_clear_soft_dirty(ptent);
}
if (vma->vm_flags & VM_SOFTDIRTY)
vma->vm_flags &= ~VM_SOFTDIRTY;
set_pte_at(vma->vm_mm, addr, pte, ptent);
#endif
}
@@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
} else {
*pme = make_pme(PM_NOT_PRESENT(pm->v2));
if (vma->vm_flags & VM_SOFTDIRTY)
flags2 |= __PM_SOFT_DIRTY;
*pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
return;
}
if (page && !PageAnon(page))
flags |= PM_FILE;
if (pte_soft_dirty(pte))
if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
flags2 |= __PM_SOFT_DIRTY;
*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
@@ -974,7 +979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p
*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
| PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
else
*pme = make_pme(PM_NOT_PRESENT(pm->v2));
*pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2));
}
#else
static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
@@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
int pmd_flags2;
pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
pmd_flags2 = __PM_SOFT_DIRTY;
else
pmd_flags2 = 0;
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
@@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (pmd_trans_unstable(pmd))
return 0;
for (; addr != end; addr += PAGE_SIZE) {
int flags2;
/* check to see if we've left 'vma' behind
* and need a new, higher one */
if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
pme = make_pme(PM_NOT_PRESENT(pm->v2));
if (vma && (vma->vm_flags & VM_SOFTDIRTY))
flags2 = __PM_SOFT_DIRTY;
else
flags2 = 0;
pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
}
/* check that 'vma' actually covers this address,
@@ -1044,13 +1058,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
#ifdef CONFIG_HUGETLB_PAGE
static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
pte_t pte, int offset)
pte_t pte, int offset, int flags2)
{
if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
| PM_STATUS2(pm->v2, 0) | PM_PRESENT);
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
PM_STATUS2(pm->v2, flags2) |
PM_PRESENT);
else
*pme = make_pme(PM_NOT_PRESENT(pm->v2));
*pme = make_pme(PM_NOT_PRESENT(pm->v2) |
PM_STATUS2(pm->v2, flags2));
}
/* This function walks within one hugetlb entry in the single call */
@@ -1059,12 +1075,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
struct vm_area_struct *vma;
int err = 0;
int flags2;
pagemap_entry_t pme;
vma = find_vma(walk->mm, addr);
WARN_ON_ONCE(!vma);
if (vma && (vma->vm_flags & VM_SOFTDIRTY))
flags2 = __PM_SOFT_DIRTY;
else
flags2 = 0;
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;