powerpc/mm/thp: Make page table walk safe against thp split/collapse
We can disable a THP split or a hugepage collapse by disabling irq. We do send IPI to all the cpus in the early part of split/collapse, and disabling local irq ensure we don't make progress with split/collapse. If the THP is getting split we return NULL from find_linux_pte_or_hugepte(). For all the current callers it should be ok. We need to be careful if we want to use returned pte_t pointer outside the irq disabled region. W.r.t to THP split, the pfn remains the same, but then a hugepage collapse will result in a pfn change. There are few steps we can take to avoid a hugepage collapse.One way is to take page reference inside the irq disable region. Other option is to take mmap_sem so that a parallel collapse will not happen. We can also disable collapse by taking pmd_lock. Another method used by kvm subsystem is to check whether we had a mmu_notifer update in between using mmu_notifier_retry(). Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:

committed by
Michael Ellerman

parent
dac5657067
commit
691e95fd73
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
pte_t *ptep;
|
||||
unsigned int wimg = 0;
|
||||
pgd_t *pgdir;
|
||||
unsigned long flags;
|
||||
|
||||
/* used to check for invalidations in progress */
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
@@ -468,14 +469,23 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
|
||||
|
||||
|
||||
pgdir = vcpu_e500->vcpu.arch.pgdir;
|
||||
/*
|
||||
* We are just looking at the wimg bits, so we don't
|
||||
* care much about the trans splitting bit.
|
||||
* We are holding kvm->mmu_lock so a notifier invalidate
|
||||
* can't run hence pfn won't change.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL);
|
||||
if (ptep) {
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
|
||||
if (pte_present(pte))
|
||||
if (pte_present(pte)) {
|
||||
wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
|
||||
MAS2_WIMGE_MASK;
|
||||
else {
|
||||
local_irq_restore(flags);
|
||||
} else {
|
||||
local_irq_restore(flags);
|
||||
pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
|
||||
__func__, (long)gfn, pfn);
|
||||
ret = -EINVAL;
|
||||
|
Reference in New Issue
Block a user