sparc64: Support transparent huge pages.

This is relatively easy since PMD's now cover exactly 4MB of memory.

Our PMD entries are 32-bits each, so we use a special encoding.  The
lowest bit, PMD_ISHUGE, determines the interpretation.  This is possible
because sparc64's page tables are purely software entities so we can use
whatever encoding scheme we want.  We just have to make the TLB miss
assembler page table walkers aware of the layout.

set_pmd_at() works much like set_pte_at() but it has to operate in two
page from a table of non-huge PTEs, so we have to queue up TLB flushes
based upon what mappings are valid in the PTE table.  In the second regime
we are going from huge-page to non-huge-page, and in that case we need
only queue up a single TLB flush to push out the huge page mapping.

We still have 5 bits remaining in the huge PMD encoding so we can very
likely support any new pieces of THP state tracking that might get added
in the future.

With lots of help from Johannes Weiner.

Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
David Miller
2012-10-08 16:34:29 -07:00
committed by Linus Torvalds
parent f5c8ad4728
commit 9e695d2ecc
14 changed files with 591 additions and 119 deletions

View File

@@ -306,12 +306,24 @@ static void flush_dcache(unsigned long pfn)
}
}
/* mm->context.lock must be held */
static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
unsigned long tsb_hash_shift, unsigned long address,
unsigned long tte)
{
struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
unsigned long tag;
tsb += ((address >> tsb_hash_shift) &
(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
tag = (address >> 22UL);
tsb_insert(tsb, tag, tte);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
unsigned long tsb_index, tsb_hash_shift, flags;
struct mm_struct *mm;
struct tsb *tsb;
unsigned long tag, flags;
unsigned long tsb_index, tsb_hash_shift;
pte_t pte = *ptep;
if (tlb_type != hypervisor) {
@@ -328,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
spin_lock_irqsave(&mm->context.lock, flags);
#ifdef CONFIG_HUGETLB_PAGE
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
if ((tlb_type == hypervisor &&
(pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
@@ -340,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
}
#endif
tsb = mm->context.tsb_block[tsb_index].tsb;
tsb += ((address >> tsb_hash_shift) &
(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
tag = (address >> 22UL);
tsb_insert(tsb, tag, pte_val(pte));
__update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
address, pte_val(pte));
spin_unlock_irqrestore(&mm->context.lock, flags);
}
@@ -2568,3 +2577,180 @@ void pgtable_free(void *table, bool is_page)
else
kmem_cache_free(pgtable_cache, table);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
{
if (pgprot_val(pgprot) & _PAGE_VALID)
pmd_val(pmd) |= PMD_HUGE_PRESENT;
if (tlb_type == hypervisor) {
if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
pmd_val(pmd) |= PMD_HUGE_WRITE;
if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
pmd_val(pmd) |= PMD_HUGE_EXEC;
if (!for_modify) {
if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
pmd_val(pmd) |= PMD_HUGE_ACCESSED;
if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
pmd_val(pmd) |= PMD_HUGE_DIRTY;
}
} else {
if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
pmd_val(pmd) |= PMD_HUGE_WRITE;
if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
pmd_val(pmd) |= PMD_HUGE_EXEC;
if (!for_modify) {
if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
pmd_val(pmd) |= PMD_HUGE_ACCESSED;
if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
pmd_val(pmd) |= PMD_HUGE_DIRTY;
}
}
return pmd;
}
pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
pmd_t pmd;
pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
pmd_val(pmd) |= PMD_ISHUGE;
pmd = pmd_set_protbits(pmd, pgprot, false);
return pmd;
}
pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
PMD_HUGE_WRITE |
PMD_HUGE_EXEC);
pmd = pmd_set_protbits(pmd, newprot, true);
return pmd;
}
pgprot_t pmd_pgprot(pmd_t entry)
{
unsigned long pte = 0;
if (pmd_val(entry) & PMD_HUGE_PRESENT)
pte |= _PAGE_VALID;
if (tlb_type == hypervisor) {
if (pmd_val(entry) & PMD_HUGE_PRESENT)
pte |= _PAGE_PRESENT_4V;
if (pmd_val(entry) & PMD_HUGE_EXEC)
pte |= _PAGE_EXEC_4V;
if (pmd_val(entry) & PMD_HUGE_WRITE)
pte |= _PAGE_W_4V;
if (pmd_val(entry) & PMD_HUGE_ACCESSED)
pte |= _PAGE_ACCESSED_4V;
if (pmd_val(entry) & PMD_HUGE_DIRTY)
pte |= _PAGE_MODIFIED_4V;
pte |= _PAGE_CP_4V|_PAGE_CV_4V;
} else {
if (pmd_val(entry) & PMD_HUGE_PRESENT)
pte |= _PAGE_PRESENT_4U;
if (pmd_val(entry) & PMD_HUGE_EXEC)
pte |= _PAGE_EXEC_4U;
if (pmd_val(entry) & PMD_HUGE_WRITE)
pte |= _PAGE_W_4U;
if (pmd_val(entry) & PMD_HUGE_ACCESSED)
pte |= _PAGE_ACCESSED_4U;
if (pmd_val(entry) & PMD_HUGE_DIRTY)
pte |= _PAGE_MODIFIED_4U;
pte |= _PAGE_CP_4U|_PAGE_CV_4U;
}
return __pgprot(pte);
}
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
unsigned long pte, flags;
struct mm_struct *mm;
pmd_t entry = *pmd;
pgprot_t prot;
if (!pmd_large(entry) || !pmd_young(entry))
return;
pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
pte <<= PMD_PADDR_SHIFT;
pte |= _PAGE_VALID;
prot = pmd_pgprot(entry);
if (tlb_type == hypervisor)
pgprot_val(prot) |= _PAGE_SZHUGE_4V;
else
pgprot_val(prot) |= _PAGE_SZHUGE_4U;
pte |= pgprot_val(prot);
mm = vma->vm_mm;
spin_lock_irqsave(&mm->context.lock, flags);
if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
__update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
addr, pte);
spin_unlock_irqrestore(&mm->context.lock, flags);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
static void context_reload(void *__data)
{
struct mm_struct *mm = __data;
if (mm == current->mm)
load_secondary_context(mm);
}
void hugetlb_setup(struct mm_struct *mm)
{
struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
if (likely(tp->tsb != NULL))
return;
tsb_grow(mm, MM_TSB_HUGE, 0);
tsb_context_switch(mm);
smp_tsb_sync(mm);
/* On UltraSPARC-III+ and later, configure the second half of
* the Data-TLB for huge pages.
*/
if (tlb_type == cheetah_plus) {
unsigned long ctx;
spin_lock(&ctx_alloc_lock);
ctx = mm->context.sparc64_ctx_val;
ctx &= ~CTX_PGSZ_MASK;
ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
if (ctx != mm->context.sparc64_ctx_val) {
/* When changing the page size fields, we
* must perform a context flush so that no
* stale entries match. This flush must
* occur with the original context register
* settings.
*/
do_flush_tlb_mm(mm);
/* Reload the context register of all processors
* also executing in this address space.
*/
mm->context.sparc64_ctx_val = ctx;
on_each_cpu(context_reload, mm, 0);
}
spin_unlock(&ctx_alloc_lock);
}
}
#endif