s390/mm: add support for 2GB hugepages
This adds support for 2GB hugetlbfs pages on s390. Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:

gecommit door
Martin Schwidefsky

bovenliggende
46210c440c
commit
d08de8e2d8
@@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
|
||||
VM_BUG_ON(pgd_none(*pgd));
|
||||
pud = pud_offset(pgd, vmaddr);
|
||||
VM_BUG_ON(pud_none(*pud));
|
||||
/* large puds cannot yet be handled */
|
||||
if (pud_large(*pud))
|
||||
return -EFAULT;
|
||||
pmd = pmd_offset(pud, vmaddr);
|
||||
VM_BUG_ON(pmd_none(*pmd));
|
||||
/* large pmds cannot yet be handled */
|
||||
|
@@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
struct page *head, *page;
|
||||
unsigned long mask;
|
||||
int refs;
|
||||
|
||||
mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
|
||||
if ((pud_val(pud) & mask) != 0)
|
||||
return 0;
|
||||
VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
|
||||
|
||||
refs = 0;
|
||||
head = pud_page(pud);
|
||||
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
pages[*nr] = page;
|
||||
(*nr)++;
|
||||
page++;
|
||||
refs++;
|
||||
} while (addr += PAGE_SIZE, addr != end);
|
||||
|
||||
if (!page_cache_add_speculative(head, refs)) {
|
||||
*nr -= refs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (unlikely(pud_val(pud) != pud_val(*pudp))) {
|
||||
*nr -= refs;
|
||||
while (refs--)
|
||||
put_page(head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
@@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
return 0;
|
||||
if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
|
||||
if (unlikely(pud_large(pud))) {
|
||||
if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
|
||||
nr))
|
||||
return 0;
|
||||
} else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
|
||||
nr))
|
||||
return 0;
|
||||
} while (pudp++, addr = next, addr != end);
|
||||
|
||||
|
@@ -1,19 +1,22 @@
|
||||
/*
|
||||
* IBM System z Huge TLB Page Support for Kernel.
|
||||
*
|
||||
* Copyright IBM Corp. 2007
|
||||
* Copyright IBM Corp. 2007,2016
|
||||
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
|
||||
*/
|
||||
|
||||
#define KMSG_COMPONENT "hugetlb"
|
||||
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hugetlb.h>
|
||||
|
||||
static inline pmd_t __pte_to_pmd(pte_t pte)
|
||||
static inline unsigned long __pte_to_rste(pte_t pte)
|
||||
{
|
||||
pmd_t pmd;
|
||||
unsigned long rste;
|
||||
|
||||
/*
|
||||
* Convert encoding pte bits pmd bits
|
||||
* Convert encoding pte bits pmd / pud bits
|
||||
* lIR.uswrdy.p dy..R...I...wr
|
||||
* empty 010.000000.0 -> 00..0...1...00
|
||||
* prot-none, clean, old 111.000000.1 -> 00..1...1...00
|
||||
@@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
|
||||
* u unused, l large
|
||||
*/
|
||||
if (pte_present(pte)) {
|
||||
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5;
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
|
||||
pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
|
||||
rste = pte_val(pte) & PAGE_MASK;
|
||||
rste |= (pte_val(pte) & _PAGE_READ) >> 4;
|
||||
rste |= (pte_val(pte) & _PAGE_WRITE) >> 4;
|
||||
rste |= (pte_val(pte) & _PAGE_INVALID) >> 5;
|
||||
rste |= (pte_val(pte) & _PAGE_PROTECT);
|
||||
rste |= (pte_val(pte) & _PAGE_DIRTY) << 10;
|
||||
rste |= (pte_val(pte) & _PAGE_YOUNG) << 10;
|
||||
rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
|
||||
} else
|
||||
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
|
||||
return pmd;
|
||||
rste = _SEGMENT_ENTRY_INVALID;
|
||||
return rste;
|
||||
}
|
||||
|
||||
static inline pte_t __pmd_to_pte(pmd_t pmd)
|
||||
static inline pte_t __rste_to_pte(unsigned long rste)
|
||||
{
|
||||
int present;
|
||||
pte_t pte;
|
||||
|
||||
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
|
||||
present = pud_present(__pud(rste));
|
||||
else
|
||||
present = pmd_present(__pmd(rste));
|
||||
|
||||
/*
|
||||
* Convert encoding pmd bits pte bits
|
||||
* Convert encoding pmd / pud bits pte bits
|
||||
* dy..R...I...wr lIR.uswrdy.p
|
||||
* empty 00..0...1...00 -> 010.000000.0
|
||||
* prot-none, clean, old 00..1...1...00 -> 111.000000.1
|
||||
@@ -70,16 +79,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
|
||||
* SW-bits: p present, y young, d dirty, r read, w write, s special,
|
||||
* u unused, l large
|
||||
*/
|
||||
if (pmd_present(pmd)) {
|
||||
pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
|
||||
if (present) {
|
||||
pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
|
||||
pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
|
||||
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4;
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4;
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5;
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT);
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10;
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10;
|
||||
pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
|
||||
} else
|
||||
pte_val(pte) = _PAGE_INVALID;
|
||||
return pte;
|
||||
@@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
|
||||
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte)
|
||||
{
|
||||
pmd_t pmd = __pte_to_pmd(pte);
|
||||
unsigned long rste = __pte_to_rste(pte);
|
||||
|
||||
pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
|
||||
*(pmd_t *) ptep = pmd;
|
||||
/* Set correct table type for 2G hugepages */
|
||||
if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
|
||||
rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
|
||||
else
|
||||
rste |= _SEGMENT_ENTRY_LARGE;
|
||||
pte_val(*ptep) = rste;
|
||||
}
|
||||
|
||||
pte_t huge_ptep_get(pte_t *ptep)
|
||||
{
|
||||
pmd_t pmd = *(pmd_t *) ptep;
|
||||
|
||||
return __pmd_to_pte(pmd);
|
||||
return __rste_to_pte(pte_val(*ptep));
|
||||
}
|
||||
|
||||
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
|
||||
unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
pte_t pte = huge_ptep_get(ptep);
|
||||
pmd_t *pmdp = (pmd_t *) ptep;
|
||||
pmd_t old;
|
||||
pud_t *pudp = (pud_t *) ptep;
|
||||
|
||||
old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
|
||||
return __pmd_to_pte(old);
|
||||
if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
|
||||
pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
|
||||
else
|
||||
pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
|
||||
return pte;
|
||||
}
|
||||
|
||||
pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
@@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
|
||||
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
pudp = pud_alloc(mm, pgdp, addr);
|
||||
if (pudp)
|
||||
pmdp = pmd_alloc(mm, pudp, addr);
|
||||
if (pudp) {
|
||||
if (sz == PUD_SIZE)
|
||||
return (pte_t *) pudp;
|
||||
else if (sz == PMD_SIZE)
|
||||
pmdp = pmd_alloc(mm, pudp, addr);
|
||||
}
|
||||
return (pte_t *) pmdp;
|
||||
}
|
||||
|
||||
@@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
if (pgd_present(*pgdp)) {
|
||||
pudp = pud_offset(pgdp, addr);
|
||||
if (pud_present(*pudp))
|
||||
if (pud_present(*pudp)) {
|
||||
if (pud_large(*pudp))
|
||||
return (pte_t *) pudp;
|
||||
pmdp = pmd_offset(pudp, addr);
|
||||
}
|
||||
}
|
||||
return (pte_t *) pmdp;
|
||||
}
|
||||
@@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd)
|
||||
|
||||
int pud_huge(pud_t pud)
|
||||
{
|
||||
return 0;
|
||||
return pud_large(pud);
|
||||
}
|
||||
|
||||
struct page *
|
||||
follow_huge_pud(struct mm_struct *mm, unsigned long address,
|
||||
pud_t *pud, int flags)
|
||||
{
|
||||
if (flags & FOLL_GET)
|
||||
return NULL;
|
||||
|
||||
return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static __init int setup_hugepagesz(char *opt)
|
||||
{
|
||||
unsigned long size;
|
||||
char *string = opt;
|
||||
|
||||
size = memparse(opt, &opt);
|
||||
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
|
||||
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
|
||||
} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
|
||||
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
|
||||
} else {
|
||||
pr_err("hugepagesz= specifies an unsupported page size %s\n",
|
||||
string);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
__setup("hugepagesz=", setup_hugepagesz);
|
||||
|
@@ -352,6 +352,45 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
|
||||
}
|
||||
EXPORT_SYMBOL(pmdp_xchg_lazy);
|
||||
|
||||
static inline pud_t pudp_flush_direct(struct mm_struct *mm,
|
||||
unsigned long addr, pud_t *pudp)
|
||||
{
|
||||
pud_t old;
|
||||
|
||||
old = *pudp;
|
||||
if (pud_val(old) & _REGION_ENTRY_INVALID)
|
||||
return old;
|
||||
if (!MACHINE_HAS_IDTE) {
|
||||
/*
|
||||
* Invalid bit position is the same for pmd and pud, so we can
|
||||
* re-use _pmd_csp() here
|
||||
*/
|
||||
__pmdp_csp((pmd_t *) pudp);
|
||||
return old;
|
||||
}
|
||||
atomic_inc(&mm->context.flush_count);
|
||||
if (MACHINE_HAS_TLB_LC &&
|
||||
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
|
||||
__pudp_idte_local(addr, pudp);
|
||||
else
|
||||
__pudp_idte(addr, pudp);
|
||||
atomic_dec(&mm->context.flush_count);
|
||||
return old;
|
||||
}
|
||||
|
||||
pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
|
||||
pud_t *pudp, pud_t new)
|
||||
{
|
||||
pud_t old;
|
||||
|
||||
preempt_disable();
|
||||
old = pudp_flush_direct(mm, addr, pudp);
|
||||
*pudp = new;
|
||||
preempt_enable();
|
||||
return old;
|
||||
}
|
||||
EXPORT_SYMBOL(pudp_xchg_direct);
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
||||
pgtable_t pgtable)
|
||||
|
Verwijs in nieuw issue
Block a user