
On the 8xx, the page size is set in the PMD entry and applies to
all pages of the page table pointed to by that PMD entry.
When an app has some regular pages allocated (e.g. see below) and tries
to mmap() a huge page at a hint address covered by the same PMD entry,
the kernel accepts the hint although the 8xx cannot handle different
page sizes in the same PMD entry.
10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc
10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc
mmap(0x10080000, 524288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000
This results in the app remaining forever in do_page_fault()/hugetlb_fault(),
and when interrupting that app, we get the following warning:
[162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4
[162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85
[162980.035744] task: c67e2c00 task.stack: c668e000
[162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0
[162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W (4.14.6)
[162980.035854] MSR: 00029032 <EE,ME,IR,DR,RI> CR: 24044224 XER: 20000000
[162980.036003]
[162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000
[162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc
[162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48
[162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000
[162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4
[162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150
[162980.036861] Call Trace:
[162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable)
[162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150
[162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4
[162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8
[162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c
[162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc
[162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614
[162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244
[162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88
[162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4
[162980.037781] Instruction dump:
[162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94
[162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094
[162980.038216] ---[ end trace c0ceeca8e7a5800a ]---
[162980.038754] BUG: non-zero nr_ptes on freeing mm: 1
[162985.363322] BUG: non-zero nr_ptes on freeing mm: -1
In order to fix this, this patch uses the address space "slices"
implemented for BOOK3S/64 and enhanced to support PPC32 by the
preceding patch.
This patch modifies the context.id on the 8xx to be in the range
[1:16] instead of [0:15] in order to identify context.id == 0 as
an uninitialised context, as done on BOOK3S.
This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is
selected for the 8xx.
Although we could in theory have as many slices as PMD entries, the
current slices implementation limits the number of low slices to 16.
This limitation does not prevent us from fixing the initial issue, although
it is suboptimal. It will be cured in a subsequent patch.
Fixes: 4b91428699 ("powerpc/8xx: Implement support of hugepages")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
252 lines
9.4 KiB
C
252 lines
9.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_POWERPC_MMU_8XX_H_
|
|
#define _ASM_POWERPC_MMU_8XX_H_
|
|
/*
|
|
* PPC8xx support
|
|
*/
|
|
|
|
/* Control/status registers for the MPC8xx.
 * A write operation to these registers causes serialized access.
 * During software tablewalk, the registers used perform mask/shift-add
 * operations when written/read.  A TLB entry is created when the Mx_RPN
 * is written, and the contents of several registers are used to
 * create the entry.
 */
#define SPRN_MI_CTR	784	/* Instruction TLB control register */
#define MI_GPM		0x80000000	/* Set domain manager mode */
#define MI_PPM		0x40000000	/* Set subpage protection */
#define MI_CIDEF	0x20000000	/* Set cache inhibit when MMU dis */
#define MI_RSV4I	0x08000000	/* Reserve 4 TLB entries */
#define MI_PPCS		0x02000000	/* Use MI_RPN prob/priv state */
#define MI_IDXMASK	0x00001f00	/* TLB index to be loaded */
#define MI_RESETVAL	0x00000000	/* Value of register at reset */
|
|
|
|
/* These are the Ks and Kp from the PowerPC books.  For proper operation,
 * Ks = 0, Kp = 1.
 */
#define SPRN_MI_AP	786
#define MI_Ks		0x80000000	/* Should not be set */
#define MI_Kp		0x40000000	/* Should always be set */
|
|
|
|
/*
 * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC
 * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means
 * respectively NA for All or X for Supervisor and no access for User.
 * Then we use the APG to say whether accesses are according to Page rules or
 * "all Supervisor" rules (Access to all)
 * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
 * When that bit is not set access is done iaw "all user"
 * which means no access iaw page rules.
 * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
 * 0x => No access => 11 (all accesses performed as user iaw page definition)
 * 10 => No user => 01 (all accesses performed according to page definition)
 * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
 * We define all 16 groups so that all other bits of APG can take any value
 */
#ifdef CONFIG_SWAP
#define MI_APG_INIT	0xf4f4f4f4
#else
#define MI_APG_INIT	0x44444444
#endif
|
|
|
|
/* The effective page number register.  When read, contains the information
 * about the last instruction TLB miss.  When MI_RPN is written, bits in
 * this register are used to create the TLB entry.
 */
#define SPRN_MI_EPN	787
#define MI_EPNMASK	0xfffff000	/* Effective page number for entry */
#define MI_EVALID	0x00000200	/* Entry is valid */
#define MI_ASIDMASK	0x0000000f	/* ASID match value */
					/* Reset value is undefined */
|
|
|
|
/* A "level 1" or "segment" or whatever you want to call it register.
 * For the instruction TLB, it contains bits that get loaded into the
 * TLB entry when the MI_RPN is written.
 */
#define SPRN_MI_TWC	789
#define MI_APG		0x000001e0	/* Access protection group (0) */
#define MI_GUARDED	0x00000010	/* Guarded storage */
#define MI_PSMASK	0x0000000c	/* Mask of page size bits */
#define MI_PS8MEG	0x0000000c	/* 8M page size */
#define MI_PS512K	0x00000004	/* 512K page size */
#define MI_PS4K_16K	0x00000000	/* 4K or 16K page size */
#define MI_SVALID	0x00000001	/* Segment entry is valid */
					/* Reset value is undefined */
|
|
|
|
/* Real page number.  Defined by the pte.  Writing this register
 * causes a TLB entry to be created for the instruction TLB, using
 * additional information from the MI_EPN, and MI_TWC registers.
 */
#define SPRN_MI_RPN	790
#define MI_SPS16K	0x00000008	/* Small page size (0 = 4k, 1 = 16k) */
|
|
|
|
/* Define an RPN value for mapping kernel memory to large virtual
 * pages for boot initialization.  This has real page number of 0,
 * large page size, shared page, cache enabled, and valid.
 * Also mark all subpages valid and write access.
 */
#define MI_BOOTINIT	0x000001fd
|
|
|
|
#define SPRN_MD_CTR	792	/* Data TLB control register */
#define MD_GPM		0x80000000	/* Set domain manager mode */
#define MD_PPM		0x40000000	/* Set subpage protection */
#define MD_CIDEF	0x20000000	/* Set cache inhibit when MMU dis */
#define MD_WTDEF	0x10000000	/* Set writethrough when MMU dis */
#define MD_RSV4I	0x08000000	/* Reserve 4 TLB entries */
#define MD_TWAM		0x04000000	/* Use 4K page hardware assist */
#define MD_PPCS		0x02000000	/* Use MD_RPN prob/priv state */
#define MD_IDXMASK	0x00001f00	/* TLB index to be loaded */
#define MD_RESETVAL	0x04000000	/* Value of register at reset */
|
|
|
|
#define SPRN_M_CASID	793	/* Address space ID (context) to match */
#define MC_ASIDMASK	0x0000000f	/* Bits used for ASID value */
|
|
|
|
|
|
/* These are the Ks and Kp from the PowerPC books.  For proper operation,
 * Ks = 0, Kp = 1.
 */
#define SPRN_MD_AP	794
#define MD_Ks		0x80000000	/* Should not be set */
#define MD_Kp		0x40000000	/* Should always be set */
|
|
|
|
/*
 * All pages' PP data bits are set to either 000 or 011 or 001, which means
 * respectively RW for Supervisor and no access for User, or RO for
 * Supervisor and no access for user and NA for ALL.
 * Then we use the APG to say whether accesses are according to Page rules or
 * "all Supervisor" rules (Access to all)
 * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
 * When that bit is not set access is done iaw "all user"
 * which means no access iaw page rules.
 * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
 * 0x => No access => 11 (all accesses performed as user iaw page definition)
 * 10 => No user => 01 (all accesses performed according to page definition)
 * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
 * We define all 16 groups so that all other bits of APG can take any value
 */
#ifdef CONFIG_SWAP
#define MD_APG_INIT	0xf4f4f4f4
#else
#define MD_APG_INIT	0x44444444
#endif
|
|
|
|
/* The effective page number register.  When read, contains the information
 * about the last data TLB miss.  When MD_RPN is written, bits in
 * this register are used to create the TLB entry.
 */
#define SPRN_MD_EPN	795
#define MD_EPNMASK	0xfffff000	/* Effective page number for entry */
#define MD_EVALID	0x00000200	/* Entry is valid */
#define MD_ASIDMASK	0x0000000f	/* ASID match value */
					/* Reset value is undefined */
|
|
|
|
/* The pointer to the base address of the first level page table.
 * During a software tablewalk, reading this register provides the address
 * of the entry associated with MD_EPN.
 */
#define SPRN_M_TWB	796
#define M_L1TB		0xfffff000	/* Level 1 table base address */
#define M_L1INDX	0x00000ffc	/* Level 1 index, when read */
					/* Reset value is undefined */
|
|
|
|
/* A "level 1" or "segment" or whatever you want to call it register.
 * For the data TLB, it contains bits that get loaded into the TLB entry
 * when the MD_RPN is written.  It also provides the hardware assist
 * for finding the PTE address during software tablewalk.
 */
#define SPRN_MD_TWC	797
#define MD_L2TB		0xfffff000	/* Level 2 table base address */
#define MD_L2INDX	0xfffffe00	/* Level 2 index (*pte), when read */
#define MD_APG		0x000001e0	/* Access protection group (0) */
#define MD_GUARDED	0x00000010	/* Guarded storage */
#define MD_PSMASK	0x0000000c	/* Mask of page size bits */
#define MD_PS8MEG	0x0000000c	/* 8M page size */
#define MD_PS512K	0x00000004	/* 512K page size */
#define MD_PS4K_16K	0x00000000	/* 4K or 16K page size */
#define MD_WT		0x00000002	/* Use writethrough page attribute */
#define MD_SVALID	0x00000001	/* Segment entry is valid */
					/* Reset value is undefined */
|
|
|
|
|
|
/* Real page number.  Defined by the pte.  Writing this register
 * causes a TLB entry to be created for the data TLB, using
 * additional information from the MD_EPN, and MD_TWC registers.
 */
#define SPRN_MD_RPN	798
#define MD_SPS16K	0x00000008	/* Small page size (0 = 4k, 1 = 16k) */
|
|
|
|
/* This is a temporary storage register that could be used to save
 * a processor working register during a tablewalk.
 */
#define SPRN_M_TW	799
|
|
|
|
/* APGs */
#define M_APG0		0x00000000
#define M_APG1		0x00000020
#define M_APG2		0x00000040
#define M_APG3		0x00000060
|
|
|
|
#ifndef __ASSEMBLY__
|
|
/*
 * Per-mm MMU context for the 8xx.
 *
 * The slice-related members only exist when CONFIG_PPC_MM_SLICES is set.
 */
typedef struct {
	unsigned int id;	/* context id; 0 identifies a not initialised context */
	unsigned int active;
	unsigned long vdso_base;
#ifdef CONFIG_PPC_MM_SLICES
	u16 user_psize;		/* page size index */
	u64 low_slices_psize;	/* page size encodings */
	/*
	 * Zero-length array (GNU extension), so it takes no storage.  It is
	 * not the last member, so it cannot be written as a C99 flexible
	 * array member.
	 * NOTE(review): presumably only present so that slice code shared with
	 * other platforms can name this field - confirm.
	 */
	unsigned char high_slices_psize[0];
	unsigned long slb_addr_limit;
#endif
} mm_context_t;
|
|
|
|
/* IMMR base address: physical value is read from the IMMR SPR and masked. */
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
/* Virtual address of the FIX_IMMR_BASE fixmap slot. */
#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
|
|
|
|
/* Page size definitions, common between 32 and 64-bit
 *
 *    shift : is the "PAGE_SHIFT" value for that page size
 *    enc   : is the pte encoding mask
 *    ind   : is the corresponding indirect page size shift
 *    flags : is a combination of the MMU_PAGE_SIZE_* bits below
 *
 */
struct mmu_psize_def {
	unsigned int	shift;	/* number of bits */
	unsigned int	enc;	/* PTE encoding */
	unsigned int	ind;	/* Corresponding indirect page size shift */
	unsigned int	flags;
#define MMU_PAGE_SIZE_DIRECT	0x1	/* Supported as a direct size */
#define MMU_PAGE_SIZE_INDIRECT	0x2	/* Supported as an indirect size */
};
|
|
|
|
/* Table of page size definitions, MMU_PAGE_COUNT entries, defined elsewhere. */
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
|
|
|
|
static inline int shift_to_mmu_psize(unsigned int shift)
|
|
{
|
|
int psize;
|
|
|
|
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
|
|
if (mmu_psize_defs[psize].shift == shift)
|
|
return psize;
|
|
return -1;
|
|
}
|
|
|
|
static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
|
|
{
|
|
if (mmu_psize_defs[mmu_psize].shift)
|
|
return mmu_psize_defs[mmu_psize].shift;
|
|
BUG();
|
|
}
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
/* Select mmu_virtual_psize from the configured base page size. */
#if defined(CONFIG_PPC_4K_PAGES)
#define mmu_virtual_psize	MMU_PAGE_4K
#elif defined(CONFIG_PPC_16K_PAGES)
#define mmu_virtual_psize	MMU_PAGE_16K
#else
#error "Unsupported PAGE_SIZE"
#endif
|
|
|
|
/* NOTE(review): presumably the page size used for the kernel linear mapping - confirm. */
#define mmu_linear_psize MMU_PAGE_8M
|
|
|
|
#endif /* _ASM_POWERPC_MMU_8XX_H_ */
|