powerpc: Update kernel VSID range

This patch changes the kernel VSID range so that we limit VSID_BITS to 37.
This enables us to support 64TB with 65 bit VA (37+28). Without this patch
we have boot hangs on platforms that only support 65 bit VA.

With this patch we now have proto vsid generated as below:

We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
from mmu context id and effective segment id of the address.

For user processes, the max context id is limited to ((1ul << 19) - 5).
For kernel space, we use the top 4 context ids to map addresses as below:
0x7fffc -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
0x7fffd -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
0x7fffe -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
0x7ffff -  [ 0xf000000000000000 - 0xf0003fffffffffff ]

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Tested-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: <stable@vger.kernel.org> [v3.8]
This commit is contained in:
Aneesh Kumar K.V
2013-03-13 03:34:54 +00:00
committed by Benjamin Herrenschmidt
parent e39d1a4714
commit c60ac5693c
6 changed files with 126 additions and 106 deletions

View File

@@ -343,17 +343,16 @@ extern void slb_set_size(u16 size);
/*
* VSID allocation (256MB segment)
*
* We first generate a 38-bit "proto-VSID". For kernel addresses this
* is equal to the ESID | 1 << 37, for user addresses it is:
* (context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1)
* We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
* from mmu context id and effective segment id of the address.
*
* This splits the proto-VSID into the below range
* 0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range
* 2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range
*
* We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1
* That is, we assign half of the space to user processes and half
* to the kernel.
* For user processes, the max context id is limited to ((1ul << 19) - 5).
* For kernel space, we use the top 4 context ids to map addresses as below.
* NOTE: each context only supports 64TB now.
* 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
* 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
* 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
* 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
*
* The proto-VSIDs are then scrambled into real VSIDs with the
* multiplicative hash:
@@ -363,38 +362,45 @@ extern void slb_set_size(u16 size);
* VSID_MULTIPLIER is prime, so in particular it is
* co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
* Because the modulus is 2^n-1 we can compute it efficiently without
* a divide or extra multiply (see below).
* a divide or extra multiply (see below). The scramble function gives
* robust scattering in the hash table (at least based on some initial
* results).
*
* This scheme has several advantages over older methods:
* We also consider VSID 0 special. We use VSID 0 for slb entries mapping
* bad addresses. This enables us to consolidate bad address handling in
* hash_page.
*
* - We have VSIDs allocated for every kernel address
* (i.e. everything above 0xC000000000000000), except the very top
* segment, which simplifies several things.
*
* - We allow for USER_ESID_BITS significant bits of ESID and
* CONTEXT_BITS bits of context for user addresses.
* i.e. 64T (46 bits) of address space for up to half a million contexts.
*
* - The scramble function gives robust scattering in the hash
* table (at least based on some initial results). The previous
* method was more susceptible to pathological cases giving excessive
* hash collisions.
* We also need to avoid the last segment of the last context, because that
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
* because of the modulo operation in vsid scramble. But the vmemmap
* (which is what uses region 0xf) will never be close to 64TB in size
* (it's 56 bytes per page of system memory).
*/
/* Width, in bits, of an MMU context id. */
#define CONTEXT_BITS 19
/*
* Number of ESID bits combined with the context id for 256MB segments:
* 2^18 segments of 2^28 bytes give the 2^46 bytes (64TB) each context maps.
*/
#define USER_ESID_BITS 18
/* Same as USER_ESID_BITS, for the 1T segment size (presumably 2^40 bytes). */
#define USER_ESID_BITS_1T 6
/*
* 256MB segment
* The proto-VSID space has 2^(CONTEXT_BITS + USER_ESID_BITS) - 1 segments
* available for user + kernel mapping. The top 4 contexts are used for
* kernel mapping. Each segment contains 2^28 bytes. Each
* context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
* (19 == 37 + 28 - 46).
*
* MAX_USER_CONTEXT is therefore the highest context id usable by a user
* process; the four ids above it are the ones taken by the kernel.
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5)
/*
* This should be computed such that protovsid * vsid_multiplier
* doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
*/
#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS + 1)
#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS)
#define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1)
#define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
#define VSID_BITS_1T (CONTEXT_BITS + USER_ESID_BITS_1T + 1)
#define VSID_BITS_1T (CONTEXT_BITS + USER_ESID_BITS_1T)
#define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
@@ -422,7 +428,8 @@ extern void slb_set_size(u16 size);
srdi rx,rt,VSID_BITS_##size; \
clrldi rt,rt,(64-VSID_BITS_##size); \
add rt,rt,rx; /* add high and low bits */ \
/* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \
/* NOTE: explanation based on VSID_BITS_##size = 36 \
* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \
* 2^36-1+2^28-1. That in particular means that if r3 >= \
* 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \
* the bit clear, r3 already has the answer we want, if it \
@@ -514,34 +521,6 @@ typedef struct {
})
#endif /* 1 */
/*
* This is only valid for addresses >= PAGE_OFFSET
* The proto-VSID space is divided into two class
* User: 0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1
* kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1
*
* With KERNEL_START at 0xc000000000000000, the proto vsid for
* the kernel ends up with 0xc00000000 (36 bits). With 64TB
* support we need to have kernel proto-VSID in the
* [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS.
*/
static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
{
	unsigned long proto_vsid;

	/*
	 * The proto-VSID is the segment number of the address with bit
	 * (CONTEXT_BITS + USER_ESID_BITS[_1T]) forced on, which keeps every
	 * kernel proto-VSID >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T]).
	 */
	if (ssize != MMU_SEGSIZE_256M) {
		/* Any segment size other than 256M is handled as 1T. */
		proto_vsid = (ea >> SID_SHIFT_1T) |
			     (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T));
		return vsid_scramble(proto_vsid, 1T);
	}

	proto_vsid = (ea >> SID_SHIFT) |
		     (1UL << (CONTEXT_BITS + USER_ESID_BITS));
	return vsid_scramble(proto_vsid, 256M);
}
/* Returns the segment size indicator for a user address */
static inline int user_segment_size(unsigned long addr)
{
@@ -551,10 +530,15 @@ static inline int user_segment_size(unsigned long addr)
return MMU_SEGSIZE_256M;
}
/* This is only valid for user addresses (which are below 2^44) */
static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
int ssize)
{
/*
* Bad address. We return VSID 0 for that
*/
if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
return 0;
if (ssize == MMU_SEGSIZE_256M)
return vsid_scramble((context << USER_ESID_BITS)
| (ea >> SID_SHIFT), 256M);
@@ -562,6 +546,25 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
| (ea >> SID_SHIFT_1T), 1T);
}
/*
* This is only valid for addresses >= PAGE_OFFSET
*
* For kernel space, we use the top 4 context ids to map address as below
* 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
* 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
* 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
* 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
*/
static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
{
	/*
	 * The top nibble of a kernel address is 0xc..0xf; rebase it to 0..3
	 * so it indexes the four context ids sitting just above
	 * MAX_USER_CONTEXT.
	 */
	unsigned long region = (ea >> 60) - 0xc;

	return get_vsid(MAX_USER_CONTEXT + 1 + region, ea, ssize);
}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_HASH64_H_ */