Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm changes from Peter Anvin:
 "This is a huge set of several partly interrelated (and concurrently
  developed) changes, which is why the branch history is messier than
  one would like.

  The *really* big items are two humonguous patchsets mostly developed
  by Yinghai Lu at my request, which completely revamps the way we
  create initial page tables.  In particular, rather than estimating how
  much memory we will need for page tables and then build them into that
  memory -- a calculation that has shown to be incredibly fragile -- we
  now build them (on 64 bits) with the aid of a "pseudo-linear mode" --
  a #PF handler which creates temporary page tables on demand.

  This has several advantages:

  1. It makes it much easier to support things that need access to data
     very early (a followon patchset uses this to load microcode way
     early in the kernel startup).

  2. It allows the kernel and all the kernel data objects to be invoked
     from above the 4 GB limit.  This allows kdump to work on very large
     systems.

  3. It greatly reduces the difference between Xen and native (Xen's
     equivalent of the #PF handler are the temporary page tables created
     by the domain builder), eliminating a bunch of fragile hooks.

  The patch series also gets us a bit closer to W^X.

  Additional work in this pull is the 64-bit get_user() work which you
  were also involved with, and a bunch of cleanups/speedups to
  __phys_addr()/__pa()."

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (105 commits)
  x86, mm: Move reserving low memory later in initialization
  x86, doc: Clarify the use of asm("%edx") in uaccess.h
  x86, mm: Redesign get_user with a __builtin_choose_expr hack
  x86: Be consistent with data size in getuser.S
  x86, mm: Use a bitfield to mask nuisance get_user() warnings
  x86/kvm: Fix compile warning in kvm_register_steal_time()
  x86-32: Add support for 64bit get_user()
  x86-32, mm: Remove reference to alloc_remap()
  x86-32, mm: Remove reference to resume_map_numa_kva()
  x86-32, mm: Rip out x86_32 NUMA remapping code
  x86/numa: Use __pa_nodebug() instead
  x86: Don't panic if can not alloc buffer for swiotlb
  mm: Add alloc_bootmem_low_pages_nopanic()
  x86, 64bit, mm: hibernate use generic mapping_init
  x86, 64bit, mm: Mark data/bss/brk to nx
  x86: Merge early kernel reserve for 32bit and 64bit
  x86: Add Crash kernel low reservation
  x86, kdump: Remove crashkernel range find limit for 64bit
  memblock: Add memblock_mem_size()
  x86, boot: Not need to check setup_header version for setup_data
  ...
This commit is contained in:
Linus Torvalds
2013-02-21 18:06:55 -08:00
77 changed files with 1556 additions and 1261 deletions

View File

@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
# include <asm/proto.h>
# include <asm/numa_64.h>
#endif /* X86 */
#define BAD_MADT_ENTRY(entry, end) ( \

View File

@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void)
#ifndef CONFIG_64BIT
header->pmode_entry = (u32)&wakeup_pmode_return;
header->pmode_cr3 = (u32)__pa(&initial_page_table);
header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP

View File

@@ -768,10 +768,9 @@ int __init gart_iommu_init(void)
aper_base = info.aper_base;
end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
if (end_pfn > max_low_pfn_mapped) {
start_pfn = (aper_base>>PAGE_SHIFT);
start_pfn = PFN_DOWN(aper_base);
if (!pfn_range_is_mapped(start_pfn, end_pfn))
init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
}
pr_info("PCI-DMA: using GART IOMMU.\n");
iommu_size = check_iommu_size(info.aper_base, aper_size);

View File

@@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/apic_flat_64.h>
#include <asm/pgtable.h>
static int numachip_system __read_mostly;

View File

@@ -12,7 +12,6 @@
#include <asm/pci-direct.h>
#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif
@@ -680,12 +679,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* benefit in doing so.
*/
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
unsigned long pfn = tseg >> PAGE_SHIFT;
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
if ((tseg>>PMD_SHIFT) <
(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
((tseg>>PMD_SHIFT) <
(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
if (pfn_range_is_mapped(pfn, pfn + 1))
set_memory_4k((unsigned long)__va(tseg), 1);
}
}

View File

@@ -17,7 +17,6 @@
#ifdef CONFIG_X86_64
#include <linux/topology.h>
#include <asm/numa_64.h>
#endif
#include "cpu.h"
@@ -168,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void)
#ifdef CONFIG_X86_F00F_BUG
static void __cpuinit trap_init_f00f_bug(void)
{
__set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
__set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
/*
* Update the IDT descriptor and reload the IDT so that

View File

@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p)
}
early_param("mem", parse_memopt);
static int __init parse_memmap_opt(char *p)
static int __init parse_memmap_one(char *p)
{
char *oldp;
u64 start_at, mem_size;
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p)
return *p == '\0' ? 0 : -EINVAL;
}
static int __init parse_memmap_opt(char *str)
{
while (str) {
char *k = strchr(str, ',');
if (k)
*k++ = 0;
parse_memmap_one(str);
str = k;
}
return 0;
}
early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)

View File

@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa(ip));
ip = (unsigned long)__va(__pa_symbol(ip));
return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
* kernel identity mapping to modify code.
*/
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa(ip));
ip = (unsigned long)__va(__pa_symbol(ip));
return probe_kernel_write((void *)ip, val, size);
}

View File

@@ -33,20 +33,6 @@ void __init i386_start_kernel(void)
{
sanitize_boot_params(&boot_params);
memblock_reserve(__pa_symbol(&_text),
__pa_symbol(&__bss_stop) - __pa_symbol(&_text));
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
/* Assume only end is not page aligned */
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
}
#endif
/* Call the subarch specific early setup function */
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_MRST:
@@ -60,11 +46,5 @@ void __init i386_start_kernel(void)
break;
}
/*
* At this point everything still needed from the boot loader
* or BIOS or kernel text should be early reserved or marked not
* RAM in e820. All other memory is free game.
*/
start_kernel();
}

View File

@@ -27,11 +27,81 @@
#include <asm/bios_ebda.h>
#include <asm/bootparam_utils.h>
static void __init zap_identity_mappings(void)
/*
* Manage page tables very early on.
*/
extern pgd_t early_level4_pgt[PTRS_PER_PGD];
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt = 2;
/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
pgd_clear(pgd);
__flush_tlb_all();
unsigned long i;
for (i = 0; i < PTRS_PER_PGD-1; i++)
early_level4_pgt[i].pgd = 0;
next_early_pgt = 0;
write_cr3(__pa(early_level4_pgt));
}
/* Create a new PMD entry */
int __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
unsigned long i;
pgdval_t pgd, *pgd_p;
pudval_t pud, *pud_p;
pmdval_t pmd, *pmd_p;
/* Invalid address or early pgt is done ? */
if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
return -1;
again:
pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
pgd = *pgd_p;
/*
* The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
if (pgd)
pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
else {
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
reset_early_page_tables();
goto again;
}
pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
for (i = 0; i < PTRS_PER_PUD; i++)
pud_p[i] = 0;
*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pud_p += pud_index(address);
pud = *pud_p;
if (pud)
pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
else {
if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
reset_early_page_tables();
goto again;
}
pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
for (i = 0; i < PTRS_PER_PMD; i++)
pmd_p[i] = 0;
*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
}
pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL);
pmd_p[pmd_index(address)] = pmd;
return 0;
}
/* Don't add a printk in there. printk relies on the PDA which is not initialized
@@ -42,14 +112,25 @@ static void __init clear_bss(void)
(unsigned long) __bss_stop - (unsigned long) __bss_start);
}
static unsigned long get_cmd_line_ptr(void)
{
unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
return cmd_line_ptr;
}
static void __init copy_bootdata(char *real_mode_data)
{
char * command_line;
unsigned long cmd_line_ptr;
memcpy(&boot_params, real_mode_data, sizeof boot_params);
sanitize_boot_params(&boot_params);
if (boot_params.hdr.cmd_line_ptr) {
command_line = __va(boot_params.hdr.cmd_line_ptr);
cmd_line_ptr = get_cmd_line_ptr();
if (cmd_line_ptr) {
command_line = __va(cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
}
}
@@ -72,14 +153,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
/* Kill off the identity-map trampoline */
reset_early_page_tables();
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
/* Make NULL pointers segfault */
zap_identity_mappings();
max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
@@ -89,37 +168,25 @@ void __init x86_64_start_kernel(char * real_mode_data)
}
load_idt((const struct desc_ptr *)&idt_descr);
copy_bootdata(__va(real_mode_data));
if (console_loglevel == 10)
early_printk("Kernel alive\n");
clear_page(init_level4_pgt);
/* set init_level4_pgt kernel high mapping*/
init_level4_pgt[511] = early_level4_pgt[511];
x86_64_start_reservations(real_mode_data);
}
void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));
memblock_reserve(__pa_symbol(&_text),
__pa_symbol(&__bss_stop) - __pa_symbol(&_text));
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
/* Assume only end is not page aligned */
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
}
#endif
/* version is always not zero if it is copied */
if (!boot_params.hdr.version)
copy_bootdata(__va(real_mode_data));
reserve_ebda_region();
/*
* At this point everything still needed from the boot loader
* or BIOS or kernel text should be early reserved or marked not
* RAM in e820. All other memory is free game.
*/
start_kernel();
}

View File

@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
.code64
.globl startup_64
startup_64:
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
* and someone has loaded an identity mapped page table
* for us. These identity mapped page tables map all of the
* kernel pages and possibly all of memory.
*
* %esi holds a physical pointer to real_mode_data.
* %rsi holds a physical pointer to real_mode_data.
*
* We come here either directly from a 64bit bootloader, or from
* arch/x86_64/boot/compressed/head.S.
@@ -66,7 +65,8 @@ startup_64:
* tables and then reload them.
*/
/* Compute the delta between the address I am compiled to run at and the
/*
* Compute the delta between the address I am compiled to run at and the
* address I am actually running at.
*/
leaq _text(%rip), %rbp
@@ -78,45 +78,62 @@ startup_64:
testl %eax, %eax
jnz bad_address
/* Is the address too large? */
leaq _text(%rip), %rdx
movq $PGDIR_SIZE, %rax
cmpq %rax, %rdx
jae bad_address
/* Fixup the physical addresses in the page table
/*
* Is the address too large?
*/
addq %rbp, init_level4_pgt + 0(%rip)
addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
leaq _text(%rip), %rax
shrq $MAX_PHYSMEM_BITS, %rax
jnz bad_address
addq %rbp, level3_ident_pgt + 0(%rip)
/*
* Fixup the physical addresses in the page table
*/
addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
addq %rbp, level3_kernel_pgt + (510*8)(%rip)
addq %rbp, level3_kernel_pgt + (511*8)(%rip)
addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
/* Add an Identity mapping if I am above 1G */
/*
* Set up the identity mapping for the switchover. These
* entries should *NOT* have the global bit set! This also
* creates a bunch of nonsense entries but that is fine --
* it avoids problems around wraparound.
*/
leaq _text(%rip), %rdi
andq $PMD_PAGE_MASK, %rdi
leaq early_level4_pgt(%rip), %rbx
movq %rdi, %rax
shrq $PGDIR_SHIFT, %rax
leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx
movq %rdx, 0(%rbx,%rax,8)
movq %rdx, 8(%rbx,%rax,8)
addq $4096, %rdx
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
andq $(PTRS_PER_PUD - 1), %rax
jz ident_complete
leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
leaq level3_ident_pgt(%rip), %rbx
movq %rdx, 0(%rbx, %rax, 8)
andl $(PTRS_PER_PUD-1), %eax
movq %rdx, (4096+0)(%rbx,%rax,8)
movq %rdx, (4096+8)(%rbx,%rax,8)
addq $8192, %rbx
movq %rdi, %rax
shrq $PMD_SHIFT, %rax
andq $(PTRS_PER_PMD - 1), %rax
leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
leaq level2_spare_pgt(%rip), %rbx
movq %rdx, 0(%rbx, %rax, 8)
ident_complete:
shrq $PMD_SHIFT, %rdi
addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
leaq (_end - 1)(%rip), %rcx
shrq $PMD_SHIFT, %rcx
subq %rdi, %rcx
incl %ecx
1:
andq $(PTRS_PER_PMD - 1), %rdi
movq %rax, (%rbx,%rdi,8)
incq %rdi
addq $PMD_SIZE, %rax
decl %ecx
jnz 1b
/*
* Fixup the kernel text+data virtual addresses. Note that
@@ -124,7 +141,6 @@ ident_complete:
* cleanup_highmap() fixes this up along with the mappings
* beyond _end.
*/
leaq level2_kernel_pgt(%rip), %rdi
leaq 4096(%rdi), %r8
/* See if it is a valid page table entry */
@@ -139,17 +155,14 @@ ident_complete:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
/* Due to ENTRY(), sometimes the empty space gets filled with
* zeros. Better take a jmp than relying on empty space being
* filled with 0x90 (nop)
*/
jmp secondary_startup_64
movq $(early_level4_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
* and someone has loaded a mapped page table.
*
* %esi holds a physical pointer to real_mode_data.
* %rsi holds a physical pointer to real_mode_data.
*
* We come here either from startup_64 (using physical addresses)
* or from trampoline.S (using virtual addresses).
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64)
* after the boot processor executes this code.
*/
movq $(init_level4_pgt - __START_KERNEL_map), %rax
1:
/* Enable PAE mode and PGE */
movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
movq %rax, %cr4
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
movq %rcx, %cr4
/* Setup early boot stage 4 level pagetables. */
movq $(init_level4_pgt - __START_KERNEL_map), %rax
addq phys_base(%rip), %rax
movq %rax, %cr3
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64)
movq %rax, %cr0
/* Setup a boot time stack */
movq stack_start(%rip),%rsp
movq stack_start(%rip), %rsp
/* zero EFLAGS after setting rsp */
pushq $0
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64)
movl initial_gs+4(%rip),%edx
wrmsr
/* esi is pointer to real mode structure with interesting info.
/* rsi is pointer to real mode structure with interesting info.
pass it to C */
movl %esi, %edi
movq %rsi, %rdi
/* Finally jump to run C code and to be on real kernel address
* Since we are running on identity-mapped space we have to jump
* to the full 64bit address, this is only possible as indirect
* jump. In addition we need to ensure %cs is set so we make this
* a far return.
*
* Note: do not change to far jump indirect with 64bit offset.
*
* AMD does not support far jump indirect with 64bit offset.
* AMD64 Architecture Programmer's Manual, Volume 3: states only
* JMP FAR mem16:16 FF /5 Far jump indirect,
* with the target specified by a far pointer in memory.
* JMP FAR mem16:32 FF /5 Far jump indirect,
* with the target specified by a far pointer in memory.
*
* Intel64 does support 64bit offset.
* Software Developer Manual Vol 2: states:
* FF /5 JMP m16:16 Jump far, absolute indirect,
* address given in m16:16
* FF /5 JMP m16:32 Jump far, absolute indirect,
* address given in m16:32.
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64.
*/
movq initial_code(%rip),%rax
pushq $0 # fake return address to stop unwinder
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0)
/* SMP bootup changes these two */
__REFDATA
.align 8
ENTRY(initial_code)
.balign 8
GLOBAL(initial_code)
.quad x86_64_start_kernel
ENTRY(initial_gs)
GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
ENTRY(stack_start)
GLOBAL(stack_start)
.quad init_thread_union+THREAD_SIZE-8
.word 0
__FINITDATA
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0)
bad_address:
jmp bad_address
.section ".init.text","ax"
__INIT
.globl early_idt_handlers
early_idt_handlers:
# 104(%rsp) %rflags
@@ -321,14 +354,22 @@ ENTRY(early_idt_handler)
pushq %r11 # 0(%rsp)
cmpl $__KERNEL_CS,96(%rsp)
jne 10f
jne 11f
cmpl $14,72(%rsp) # Page fault?
jnz 10f
GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
call early_make_pgtable
andl %eax,%eax
jz 20f # All good
10:
leaq 88(%rsp),%rdi # Pointer to %rip
call early_fixup_exception
andl %eax,%eax
jnz 20f # Found an exception entry
10:
11:
#ifdef CONFIG_EARLY_PRINTK
GET_CR2_INTO(%r9) # can clobber any volatile register if pv
movl 80(%rsp),%r8d # error code
@@ -350,7 +391,7 @@ ENTRY(early_idt_handler)
1: hlt
jmp 1b
20: # Exception table entry found
20: # Exception table entry found or page table generated
popq %r11
popq %r10
popq %r9
@@ -364,6 +405,8 @@ ENTRY(early_idt_handler)
decl early_recursion_flag(%rip)
INTERRUPT_RETURN
__INITDATA
.balign 4
early_recursion_flag:
.long 0
@@ -374,11 +417,10 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
.previous
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
ENTRY(name)
GLOBAL(name)
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
@@ -388,24 +430,37 @@ ENTRY(name)
i = i + 1 ; \
.endr
.data
/*
* This default setting generates an ident mapping at address 0x100000
* and a mapping for the kernel that precisely maps virtual address
* 0xffffffff80000000 to physical address 0x000000. (always using
* 2Mbyte large pages provided by PAE mode)
*/
NEXT_PAGE(init_level4_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
__INITDATA
NEXT_PAGE(early_level4_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
.data
#ifndef CONFIG_XEN
NEXT_PAGE(init_level4_pgt)
.fill 512,8,0
#else
NEXT_PAGE(init_level4_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.fill 511,8,0
.fill 511, 8, 0
NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#endif
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
@@ -413,21 +468,6 @@ NEXT_PAGE(level3_kernel_pgt)
.quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
NEXT_PAGE(level2_fixmap_pgt)
.fill 506,8,0
.quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
.fill 5,8,0
NEXT_PAGE(level1_fixmap_pgt)
.fill 512,8,0
NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
NEXT_PAGE(level2_kernel_pgt)
/*
* 512 MB kernel mapping. We spend a full page on this pagetable
@@ -442,11 +482,16 @@ NEXT_PAGE(level2_kernel_pgt)
PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_spare_pgt)
.fill 512, 8, 0
NEXT_PAGE(level2_fixmap_pgt)
.fill 506,8,0
.quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
.fill 5,8,0
NEXT_PAGE(level1_fixmap_pgt)
.fill 512,8,0
#undef PMDS
#undef NEXT_PAGE
.data
.align 16
@@ -472,6 +517,5 @@ ENTRY(nmi_idt_table)
.skip IDT_ENTRIES * 16
__PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE

View File

@@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
EXPORT_SYMBOL(__get_user_8);
EXPORT_SYMBOL(__put_user_1);
EXPORT_SYMBOL(__put_user_2);

View File

@@ -297,9 +297,9 @@ static void kvm_register_steal_time(void)
memset(st, 0, sizeof(*st));
wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
cpu, __pa(st));
wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
pr_info("kvm-stealtime: cpu %d, msr %llx\n",
cpu, (unsigned long long) slow_virt_to_phys(st));
}
static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
@@ -324,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void)
return;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = __pa(&__get_cpu_var(apf_reason));
u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason));
#ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
@@ -340,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void)
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
__get_cpu_var(kvm_apic_eoi) = 0;
pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi))
| KVM_MSR_ENABLED;
wrmsrl(MSR_KVM_PV_EOI_EN, pa);
}

View File

@@ -162,8 +162,8 @@ int kvm_register_clock(char *txt)
int low, high, ret;
struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
low = (int)__pa(src) | 1;
high = ((u64)__pa(src) >> 32);
low = (int)slow_virt_to_phys(src) | 1;
high = ((u64)slow_virt_to_phys(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);

View File

@@ -16,125 +16,12 @@
#include <linux/io.h>
#include <linux/suspend.h>
#include <asm/init.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
{
pud_t *pud;
pmd_t *pmd;
struct page *page;
int result = -ENOMEM;
addr &= PMD_MASK;
pgd += pgd_index(addr);
if (!pgd_present(*pgd)) {
page = kimage_alloc_control_pages(image, 0);
if (!page)
goto out;
pud = (pud_t *)page_address(page);
clear_page(pud);
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
}
pud = pud_offset(pgd, addr);
if (!pud_present(*pud)) {
page = kimage_alloc_control_pages(image, 0);
if (!page)
goto out;
pmd = (pmd_t *)page_address(page);
clear_page(pmd);
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
}
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd))
set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
result = 0;
out:
return result;
}
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
unsigned long end_addr;
addr &= PAGE_MASK;
end_addr = addr + PUD_SIZE;
while (addr < end_addr) {
set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
addr += PMD_SIZE;
}
}
static int init_level3_page(struct kimage *image, pud_t *level3p,
unsigned long addr, unsigned long last_addr)
{
unsigned long end_addr;
int result;
result = 0;
addr &= PAGE_MASK;
end_addr = addr + PGDIR_SIZE;
while ((addr < last_addr) && (addr < end_addr)) {
struct page *page;
pmd_t *level2p;
page = kimage_alloc_control_pages(image, 0);
if (!page) {
result = -ENOMEM;
goto out;
}
level2p = (pmd_t *)page_address(page);
init_level2_page(level2p, addr);
set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
addr += PUD_SIZE;
}
/* clear the unused entries */
while (addr < end_addr) {
pud_clear(level3p++);
addr += PUD_SIZE;
}
out:
return result;
}
static int init_level4_page(struct kimage *image, pgd_t *level4p,
unsigned long addr, unsigned long last_addr)
{
unsigned long end_addr;
int result;
result = 0;
addr &= PAGE_MASK;
end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
while ((addr < last_addr) && (addr < end_addr)) {
struct page *page;
pud_t *level3p;
page = kimage_alloc_control_pages(image, 0);
if (!page) {
result = -ENOMEM;
goto out;
}
level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
if (result)
goto out;
set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
addr += PGDIR_SIZE;
}
/* clear the unused entries */
while (addr < end_addr) {
pgd_clear(level4p++);
addr += PGDIR_SIZE;
}
out:
return result;
}
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.pud);
@@ -184,22 +71,62 @@ err:
return result;
}
static void *alloc_pgt_page(void *data)
{
struct kimage *image = (struct kimage *)data;
struct page *page;
void *p = NULL;
page = kimage_alloc_control_pages(image, 0);
if (page) {
p = page_address(page);
clear_page(p);
}
return p;
}
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
.context = image,
.pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
};
unsigned long mstart, mend;
pgd_t *level4p;
int result;
int i;
level4p = (pgd_t *)__va(start_pgtable);
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
if (result)
return result;
clear_page(level4p);
for (i = 0; i < nr_pfn_mapped; i++) {
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
result = kernel_ident_mapping_init(&info,
level4p, mstart, mend);
if (result)
return result;
}
/*
* image->start may be outside 0 ~ max_pfn, for example when
* jump back to original kernel from kexeced kernel
* segments's mem ranges could be outside 0 ~ max_pfn,
* for example when jump back to original kernel from kexeced kernel.
* or first kernel is booted with user mem map, and second kernel
* could be loaded out of that range.
*/
result = init_one_level2_page(image, level4p, image->start);
if (result)
return result;
for (i = 0; i < image->nr_segments; i++) {
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz;
result = kernel_ident_mapping_init(&info,
level4p, mstart, mend);
if (result)
return result;
}
return init_transition_pgtable(image, level4p);
}

View File

@@ -108,17 +108,16 @@
#include <asm/topology.h>
#include <asm/apicdef.h>
#include <asm/amd_nb.h>
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>
#include <asm/alternative.h>
#include <asm/prom.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access
* apertures, ACPI and other tables without having to play with fixmaps.
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
* max_pfn_mapped: highest direct mapped pfn over 4GB
*
* The direct mapping only covers E820_RAM regions, so the ranges and gaps are
* represented by pfn_mapped
*/
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align)
return ret;
}
#ifdef CONFIG_X86_64
static void __init init_gbpages(void)
{
if (direct_gbpages && cpu_has_gbpages)
printk(KERN_INFO "Using GB pages for direct mapping\n");
else
direct_gbpages = 0;
}
#else
static inline void init_gbpages(void)
{
}
#ifdef CONFIG_X86_32
static void __init cleanup_highmap(void)
{
}
@@ -296,8 +284,8 @@ static void __init cleanup_highmap(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
memblock_reserve(__pa(_brk_start),
__pa(_brk_end) - __pa(_brk_start));
memblock_reserve(__pa_symbol(_brk_start),
_brk_end - _brk_start);
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -306,27 +294,43 @@ static void __init reserve_brk(void)
#ifdef CONFIG_BLK_DEV_INITRD
static u64 __init get_ramdisk_image(void)
{
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
return ramdisk_image;
}
static u64 __init get_ramdisk_size(void)
{
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
return ramdisk_size;
}
#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
static void __init relocate_initrd(void)
{
/* Assume only end is not page aligned */
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size();
u64 area_size = PAGE_ALIGN(ramdisk_size);
u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
/* We need to move the initrd down into lowmem */
ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
PAGE_SIZE);
/* We need to move the initrd down into directly mapped mem */
ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
area_size, PAGE_SIZE);
if (!ramdisk_here)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
/* Note: this includes all the mem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
memblock_reserve(ramdisk_here, area_size);
initrd_start = ramdisk_here + PAGE_OFFSET;
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void)
q = (char *)initrd_start;
/* Copy any lowmem portion of the initrd */
if (ramdisk_image < end_of_lowmem) {
clen = end_of_lowmem - ramdisk_image;
p = (char *)__va(ramdisk_image);
memcpy(q, p, clen);
q += clen;
ramdisk_image += clen;
ramdisk_size -= clen;
}
/* Copy the highmem portion of the initrd */
/* Copy the initrd */
while (ramdisk_size) {
slop = ramdisk_image & ~PAGE_MASK;
clen = ramdisk_size;
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void)
ramdisk_image += clen;
ramdisk_size -= clen;
}
/* high pages is not converted by early_res_to_bootmem */
ramdisk_image = boot_params.hdr.ramdisk_image;
ramdisk_size = boot_params.hdr.ramdisk_size;
ramdisk_image = get_ramdisk_image();
ramdisk_size = get_ramdisk_size();
printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
" [mem %#010llx-%#010llx]\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
static void __init early_reserve_initrd(void)
{
/* Assume only end is not page aligned */
u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
}
static void __init reserve_initrd(void)
{
/* Assume only end is not page aligned */
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
u64 mapped_size;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
mapped_size = memblock_mem_size(max_pfn_mapped);
if (ramdisk_size >= (mapped_size>>1))
panic("initrd too large to handle, "
"disabling initrd (%lld needed, %lld available)\n",
ramdisk_size, end_of_lowmem>>1);
}
ramdisk_size, mapped_size>>1);
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1);
if (ramdisk_end <= end_of_lowmem) {
/* All in lowmem, easy case */
/*
* don't need to reserve again, already reserved early
* in i386_start_kernel
*/
if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
PFN_DOWN(ramdisk_end))) {
/* All are mapped, easy case */
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
return;
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void)
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
#else
static void __init early_reserve_initrd(void)
{
}
static void __init reserve_initrd(void)
{
}
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void)
struct setup_data *data;
u64 pa_data;
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
u32 data_len, map_len;
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void)
u64 pa_data;
int found = 0;
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
struct setup_data *data;
u64 pa_data;
if (boot_params.hdr.version < 0x0209)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
* On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
* limit once kexec-tools are fixed.
*/
#ifdef CONFIG_X86_32
# define CRASH_KERNEL_ADDR_MAX (512 << 20)
#else
# define CRASH_KERNEL_ADDR_MAX (896 << 20)
# define CRASH_KERNEL_ADDR_MAX MAXMEM
#endif
static void __init reserve_crashkernel_low(void)
{
#ifdef CONFIG_X86_64
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long low_base = 0, low_size = 0;
unsigned long total_low_mem;
unsigned long long base;
int ret;
total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
ret = parse_crashkernel_low(boot_command_line, total_low_mem,
&low_size, &base);
if (ret != 0 || low_size <= 0)
return;
low_base = memblock_find_in_range(low_size, (1ULL<<32),
low_size, alignment);
if (!low_base) {
pr_info("crashkernel low reservation failed - No suitable area found.\n");
return;
}
memblock_reserve(low_base, low_size);
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
(unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20),
(unsigned long)(total_low_mem >> 20));
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
insert_resource(&iomem_resource, &crashk_low_res);
#endif
}
static void __init reserve_crashkernel(void)
{
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
int ret;
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */
if (crash_base <= 0) {
const unsigned long long alignment = 16<<20; /* 16M */
/*
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void)
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
unsigned long long start;
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void)
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
insert_resource(&iomem_resource, &crashk_res);
if (crash_base >= (1ULL<<32))
reserve_crashkernel_low();
}
#else
static void __init reserve_crashkernel(void)
@@ -608,8 +644,6 @@ static __init void reserve_ibft_region(void)
memblock_reserve(addr, size);
}
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
static bool __init snb_gfx_workaround_needed(void)
{
#ifdef CONFIG_PCI
@@ -698,8 +732,7 @@ static void __init trim_bios_range(void)
* since some BIOSes are known to corrupt low memory. See the
* Kconfig help text for X86_RESERVE_LOW.
*/
e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
E820_RAM, E820_RESERVED);
e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
/*
* special case: Some BIOSen report the PC BIOS
@@ -711,6 +744,29 @@ static void __init trim_bios_range(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
/* called before trim_bios_range() to spare extra sanitize */
static void __init e820_add_kernel_range(void)
{
u64 start = __pa_symbol(_text);
u64 size = __pa_symbol(_end) - start;
/*
* Complain if .text .data and .bss are not marked as E820_RAM and
* attempt to fix it by adding the range. We may have a confused BIOS,
* or the user may have used memmap=exactmap or memmap=xxM$yyM to
* exclude kernel range. If we really are running on top non-RAM,
* we will crash later anyways.
*/
if (e820_all_mapped(start, start + size, E820_RAM))
return;
pr_warn(".text .data .bss are not marked as E820_RAM!\n");
e820_remove_range(start, size, E820_RAM, 0);
e820_add_region(start, size, E820_RAM);
}
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
static int __init parse_reservelow(char *p)
{
unsigned long long size;
@@ -733,6 +789,11 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
static void __init trim_low_memory_range(void)
{
memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
}
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -748,6 +809,17 @@ early_param("reservelow", parse_reservelow);
void __init setup_arch(char **cmdline_p)
{
memblock_reserve(__pa_symbol(_text),
(unsigned long)__bss_stop - (unsigned long)_text);
early_reserve_initrd();
/*
* At this point everything still needed from the boot loader
* or BIOS or kernel text should be early reserved or marked not
* RAM in e820. All other memory is free game.
*/
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
@@ -835,12 +907,12 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = _brk_end;
code_resource.start = virt_to_phys(_text);
code_resource.end = virt_to_phys(_etext)-1;
data_resource.start = virt_to_phys(_etext);
data_resource.end = virt_to_phys(_edata)-1;
bss_resource.start = virt_to_phys(&__bss_start);
bss_resource.end = virt_to_phys(&__bss_stop)-1;
code_resource.start = __pa_symbol(_text);
code_resource.end = __pa_symbol(_etext)-1;
data_resource.start = __pa_symbol(_etext);
data_resource.end = __pa_symbol(_edata)-1;
bss_resource.start = __pa_symbol(__bss_start);
bss_resource.end = __pa_symbol(__bss_stop)-1;
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
@@ -906,6 +978,7 @@ void __init setup_arch(char **cmdline_p)
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
e820_add_kernel_range();
trim_bios_range();
#ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
@@ -955,6 +1028,8 @@ void __init setup_arch(char **cmdline_p)
reserve_ibft_region();
early_alloc_pgt_buf();
/*
* Need to conclude brk, before memblock_x86_fill()
* it could use memblock_find_in_range, could overlap with
@@ -964,7 +1039,7 @@ void __init setup_arch(char **cmdline_p)
cleanup_highmap();
memblock.current_limit = get_max_mapped();
memblock.current_limit = ISA_END_ADDRESS;
memblock_x86_fill();
/*
@@ -981,41 +1056,22 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check();
#endif
#ifdef CONFIG_X86_32
printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
(max_pfn_mapped<<PAGE_SHIFT) - 1);
#endif
reserve_real_mode();
trim_platform_memory_ranges();
trim_low_memory_range();
init_mem_mapping();
early_trap_pf_init();
setup_real_mode();
trim_platform_memory_ranges();
init_gbpages();
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
int i;
unsigned long start, end;
unsigned long start_pfn, end_pfn;
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
NULL) {
end = PFN_PHYS(end_pfn);
if (end <= (1UL<<32))
continue;
start = PFN_PHYS(start_pfn);
max_pfn_mapped = init_memory_mapping(
max((1UL<<32), start), end);
}
/* can we preseve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
#endif
memblock.current_limit = get_max_mapped();
dma_contiguous_reserve(0);

View File

@@ -688,10 +688,19 @@ void __init early_trap_init(void)
set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
#ifdef CONFIG_X86_32
set_intr_gate(X86_TRAP_PF, &page_fault);
#endif
load_idt(&idt_descr);
}
void __init early_trap_pf_init(void)
{
#ifdef CONFIG_X86_64
set_intr_gate(X86_TRAP_PF, &page_fault);
#endif
}
void __init trap_init(void)
{
int i;

View File

@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(memmove);
#ifndef CONFIG_DEBUG_VIRTUAL
EXPORT_SYMBOL(phys_base);
#endif
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
EXPORT_SYMBOL(native_load_gs_index);

View File

@@ -63,10 +63,6 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},
.mapping = {
.pagetable_reserve = native_pagetable_reserve,
},
.paging = {
.pagetable_init = native_pagetable_init,
},