x86/efi: Runtime services virtual mapping
We map the EFI regions needed for runtime services non-contiguously, with preserved alignment on virtual addresses starting from -4G down for a total max space of 64G. This way, we provide for stable runtime services addresses across kernels so that a kexec'd kernel can still use them.

Thus, they're mapped in a separate pagetable so that we don't pollute the kernel namespace.

Add an efi= kernel command line parameter for passing miscellaneous options and chicken bits from the command line.

While at it, add a chicken bit called "efi=old_map" which can be used as a fallback to the old runtime services mapping method in case there's some b0rkage with a particular EFI implementation (haha, it is hard to hold up the sarcasm here...).

Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
This commit is contained in:

committed by
Matt Fleming

parent
82f0712ca0
commit
d2f7cbe7b2
@@ -12,6 +12,8 @@
|
||||
* Bibo Mao <bibo.mao@intel.com>
|
||||
* Chandramouli Narayanan <mouli@linux.intel.com>
|
||||
* Huang Ying <ying.huang@intel.com>
|
||||
* Copyright (C) 2013 SuSE Labs
|
||||
* Borislav Petkov <bp@suse.de> - runtime services VA mapping
|
||||
*
|
||||
* Copied from efi_32.c to eliminate the duplicated code between EFI
|
||||
* 32/64 support code. --ying 2007-10-26
|
||||
@@ -745,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size)
|
||||
set_memory_uc(addr, npages);
|
||||
}
|
||||
|
||||
/*
 * Map a single EFI memory descriptor using the mapping logic that was
 * previously open-coded in efi_enter_virtual_mode().
 *
 * @md: descriptor to map; md->virt_addr is updated in place.
 *
 * When the descriptor's physical range is already covered by the kernel
 * mapping (pfn_range_is_mapped()), its __va() address is reused and the
 * range is switched to uncached unless the descriptor carries
 * EFI_MEMORY_WB. Otherwise the range is mapped with efi_ioremap().
 *
 * md->virt_addr is always written, so it is left at 0 when no mapping
 * could be obtained; an error is logged in that case.
 */
void __init old_map_region(efi_memory_desc_t *md)
{
	u64 start_pfn, end_pfn, end;
	unsigned long size;
	void *va;

	/*
	 * md->num_pages counts EFI pages, so scale by EFI_PAGE_SHIFT rather
	 * than the kernel's PAGE_SHIFT, matching the rest of the EFI code.
	 */
	size	  = md->num_pages << EFI_PAGE_SHIFT;
	end	  = md->phys_addr + size;

	start_pfn = PFN_DOWN(md->phys_addr);
	end_pfn   = PFN_UP(end);

	if (pfn_range_is_mapped(start_pfn, end_pfn)) {
		/* Already mapped by the kernel: reuse that address. */
		va = __va(md->phys_addr);

		if (!(md->attribute & EFI_MEMORY_WB))
			efi_memory_uc((u64)(unsigned long)va, size);
	} else {
		va = efi_ioremap(md->phys_addr, size,
				 md->type, md->attribute);
	}

	md->virt_addr = (u64) (unsigned long) va;
	if (!va)
		pr_err("ioremap of 0x%llX failed!\n",
		       (unsigned long long)md->phys_addr);
}
|
||||
|
||||
/*
|
||||
* This function will switch the EFI runtime services to virtual mode.
|
||||
* Essentially, look through the EFI memmap and map every region that
|
||||
* has the runtime attribute bit set in its memory descriptor and update
|
||||
* that memory descriptor with the virtual address obtained from ioremap().
|
||||
* This enables the runtime services to be called without having to
|
||||
* Essentially, we look through the EFI memmap and map every region that
|
||||
* has the runtime attribute bit set in its memory descriptor into the
|
||||
* ->trampoline_pgd page table using a top-down VA allocation scheme.
|
||||
*
|
||||
* The old method which used to update that memory descriptor with the
|
||||
* virtual address obtained from ioremap() is still supported when the
|
||||
* kernel is booted with efi=old_map on its command line. Same old
|
||||
* method enabled the runtime services to be called without having to
|
||||
* thunk back into physical mode for every invocation.
|
||||
*
|
||||
* The new method does a pagetable switch in a preemption-safe manner
|
||||
* so that we're in a different address space when calling a runtime
|
||||
* function. For function arguments passing we do copy the PGDs of the
|
||||
* kernel page table into ->trampoline_pgd prior to each call.
|
||||
*/
|
||||
void __init efi_enter_virtual_mode(void)
|
||||
{
|
||||
efi_memory_desc_t *md, *prev_md = NULL;
|
||||
efi_status_t status;
|
||||
void *p, *new_memmap = NULL;
|
||||
unsigned long size;
|
||||
u64 end, systab, start_pfn, end_pfn;
|
||||
void *p, *va, *new_memmap = NULL;
|
||||
efi_status_t status;
|
||||
u64 end, systab;
|
||||
int count = 0;
|
||||
|
||||
efi.systab = NULL;
|
||||
@@ -768,7 +805,6 @@ void __init efi_enter_virtual_mode(void)
|
||||
* We don't do virtual mode, since we don't do runtime services, on
|
||||
* non-native EFI
|
||||
*/
|
||||
|
||||
if (!efi_is_native()) {
|
||||
efi_unmap_memmap();
|
||||
return;
|
||||
@@ -799,6 +835,7 @@ void __init efi_enter_virtual_mode(void)
|
||||
continue;
|
||||
}
|
||||
prev_md = md;
|
||||
|
||||
}
|
||||
|
||||
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
|
||||
@@ -808,33 +845,18 @@ void __init efi_enter_virtual_mode(void)
|
||||
md->type != EFI_BOOT_SERVICES_DATA)
|
||||
continue;
|
||||
|
||||
efi_map_region(md);
|
||||
|
||||
size = md->num_pages << EFI_PAGE_SHIFT;
|
||||
end = md->phys_addr + size;
|
||||
|
||||
start_pfn = PFN_DOWN(md->phys_addr);
|
||||
end_pfn = PFN_UP(end);
|
||||
if (pfn_range_is_mapped(start_pfn, end_pfn)) {
|
||||
va = __va(md->phys_addr);
|
||||
|
||||
if (!(md->attribute & EFI_MEMORY_WB))
|
||||
efi_memory_uc((u64)(unsigned long)va, size);
|
||||
} else
|
||||
va = efi_ioremap(md->phys_addr, size,
|
||||
md->type, md->attribute);
|
||||
|
||||
md->virt_addr = (u64) (unsigned long) va;
|
||||
|
||||
if (!va) {
|
||||
pr_err("ioremap of 0x%llX failed!\n",
|
||||
(unsigned long long)md->phys_addr);
|
||||
continue;
|
||||
}
|
||||
|
||||
systab = (u64) (unsigned long) efi_phys.systab;
|
||||
if (md->phys_addr <= systab && systab < end) {
|
||||
systab += md->virt_addr - md->phys_addr;
|
||||
|
||||
efi.systab = (efi_system_table_t *) (unsigned long) systab;
|
||||
}
|
||||
|
||||
new_memmap = krealloc(new_memmap,
|
||||
(count + 1) * memmap.desc_size,
|
||||
GFP_KERNEL);
|
||||
@@ -845,6 +867,9 @@ void __init efi_enter_virtual_mode(void)
|
||||
|
||||
BUG_ON(!efi.systab);
|
||||
|
||||
efi_setup_page_tables();
|
||||
efi_sync_low_kernel_mappings();
|
||||
|
||||
status = phys_efi_set_virtual_address_map(
|
||||
memmap.desc_size * count,
|
||||
memmap.desc_size,
|
||||
@@ -877,7 +902,8 @@ void __init efi_enter_virtual_mode(void)
|
||||
efi.query_variable_info = virt_efi_query_variable_info;
|
||||
efi.update_capsule = virt_efi_update_capsule;
|
||||
efi.query_capsule_caps = virt_efi_query_capsule_caps;
|
||||
if (__supported_pte_mask & _PAGE_NX)
|
||||
|
||||
if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
|
||||
runtime_code_page_mkexec();
|
||||
|
||||
kfree(new_memmap);
|
||||
@@ -1007,3 +1033,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
|
||||
return EFI_SUCCESS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(efi_query_variable_store);
|
||||
|
||||
static int __init parse_efi_cmdline(char *str)
|
||||
{
|
||||
if (*str == '=')
|
||||
str++;
|
||||
|
||||
if (!strncmp(str, "old_map", 7))
|
||||
set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("efi", parse_efi_cmdline);
|
||||
|
Reference in New Issue
Block a user