Merge tag 'efi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/efi

Pull EFI virtual mapping changes from Matt Fleming:

  * New static EFI runtime services virtual mapping layout which is
    groundwork for kexec support on EFI. (Borislav Petkov)

Signed-off-by: Ingo Molnar <mingo@kernel.org>
这个提交包含在:
Ingo Molnar
2013-11-26 12:23:04 +01:00
当前提交 61d0669775
修改 10 个文件,包含 755 行新增70 行删除

查看文件

@@ -30,6 +30,7 @@
*/
struct cpa_data {
unsigned long *vaddr;
pgd_t *pgd;
pgprot_t mask_set;
pgprot_t mask_clr;
int numpages;
@@ -322,17 +323,9 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
return prot;
}
/*
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
*
* Note: We return pud and pmd either when the entry is marked large
* or when the present bit is not set. Otherwise we would return a
* pointer to a nonexisting mapping.
*/
pte_t *lookup_address(unsigned long address, unsigned int *level)
static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level)
{
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
pmd_t *pmd;
@@ -361,8 +354,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
return pte_offset_kernel(pmd, address);
}
/*
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
*
* Note: We return pud and pmd either when the entry is marked large
* or when the present bit is not set. Otherwise we would return a
* pointer to a nonexisting mapping.
*/
pte_t *lookup_address(unsigned long address, unsigned int *level)
{
return __lookup_address_in_pgd(pgd_offset_k(address), address, level);
}
EXPORT_SYMBOL_GPL(lookup_address);
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
unsigned int *level)
{
if (cpa->pgd)
return __lookup_address_in_pgd(cpa->pgd + pgd_index(address),
address, level);
return lookup_address(address, level);
}
/*
* This is necessary because __pa() does not work on some
* kinds of memory, like vmalloc() or the alloc_remap()
@@ -437,7 +453,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* Check for races, another CPU might have split this page
* up already:
*/
tmp = lookup_address(address, &level);
tmp = _lookup_address_cpa(cpa, address, &level);
if (tmp != kpte)
goto out_unlock;
@@ -543,7 +559,8 @@ out_unlock:
}
static int
__split_large_page(pte_t *kpte, unsigned long address, struct page *base)
__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
struct page *base)
{
pte_t *pbase = (pte_t *)page_address(base);
unsigned long pfn, pfninc = 1;
@@ -556,7 +573,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
* Check for races, another CPU might have split this page
* up for us already:
*/
tmp = lookup_address(address, &level);
tmp = _lookup_address_cpa(cpa, address, &level);
if (tmp != kpte) {
spin_unlock(&pgd_lock);
return 1;
@@ -632,7 +649,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base)
return 0;
}
static int split_large_page(pte_t *kpte, unsigned long address)
static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
unsigned long address)
{
struct page *base;
@@ -644,15 +662,390 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
if (__split_large_page(kpte, address, base))
if (__split_large_page(cpa, kpte, address, base))
__free_page(base);
return 0;
}
static bool try_to_free_pte_page(pte_t *pte)
{
int i;
for (i = 0; i < PTRS_PER_PTE; i++)
if (!pte_none(pte[i]))
return false;
free_page((unsigned long)pte);
return true;
}
static bool try_to_free_pmd_page(pmd_t *pmd)
{
int i;
for (i = 0; i < PTRS_PER_PMD; i++)
if (!pmd_none(pmd[i]))
return false;
free_page((unsigned long)pmd);
return true;
}
static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, start);
while (start < end) {
set_pte(pte, __pte(0));
start += PAGE_SIZE;
pte++;
}
if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
pmd_clear(pmd);
return true;
}
return false;
}
static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
unsigned long start, unsigned long end)
{
if (unmap_pte_range(pmd, start, end))
if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
pud_clear(pud);
}
static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
{
pmd_t *pmd = pmd_offset(pud, start);
/*
* Not on a 2MB page boundary?
*/
if (start & (PMD_SIZE - 1)) {
unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
unsigned long pre_end = min_t(unsigned long, end, next_page);
__unmap_pmd_range(pud, pmd, start, pre_end);
start = pre_end;
pmd++;
}
/*
* Try to unmap in 2M chunks.
*/
while (end - start >= PMD_SIZE) {
if (pmd_large(*pmd))
pmd_clear(pmd);
else
__unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
start += PMD_SIZE;
pmd++;
}
/*
* 4K leftovers?
*/
if (start < end)
return __unmap_pmd_range(pud, pmd, start, end);
/*
* Try again to free the PMD page if haven't succeeded above.
*/
if (!pud_none(*pud))
if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
pud_clear(pud);
}
static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
{
pud_t *pud = pud_offset(pgd, start);
/*
* Not on a GB page boundary?
*/
if (start & (PUD_SIZE - 1)) {
unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
unsigned long pre_end = min_t(unsigned long, end, next_page);
unmap_pmd_range(pud, start, pre_end);
start = pre_end;
pud++;
}
/*
* Try to unmap in 1G chunks?
*/
while (end - start >= PUD_SIZE) {
if (pud_large(*pud))
pud_clear(pud);
else
unmap_pmd_range(pud, start, start + PUD_SIZE);
start += PUD_SIZE;
pud++;
}
/*
* 2M leftovers?
*/
if (start < end)
unmap_pmd_range(pud, start, end);
/*
* No need to try to free the PUD page because we'll free it in
* populate_pgd's error path
*/
}
static int alloc_pte_page(pmd_t *pmd)
{
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
if (!pte)
return -1;
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
return 0;
}
static int alloc_pmd_page(pud_t *pud)
{
pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
if (!pmd)
return -1;
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
return 0;
}
static void populate_pte(struct cpa_data *cpa,
unsigned long start, unsigned long end,
unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
{
pte_t *pte;
pte = pte_offset_kernel(pmd, start);
while (num_pages-- && start < end) {
/* deal with the NX bit */
if (!(pgprot_val(pgprot) & _PAGE_NX))
cpa->pfn &= ~_PAGE_NX;
set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
start += PAGE_SIZE;
cpa->pfn += PAGE_SIZE;
pte++;
}
}
static int populate_pmd(struct cpa_data *cpa,
unsigned long start, unsigned long end,
unsigned num_pages, pud_t *pud, pgprot_t pgprot)
{
unsigned int cur_pages = 0;
pmd_t *pmd;
/*
* Not on a 2M boundary?
*/
if (start & (PMD_SIZE - 1)) {
unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
pre_end = min_t(unsigned long, pre_end, next_page);
cur_pages = (pre_end - start) >> PAGE_SHIFT;
cur_pages = min_t(unsigned int, num_pages, cur_pages);
/*
* Need a PTE page?
*/
pmd = pmd_offset(pud, start);
if (pmd_none(*pmd))
if (alloc_pte_page(pmd))
return -1;
populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
start = pre_end;
}
/*
* We mapped them all?
*/
if (num_pages == cur_pages)
return cur_pages;
while (end - start >= PMD_SIZE) {
/*
* We cannot use a 1G page so allocate a PMD page if needed.
*/
if (pud_none(*pud))
if (alloc_pmd_page(pud))
return -1;
pmd = pmd_offset(pud, start);
set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
start += PMD_SIZE;
cpa->pfn += PMD_SIZE;
cur_pages += PMD_SIZE >> PAGE_SHIFT;
}
/*
* Map trailing 4K pages.
*/
if (start < end) {
pmd = pmd_offset(pud, start);
if (pmd_none(*pmd))
if (alloc_pte_page(pmd))
return -1;
populate_pte(cpa, start, end, num_pages - cur_pages,
pmd, pgprot);
}
return num_pages;
}
static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
pgprot_t pgprot)
{
pud_t *pud;
unsigned long end;
int cur_pages = 0;
end = start + (cpa->numpages << PAGE_SHIFT);
/*
* Not on a Gb page boundary? => map everything up to it with
* smaller pages.
*/
if (start & (PUD_SIZE - 1)) {
unsigned long pre_end;
unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
pre_end = min_t(unsigned long, end, next_page);
cur_pages = (pre_end - start) >> PAGE_SHIFT;
cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
pud = pud_offset(pgd, start);
/*
* Need a PMD page?
*/
if (pud_none(*pud))
if (alloc_pmd_page(pud))
return -1;
cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
pud, pgprot);
if (cur_pages < 0)
return cur_pages;
start = pre_end;
}
/* We mapped them all? */
if (cpa->numpages == cur_pages)
return cur_pages;
pud = pud_offset(pgd, start);
/*
* Map everything starting from the Gb boundary, possibly with 1G pages
*/
while (end - start >= PUD_SIZE) {
set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot)));
start += PUD_SIZE;
cpa->pfn += PUD_SIZE;
cur_pages += PUD_SIZE >> PAGE_SHIFT;
pud++;
}
/* Map trailing leftover */
if (start < end) {
int tmp;
pud = pud_offset(pgd, start);
if (pud_none(*pud))
if (alloc_pmd_page(pud))
return -1;
tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
pud, pgprot);
if (tmp < 0)
return cur_pages;
cur_pages += tmp;
}
return cur_pages;
}
/*
* Restrictions for kernel page table do not necessarily apply when mapping in
* an alternate PGD.
*/
static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
{
pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
bool allocd_pgd = false;
pgd_t *pgd_entry;
pud_t *pud = NULL; /* shut up gcc */
int ret;
pgd_entry = cpa->pgd + pgd_index(addr);
/*
* Allocate a PUD page and hand it down for mapping.
*/
if (pgd_none(*pgd_entry)) {
pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
if (!pud)
return -1;
set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
allocd_pgd = true;
}
pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
ret = populate_pud(cpa, addr, pgd_entry, pgprot);
if (ret < 0) {
unmap_pud_range(pgd_entry, addr,
addr + (cpa->numpages << PAGE_SHIFT));
if (allocd_pgd) {
/*
* If I allocated this PUD page, I can just as well
* free it in this error path.
*/
pgd_clear(pgd_entry);
free_page((unsigned long)pud);
}
return ret;
}
cpa->numpages = ret;
return 0;
}
static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
int primary)
{
if (cpa->pgd)
return populate_pgd(cpa, vaddr);
/*
* Ignore all non primary paths.
*/
@@ -697,7 +1090,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
else
address = *cpa->vaddr;
repeat:
kpte = lookup_address(address, &level);
kpte = _lookup_address_cpa(cpa, address, &level);
if (!kpte)
return __cpa_process_fault(cpa, address, primary);
@@ -761,7 +1154,7 @@ repeat:
/*
* We have to split the large page:
*/
err = split_large_page(kpte, address);
err = split_large_page(cpa, kpte, address);
if (!err) {
/*
* Do a global flush tlb after splitting the large page
@@ -910,6 +1303,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
int ret, cache, checkalias;
unsigned long baddr = 0;
memset(&cpa, 0, sizeof(cpa));
/*
* Check, if we are requested to change a not supported
* feature:
@@ -1356,6 +1751,7 @@ static int __set_pages_p(struct page *page, int numpages)
{
unsigned long tempaddr = (unsigned long) page_address(page);
struct cpa_data cpa = { .vaddr = &tempaddr,
.pgd = NULL,
.numpages = numpages,
.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
.mask_clr = __pgprot(0),
@@ -1374,6 +1770,7 @@ static int __set_pages_np(struct page *page, int numpages)
{
unsigned long tempaddr = (unsigned long) page_address(page);
struct cpa_data cpa = { .vaddr = &tempaddr,
.pgd = NULL,
.numpages = numpages,
.mask_set = __pgprot(0),
.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
@@ -1434,6 +1831,36 @@ bool kernel_page_present(struct page *page)
#endif /* CONFIG_DEBUG_PAGEALLOC */
int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags)
{
int retval = -EINVAL;
struct cpa_data cpa = {
.vaddr = &address,
.pfn = pfn,
.pgd = pgd,
.numpages = numpages,
.mask_set = __pgprot(0),
.mask_clr = __pgprot(0),
.flags = 0,
};
if (!(__supported_pte_mask & _PAGE_NX))
goto out;
if (!(page_flags & _PAGE_NX))
cpa.mask_clr = __pgprot(_PAGE_NX);
cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
retval = __change_page_attr_set_clr(&cpa, 0);
__flush_tlb_all();
out:
return retval;
}
/*
* The testcases use internal knowledge of the implementation that shouldn't
* be exposed to the rest of the kernel. Include these directly here.