Merge tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
Pull Xen update from Konrad Rzeszutek Wilk: "Features: - When hotplugging PCI devices in a PV guest we can allocate Xen-SWIOTLB later. - Cleanup Xen SWIOTLB. - Support paged out grants from HVM domains in the backends. - Support wild cards in xen-pciback.hide=(BDF) arguments. - Update grant status updates with upstream hypervisor. - Boot PV guests with more than 128GB. - Cleanup Xen MMU code/add comments. - Obtain XENVERS using a preferred method. - Lay out generic changes to support Xen ARM. - Allow privcmd ioctl for HVM (used to do only PV). - Do v2 of mmap_batch for privcmd ioctls. - If hypervisor saves the LED keyboard light - we will now instruct the kernel about its state. Fixes: - More fixes to Xen PCI backend for various calls/FLR/etc. - With more than 4GB in a 64-bit PV guest disable native SWIOTLB. - Fix up smatch warnings. - Fix up various return values in privcmd and mm." * tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: (48 commits) xen/pciback: Restore the PCI config space after an FLR. xen-pciback: properly clean up after calling pcistub_device_find() xen/vga: add the xen EFI video mode support xen/x86: retrieve keyboard shift status flags from hypervisor. xen/gndev: Xen backend support for paged out grant targets V4. xen-pciback: support wild cards in slot specifications xen/swiotlb: Fix compile warnings when using plain integer instead of NULL pointer. xen/swiotlb: Remove functions not needed anymore. xen/pcifront: Use Xen-SWIOTLB when initting if required. xen/swiotlb: For early initialization, return zero on success. xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB late when PV PCI is used. xen/swiotlb: Move the error strings to its own function. xen/swiotlb: Move the nr_tbl determination in its own function. 
xen/arm: compile and run xenbus xen: resynchronise grant table status codes with upstream xen/privcmd: return -EFAULT on error xen/privcmd: Fix mmap batch ioctl error status copy back. xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl xen/mm: return more precise error from xen_remap_domain_range() xen/mmu: If the revector fails, don't attempt to revector anything else. ...
This commit is contained in:
@@ -6,8 +6,9 @@
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/interface/physdev.h>
|
||||
#include "xen-ops.h"
|
||||
|
||||
unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
|
||||
static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
|
||||
{
|
||||
struct physdev_apic apic_op;
|
||||
int ret;
|
||||
|
@@ -80,6 +80,8 @@
|
||||
#include "smp.h"
|
||||
#include "multicalls.h"
|
||||
|
||||
#include <xen/events.h>
|
||||
|
||||
EXPORT_SYMBOL_GPL(hypercall_page);
|
||||
|
||||
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
|
||||
@@ -1288,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
{
|
||||
struct physdev_set_iopl set_iopl;
|
||||
int rc;
|
||||
pgd_t *pgd;
|
||||
|
||||
if (!xen_start_info)
|
||||
return;
|
||||
@@ -1380,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
acpi_numa = -1;
|
||||
#endif
|
||||
|
||||
pgd = (pgd_t *)xen_start_info->pt_base;
|
||||
|
||||
/* Don't do the full vcpu_info placement stuff until we have a
|
||||
possible map and a non-dummy shared_info. */
|
||||
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
|
||||
@@ -1390,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
early_boot_irqs_disabled = true;
|
||||
|
||||
xen_raw_console_write("mapping kernel into physical memory\n");
|
||||
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
|
||||
xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
|
||||
|
||||
/* Allocate and initialize top and mid mfn levels for p2m structure */
|
||||
xen_build_mfn_list_list();
|
||||
@@ -1441,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
const struct dom0_vga_console_info *info =
|
||||
(void *)((char *)xen_start_info +
|
||||
xen_start_info->console.dom0.info_off);
|
||||
struct xen_platform_op op = {
|
||||
.cmd = XENPF_firmware_info,
|
||||
.interface_version = XENPF_INTERFACE_VERSION,
|
||||
.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
|
||||
};
|
||||
|
||||
xen_init_vga(info, xen_start_info->console.dom0.info_size);
|
||||
xen_start_info->console.domU.mfn = 0;
|
||||
xen_start_info->console.domU.evtchn = 0;
|
||||
|
||||
if (HYPERVISOR_dom0_op(&op) == 0)
|
||||
boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
|
||||
|
||||
xen_init_apic();
|
||||
|
||||
/* Make sure ACS will be enabled */
|
||||
|
@@ -84,6 +84,7 @@
|
||||
*/
|
||||
DEFINE_SPINLOCK(xen_reservation_lock);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Identity map, in addition to plain kernel map. This needs to be
|
||||
* large enough to allocate page table pages to allocate the rest.
|
||||
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
|
||||
*/
|
||||
#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
|
||||
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
|
||||
|
||||
#endif
|
||||
#ifdef CONFIG_X86_64
|
||||
/* l3 pud for userspace vsyscall mapping */
|
||||
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
|
||||
@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
|
||||
|
||||
static void xen_post_allocator_init(void);
|
||||
|
||||
static void __init xen_pagetable_init(void)
|
||||
{
|
||||
paging_init();
|
||||
xen_setup_shared_info();
|
||||
xen_post_allocator_init();
|
||||
}
|
||||
|
||||
static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
|
||||
{
|
||||
/* reserve the range used */
|
||||
@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
 * Clear populated PMD entries of level2_kernel_pgt for the virtual range
 * [vaddr, vaddr_end] (both ends inclusive), stepping one PMD_SIZE at a time.
 *
 * An entry is only cleared when its address lies outside the span from
 * _text up to _brk_end rounded up to a PMD boundary; entries inside that
 * span are left alone. Pending multicalls are flushed before returning.
 *
 * @vaddr:	first virtual address to examine
 * @vaddr_end:	last virtual address to examine (inclusive)
 */
static void __init xen_cleanhighmap(unsigned long vaddr,
				    unsigned long vaddr_end)
{
	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);

	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
	 * We include the PMD passed in on _both_ boundaries.
	 *
	 * The table bound is counted in entries (PTRS_PER_PMD), not bytes:
	 * pmd is a pmd_t pointer, so adding PAGE_SIZE to the table base would
	 * let the walk run far past the end of level2_kernel_pgt. */
	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
			pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
			set_pmd(pmd, __pmd(0));
	}
	/* In case we did something silly, we should crash in this function
	 * instead of somewhere later and be confusing. */
	xen_mc_flush();
}
|
||||
#endif
|
||||
static void __init xen_pagetable_init(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
unsigned long size;
|
||||
unsigned long addr;
|
||||
#endif
|
||||
paging_init();
|
||||
xen_setup_shared_info();
|
||||
#ifdef CONFIG_X86_64
|
||||
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
||||
unsigned long new_mfn_list;
|
||||
|
||||
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
|
||||
|
||||
/* On 32-bit, we get zero so this never gets executed. */
|
||||
new_mfn_list = xen_revector_p2m_tree();
|
||||
if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
|
||||
/* using __ka address and sticking INVALID_P2M_ENTRY! */
|
||||
memset((void *)xen_start_info->mfn_list, 0xff, size);
|
||||
|
||||
/* We should be in __ka space. */
|
||||
BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
|
||||
addr = xen_start_info->mfn_list;
|
||||
/* We roundup to the PMD, which means that if anybody at this stage is
|
||||
* using the __ka address of xen_start_info or xen_start_info->shared_info
|
||||
* they are going to crash. Fortunately we have already revectored
|
||||
* in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
|
||||
size = roundup(size, PMD_SIZE);
|
||||
xen_cleanhighmap(addr, addr + size);
|
||||
|
||||
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
|
||||
memblock_free(__pa(xen_start_info->mfn_list), size);
|
||||
/* And revector! Bye bye old array */
|
||||
xen_start_info->mfn_list = new_mfn_list;
|
||||
} else
|
||||
goto skip;
|
||||
}
|
||||
/* At this stage, cleanup_highmap has already cleaned __ka space
|
||||
* from _brk_limit way up to the max_pfn_mapped (which is the end of
|
||||
* the ramdisk). We continue on, erasing PMD entries that point to page
|
||||
* tables - do note that they are accessible at this stage via __va.
|
||||
* For good measure we also round up to the PMD - which means that if
|
||||
* anybody is using __ka address to the initial boot-stack - and try
|
||||
* to use it - they are going to crash. The xen_start_info has been
|
||||
* taken care of already in xen_setup_kernel_pagetable. */
|
||||
addr = xen_start_info->pt_base;
|
||||
size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
|
||||
|
||||
xen_cleanhighmap(addr, addr + size);
|
||||
xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
|
||||
#ifdef DEBUG
|
||||
/* This is superfluous and is not necessary, but you know what
|
||||
* lets do it. The MODULES_VADDR -> MODULES_END should be clear of
|
||||
* anything at this stage. */
|
||||
xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
|
||||
#endif
|
||||
skip:
|
||||
#endif
|
||||
xen_post_allocator_init();
|
||||
}
|
||||
static void xen_write_cr2(unsigned long cr2)
|
||||
{
|
||||
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
|
||||
@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
|
||||
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
|
||||
BUG();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
|
||||
{
|
||||
unsigned pmdidx, pteidx;
|
||||
@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
|
||||
|
||||
set_page_prot(pmd, PAGE_KERNEL_RO);
|
||||
}
|
||||
|
||||
#endif
|
||||
void __init xen_setup_machphys_mapping(void)
|
||||
{
|
||||
struct xen_machphys_mapping mapping;
|
||||
@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
|
||||
for (i = 0; i < PTRS_PER_PTE; i++)
|
||||
pte[i] = xen_make_pte(pte[i].pte);
|
||||
}
|
||||
|
||||
/*
 * Compare the page frame number of @addr against the two pfn values held
 * in *@pt_base and *@pt_end.
 *
 * On a match with *@pt_base the page at @addr is given PAGE_KERNEL
 * protection, zeroed, and *@pt_base is incremented. On a match with
 * *@pt_end the same is done to the page and *@pt_end is decremented.
 * The two comparisons are independent and evaluated in that order.
 *
 * @pt_base:	in/out pfn, bumped up by one on a match
 * @pt_end:	in/out pfn, moved down by one on a match
 * @addr:	virtual address of the page being checked
 */
static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
				 unsigned long addr)
{
	const unsigned long pfn = PFN_DOWN(__pa(addr));
	void *page = (void *)addr;

	/* Matches the lower value: reset the page and move that value up. */
	if (pfn == *pt_base) {
		set_page_prot(page, PAGE_KERNEL);
		clear_page(page);
		*pt_base += 1;
	}

	/* Matches the upper value: reset the page and move that value down. */
	if (pfn == *pt_end) {
		set_page_prot(page, PAGE_KERNEL);
		clear_page(page);
		*pt_end -= 1;
	}
}
|
||||
/*
|
||||
* Set up the initial kernel pagetable.
|
||||
*
|
||||
@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
|
||||
* of the physical mapping once some sort of allocator has been set
|
||||
* up.
|
||||
*/
|
||||
pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
unsigned long max_pfn)
|
||||
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
|
||||
{
|
||||
pud_t *l3;
|
||||
pmd_t *l2;
|
||||
unsigned long addr[3];
|
||||
unsigned long pt_base, pt_end;
|
||||
unsigned i;
|
||||
|
||||
/* max_pfn_mapped is the last pfn mapped in the initial memory
|
||||
* mappings. Considering that on Xen after the kernel mappings we
|
||||
@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
* set max_pfn_mapped to the last real pfn mapped. */
|
||||
max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
|
||||
|
||||
pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
|
||||
pt_end = pt_base + xen_start_info->nr_pt_frames;
|
||||
|
||||
/* Zap identity mapping */
|
||||
init_level4_pgt[0] = __pgd(0);
|
||||
|
||||
/* Pre-constructed entries are in pfn, so convert to mfn */
|
||||
/* L4[272] -> level3_ident_pgt
|
||||
* L4[511] -> level3_kernel_pgt */
|
||||
convert_pfn_mfn(init_level4_pgt);
|
||||
|
||||
/* L3_i[0] -> level2_ident_pgt */
|
||||
convert_pfn_mfn(level3_ident_pgt);
|
||||
/* L3_k[510] -> level2_kernel_pgt
|
||||
* L3_i[511] -> level2_fixmap_pgt */
|
||||
convert_pfn_mfn(level3_kernel_pgt);
|
||||
|
||||
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
|
||||
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
|
||||
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
|
||||
|
||||
memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
addr[0] = (unsigned long)pgd;
|
||||
addr[1] = (unsigned long)l3;
|
||||
addr[2] = (unsigned long)l2;
|
||||
/* Graft it onto L4[272][0]. Note that we are creating an aliasing problem:
|
||||
* Both L4[272][0] and L4[511][511] have entries that point to the same
|
||||
* L2 (PMD) tables. Meaning that if you modify it in __va space
|
||||
* it will be also modified in the __ka space! (But if you just
|
||||
* modify the PMD table to point to other PTE's or none, then you
|
||||
* are OK - which is what cleanup_highmap does) */
|
||||
copy_page(level2_ident_pgt, l2);
|
||||
/* Graft it onto L4[511][511] */
|
||||
copy_page(level2_kernel_pgt, l2);
|
||||
|
||||
/* Get [511][510] and graft that in level2_fixmap_pgt */
|
||||
l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
|
||||
l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
|
||||
memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
|
||||
/* Set up identity map */
|
||||
xen_map_identity_early(level2_ident_pgt, max_pfn);
|
||||
copy_page(level2_fixmap_pgt, l2);
|
||||
/* Note that we don't do anything with level1_fixmap_pgt which
|
||||
* we don't need. */
|
||||
|
||||
/* Make pagetable pieces RO */
|
||||
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
|
||||
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
|
||||
|
||||
@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
/* Unpin Xen-provided one */
|
||||
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
|
||||
|
||||
/* Switch over */
|
||||
pgd = init_level4_pgt;
|
||||
|
||||
/*
|
||||
* At this stage there can be no user pgd, and no page
|
||||
* structure to attach it to, so make sure we just set kernel
|
||||
* pgd.
|
||||
*/
|
||||
xen_mc_batch();
|
||||
__xen_write_cr3(true, __pa(pgd));
|
||||
__xen_write_cr3(true, __pa(init_level4_pgt));
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
||||
|
||||
memblock_reserve(__pa(xen_start_info->pt_base),
|
||||
xen_start_info->nr_pt_frames * PAGE_SIZE);
|
||||
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
|
||||
* set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
|
||||
* the initial domain. For guests using the toolstack, they are in:
|
||||
* [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
|
||||
* rip out the [L4] (pgd), but for guests we shave off three pages.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(addr); i++)
|
||||
check_pt_base(&pt_base, &pt_end, addr[i]);
|
||||
|
||||
return pgd;
|
||||
/* Our (by three pages) smaller Xen pagetable that we are using */
|
||||
memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
|
||||
/* Revector the xen_start_info */
|
||||
xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
|
||||
}
|
||||
#else /* !CONFIG_X86_64 */
|
||||
static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
|
||||
@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
|
||||
*/
|
||||
swapper_kernel_pmd =
|
||||
extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
|
||||
memcpy(swapper_kernel_pmd, initial_kernel_pmd,
|
||||
sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
copy_page(swapper_kernel_pmd, initial_kernel_pmd);
|
||||
swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
|
||||
__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
|
||||
set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
|
||||
@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
|
||||
pv_mmu_ops.write_cr3 = &xen_write_cr3;
|
||||
}
|
||||
|
||||
pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
unsigned long max_pfn)
|
||||
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
|
||||
{
|
||||
pmd_t *kernel_pmd;
|
||||
|
||||
@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
512*1024);
|
||||
|
||||
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
|
||||
memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
|
||||
copy_page(initial_kernel_pmd, kernel_pmd);
|
||||
|
||||
xen_map_identity_early(initial_kernel_pmd, max_pfn);
|
||||
|
||||
memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
|
||||
copy_page(initial_page_table, pgd);
|
||||
initial_page_table[KERNEL_PGD_BOUNDARY] =
|
||||
__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
|
||||
|
||||
@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
|
||||
|
||||
memblock_reserve(__pa(xen_start_info->pt_base),
|
||||
xen_start_info->nr_pt_frames * PAGE_SIZE);
|
||||
|
||||
return initial_page_table;
|
||||
}
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
@@ -2333,6 +2446,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
|
||||
unsigned long range;
|
||||
int err = 0;
|
||||
|
||||
if (xen_feature(XENFEAT_auto_translated_physmap))
|
||||
return -EINVAL;
|
||||
|
||||
prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
|
||||
|
||||
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
|
||||
@@ -2351,8 +2467,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -EFAULT;
|
||||
if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
|
||||
err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
nr -= batch;
|
||||
|
@@ -22,7 +22,7 @@
|
||||
*
|
||||
* P2M_PER_PAGE depends on the architecture, as a mfn is always
|
||||
* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
|
||||
* 512 and 1024 entries respectively.
|
||||
* 512 and 1024 entries respectively.
|
||||
*
|
||||
* In short, these structures contain the Machine Frame Number (MFN) of the PFN.
|
||||
*
|
||||
@@ -139,11 +139,11 @@
|
||||
* / | ~0, ~0, .... |
|
||||
* | \---------------/
|
||||
* |
|
||||
* p2m_missing p2m_missing
|
||||
* /------------------\ /------------\
|
||||
* | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
|
||||
* | [p2m_mid_missing]+---->| ..., ~0 |
|
||||
* \------------------/ \------------/
|
||||
* p2m_mid_missing p2m_missing
|
||||
* /-----------------\ /------------\
|
||||
* | [p2m_missing] +---->| ~0, ~0, ~0 |
|
||||
* | [p2m_missing] +---->| ..., ~0 |
|
||||
* \-----------------/ \------------/
|
||||
*
|
||||
* where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
|
||||
*/
|
||||
@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
|
||||
|
||||
m2p_override_init();
|
||||
}
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <linux/bootmem.h>
|
||||
/*
 * Copy the P2M leaf pages that still live inside the original
 * xen_start_info->mfn_list into a freshly allocated, page-aligned array
 * and repoint p2m_top / p2m_top_mfn_p at the copies.
 *
 * Returns:
 *   0 when xen_start_info->mfn_list already lies between __PAGE_OFFSET and
 *     __START_KERNEL_map (treated as "already revectored"), or when more
 *     leaves are found than the new array can hold;
 *   xen_start_info->mfn_list (the old address) when the allocation fails;
 *   the address of the new array otherwise.
 */
unsigned long __init xen_revector_p2m_tree(void)
{
	unsigned long va_start;
	unsigned long va_end;
	unsigned long pfn;
	unsigned long pfn_free = 0;
	unsigned long *mfn_list = NULL;
	unsigned long size;

	va_start = xen_start_info->mfn_list;
	/* We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
	 * so make sure it is rounded up to that */
	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
	/* One past the last byte of the old list. */
	va_end = va_start + size;

	/* If we were revectored already, don't do it again. */
	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
		return 0;

	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
	if (!mfn_list) {
		pr_warn("Could not allocate space for a new P2M tree!\n");
		return xen_start_info->mfn_list;
	}
	/* Fill it out with INVALID_P2M_ENTRY value */
	memset(mfn_list, 0xFF, size);

	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
		unsigned topidx = p2m_top_index(pfn);
		unsigned mididx;
		unsigned long *mid_p;

		if (!p2m_top[topidx])
			continue;

		if (p2m_top[topidx] == p2m_mid_missing)
			continue;

		mididx = p2m_mid_index(pfn);
		mid_p = p2m_top[topidx][mididx];
		if (!mid_p)
			continue;
		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
			continue;

		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
			continue;

		/* The old va. Rebase it on mfn_list. va_end is exclusive: a
		 * page starting exactly there is not part of the old list. */
		if (mid_p >= (unsigned long *)va_start && mid_p < (unsigned long *)va_end) {
			unsigned long *new;

			/* pfn_free advances a whole page at a time, so once it
			 * reaches the capacity there is no room for another
			 * leaf; copying would write past the allocation. */
			if (pfn_free >= (size / sizeof(unsigned long))) {
				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
				     size / sizeof(unsigned long), pfn_free);
				return 0;
			}
			new = &mfn_list[pfn_free];

			copy_page(new, mid_p);
			p2m_top[topidx][mididx] = new;
			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(new);

			pfn_free += P2M_PER_PAGE;

		}
		/* This should be the leafs allocated for identity from _brk. */
	}
	return (unsigned long)mfn_list;

}
|
||||
#else
|
||||
unsigned long __init xen_revector_p2m_tree(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
unsigned long get_phys_to_machine(unsigned long pfn)
|
||||
{
|
||||
unsigned topidx, mididx, idx;
|
||||
@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
|
||||
free_page((unsigned long)p);
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* Fully allocate the p2m structure for a given pfn. We need to check
|
||||
* that both the top and mid levels are allocated, and make sure the
|
||||
* parallel mfn tree is kept in sync. We may race with other cpus, so
|
||||
|
@@ -8,6 +8,14 @@
|
||||
#include <xen/xen.h>
|
||||
#include <asm/iommu_table.h>
|
||||
|
||||
|
||||
#include <asm/xen/swiotlb-xen.h>
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/iommu.h>
|
||||
#include <asm/dma.h>
|
||||
#endif
|
||||
#include <linux/export.h>
|
||||
|
||||
int xen_swiotlb __read_mostly;
|
||||
|
||||
static struct dma_map_ops xen_swiotlb_dma_ops = {
|
||||
@@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
|
||||
int __init pci_xen_swiotlb_detect(void)
|
||||
{
|
||||
|
||||
if (!xen_pv_domain())
|
||||
return 0;
|
||||
|
||||
/* If running as PV guest, either iommu=soft, or swiotlb=force will
|
||||
* activate this IOMMU. If running as PV privileged, activate it
|
||||
* regardless.
|
||||
*/
|
||||
if ((xen_initial_domain() || swiotlb || swiotlb_force) &&
|
||||
(xen_pv_domain()))
|
||||
if ((xen_initial_domain() || swiotlb || swiotlb_force))
|
||||
xen_swiotlb = 1;
|
||||
|
||||
/* If we are running under Xen, we MUST disable the native SWIOTLB.
|
||||
* Don't worry about swiotlb_force flag activating the native, as
|
||||
* the 'swiotlb' flag is the only one turning it on. */
|
||||
if (xen_pv_domain())
|
||||
swiotlb = 0;
|
||||
swiotlb = 0;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0
|
||||
* (so no iommu=X command line over-writes).
|
||||
* Considering that PV guests do not want the *native SWIOTLB* but
|
||||
* only Xen SWIOTLB it is not useful to us so set no_iommu=1 here.
|
||||
*/
|
||||
if (max_pfn > MAX_DMA32_PFN)
|
||||
no_iommu = 1;
|
||||
#endif
|
||||
return xen_swiotlb;
|
||||
}
|
||||
|
||||
void __init pci_xen_swiotlb_init(void)
|
||||
{
|
||||
if (xen_swiotlb) {
|
||||
xen_swiotlb_init(1);
|
||||
xen_swiotlb_init(1, true /* early */);
|
||||
dma_ops = &xen_swiotlb_dma_ops;
|
||||
|
||||
/* Make sure ACS will be enabled */
|
||||
pci_request_acs();
|
||||
}
|
||||
}
|
||||
|
||||
int pci_xen_swiotlb_init_late(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (xen_swiotlb)
|
||||
return 0;
|
||||
|
||||
rc = xen_swiotlb_init(1, false /* late */);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
dma_ops = &xen_swiotlb_dma_ops;
|
||||
/* Make sure ACS will be enabled */
|
||||
pci_request_acs();
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
|
||||
|
||||
IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
|
||||
0,
|
||||
NULL,
|
||||
pci_xen_swiotlb_init,
|
||||
0);
|
||||
NULL);
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <xen/platform_pci.h>
|
||||
#include "xen-ops.h"
|
||||
|
||||
#define XEN_PLATFORM_ERR_MAGIC -1
|
||||
#define XEN_PLATFORM_ERR_PROTOCOL -2
|
||||
|
@@ -432,6 +432,24 @@ char * __init xen_memory_setup(void)
|
||||
* - mfn_list
|
||||
* - xen_start_info
|
||||
* See comment above "struct start_info" in <xen/interface/xen.h>
|
||||
* We tried to make the memblock_reserve more selective so
|
||||
* that it would be clear what region is reserved. Sadly we ran
|
||||
* into the problem wherein on a 64-bit hypervisor with a 32-bit
|
||||
* initial domain, the pt_base has the cr3 value which is not
|
||||
* necessarily where the pagetable starts! As Jan put it: "
|
||||
* Actually, the adjustment turns out to be correct: The page
|
||||
* tables for a 32-on-64 dom0 get allocated in the order "first L1",
|
||||
* "first L2", "first L3", so the offset to the page table base is
|
||||
* indeed 2. When reading xen/include/public/xen.h's comment
|
||||
* very strictly, this is not a violation (since there nothing is said
|
||||
* that the first thing in the page table space is pointed to by
|
||||
* pt_base; I admit that this seems to be implied though, namely
|
||||
* do I think that it is implied that the page table space is the
|
||||
* range [pt_base, pt_base + nt_pt_frames), whereas that
|
||||
* range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
|
||||
* which - without a priori knowledge - the kernel would have
|
||||
* difficulty to figure out)." - so lets just fall back to the
|
||||
* easy way and reserve the whole region.
|
||||
*/
|
||||
memblock_reserve(__pa(xen_start_info->mfn_list),
|
||||
xen_start_info->pt_base - xen_start_info->mfn_list);
|
||||
|
@@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
|
||||
info->u.text_mode_3.font_height;
|
||||
break;
|
||||
|
||||
case XEN_VGATYPE_EFI_LFB:
|
||||
case XEN_VGATYPE_VESA_LFB:
|
||||
if (size < offsetof(struct dom0_vga_console_info,
|
||||
u.vesa_lfb.gbl_caps))
|
||||
@@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
|
||||
screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
|
||||
screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
|
||||
screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
|
||||
|
||||
if (info->video_type == XEN_VGATYPE_EFI_LFB) {
|
||||
screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
|
||||
break;
|
||||
}
|
||||
|
||||
if (size >= offsetof(struct dom0_vga_console_info,
|
||||
u.vesa_lfb.gbl_caps)
|
||||
+ sizeof(info->u.vesa_lfb.gbl_caps))
|
||||
|
@@ -28,9 +28,61 @@ ENTRY(startup_xen)
|
||||
__FINIT
|
||||
|
||||
.pushsection .text
|
||||
.align PAGE_SIZE
|
||||
.balign PAGE_SIZE
|
||||
ENTRY(hypercall_page)
|
||||
.skip PAGE_SIZE
|
||||
#define NEXT_HYPERCALL(x) \
|
||||
ENTRY(xen_hypercall_##x) \
|
||||
.skip 32
|
||||
|
||||
NEXT_HYPERCALL(set_trap_table)
|
||||
NEXT_HYPERCALL(mmu_update)
|
||||
NEXT_HYPERCALL(set_gdt)
|
||||
NEXT_HYPERCALL(stack_switch)
|
||||
NEXT_HYPERCALL(set_callbacks)
|
||||
NEXT_HYPERCALL(fpu_taskswitch)
|
||||
NEXT_HYPERCALL(sched_op_compat)
|
||||
NEXT_HYPERCALL(platform_op)
|
||||
NEXT_HYPERCALL(set_debugreg)
|
||||
NEXT_HYPERCALL(get_debugreg)
|
||||
NEXT_HYPERCALL(update_descriptor)
|
||||
NEXT_HYPERCALL(ni)
|
||||
NEXT_HYPERCALL(memory_op)
|
||||
NEXT_HYPERCALL(multicall)
|
||||
NEXT_HYPERCALL(update_va_mapping)
|
||||
NEXT_HYPERCALL(set_timer_op)
|
||||
NEXT_HYPERCALL(event_channel_op_compat)
|
||||
NEXT_HYPERCALL(xen_version)
|
||||
NEXT_HYPERCALL(console_io)
|
||||
NEXT_HYPERCALL(physdev_op_compat)
|
||||
NEXT_HYPERCALL(grant_table_op)
|
||||
NEXT_HYPERCALL(vm_assist)
|
||||
NEXT_HYPERCALL(update_va_mapping_otherdomain)
|
||||
NEXT_HYPERCALL(iret)
|
||||
NEXT_HYPERCALL(vcpu_op)
|
||||
NEXT_HYPERCALL(set_segment_base)
|
||||
NEXT_HYPERCALL(mmuext_op)
|
||||
NEXT_HYPERCALL(xsm_op)
|
||||
NEXT_HYPERCALL(nmi_op)
|
||||
NEXT_HYPERCALL(sched_op)
|
||||
NEXT_HYPERCALL(callback_op)
|
||||
NEXT_HYPERCALL(xenoprof_op)
|
||||
NEXT_HYPERCALL(event_channel_op)
|
||||
NEXT_HYPERCALL(physdev_op)
|
||||
NEXT_HYPERCALL(hvm_op)
|
||||
NEXT_HYPERCALL(sysctl)
|
||||
NEXT_HYPERCALL(domctl)
|
||||
NEXT_HYPERCALL(kexec_op)
|
||||
NEXT_HYPERCALL(tmem_op) /* 38 */
|
||||
ENTRY(xen_hypercall_rsvr)
|
||||
.skip 320
|
||||
NEXT_HYPERCALL(mca) /* 48 */
|
||||
NEXT_HYPERCALL(arch_1)
|
||||
NEXT_HYPERCALL(arch_2)
|
||||
NEXT_HYPERCALL(arch_3)
|
||||
NEXT_HYPERCALL(arch_4)
|
||||
NEXT_HYPERCALL(arch_5)
|
||||
NEXT_HYPERCALL(arch_6)
|
||||
.balign PAGE_SIZE
|
||||
.popsection
|
||||
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
|
||||
|
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
|
||||
void xen_setup_shared_info(void);
|
||||
void xen_build_mfn_list_list(void);
|
||||
void xen_setup_machphys_mapping(void);
|
||||
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
|
||||
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
|
||||
void xen_reserve_top(void);
|
||||
extern unsigned long xen_max_p2m_pfn;
|
||||
|
||||
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
|
||||
void xen_unplug_emulated_devices(void);
|
||||
|
||||
void __init xen_build_dynamic_phys_to_machine(void);
|
||||
unsigned long __init xen_revector_p2m_tree(void);
|
||||
|
||||
void xen_init_irq_ops(void);
|
||||
void xen_setup_timer(int cpu);
|
||||
|
Reference in New Issue
Block a user