Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "This is bigger than usual - the reason is partly a pent-up stream of
  fixes after the merge window and partly accidental.  The fixes are:

   - five patches to fix a boot failure on Andy Lutomirsky's laptop
   - four SGI UV platform fixes
   - KASAN fix
   - warning fix
   - documentation update
   - swap entry definition fix
   - pkeys fix
   - irq stats fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe()
  x86/efi: Allocate a trampoline if needed in efi_free_boot_services()
  x86/boot: Rework reserve_real_mode() to allow multiple tries
  x86/boot: Defer setup_real_mode() to early_initcall time
  x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly
  x86/boot: Run reserve_bios_regions() after we initialize the memory map
  x86/irq: Do not substract irq_tlb_count from irq_call_count
  x86/mm: Fix swap entry comment and macro
  x86/mm/kaslr: Fix -Wformat-security warning
  x86/mm/pkeys: Fix compact mode by removing protection keys' XSAVE buffer manipulation
  x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables
  x86/platform/UV: Fix kernel panic running RHEL kdump kernel on UV systems
  x86/platform/UV: Fix problem with UV4 BIOS providing incorrect PXM values
  x86/platform/UV: Fix bug with iounmap() of the UV4 EFI System Table causing a crash
  x86/platform/UV: Fix problem with UV4 Socket IDs not being contiguous
  x86/entry: Clarify the RF saving/restoring situation with SYSCALL/SYSRET
  x86/mm: Disable preemption during CR3 read+write
  x86/mm/KASLR: Increase BRK pages for KASLR memory randomization
  x86/mm/KASLR: Fix physical memory calculation on KASLR memory randomization
  x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text
This commit is contained in:
Linus Torvalds
2016-08-12 14:31:10 -07:00
20 changed files with 185 additions and 198 deletions

View File

@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
int x2apic_prepare_cpu(unsigned int cpu)
static int x2apic_prepare_cpu(unsigned int cpu)
{
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
return 0;
}
int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_dead_cpu(unsigned int this_cpu)
{
int cpu;
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_cluster_probe(void)
{
int cpu = smp_processor_id();
int ret;
if (!x2apic_mode)
return 0;
ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
x2apic_prepare_cpu, x2apic_dead_cpu);
if (ret < 0) {
pr_err("Failed to register X2APIC_PREPARE\n");
return 0;
}
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
x2apic_prepare_cpu, x2apic_dead_cpu);
return 1;
}

View File

@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;
if (numa_off) {
pr_err("UV: NUMA is off, disabling UV support\n");
return 0;
}
/* Setup early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
struct uv_gam_range_entry *gre = uv_gre_table;
struct uv_gam_range_s *grt;
unsigned long last_limit = 0, ram_limit = 0;
int bytes, i, sid, lsid = -1;
int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
if (!gre)
return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
}
sid = gre->sockid - _min_socket;
if (lsid < sid) { /* new range */
grt = &_gr_table[sid];
grt->base = lsid;
grt = &_gr_table[indx];
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid = sid;
lindx = indx++;
continue;
}
if (lsid == sid && !ram_limit) { /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
}
if (!ram_limit) { /* non-contiguous ram range */
grt++;
grt->base = sid - 1;
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
"Range", "", "Size", "Type", "NASID",
"SID", "PN", "PXM");
"SID", "PN");
}
pr_info(
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
gre->type, gre->nasid, gre->sockid,
gre->pnode, gre->pxm);
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
_pnode_to_socket[i] = SOCK_EMPTY;
/* fill in pnode/node/addr conversion list values */
pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
if (_socket_to_pnode[i] != SOCK_EMPTY)
continue; /* duplicate */
_socket_to_pnode[i] = gre->pnode;
_socket_to_node[i] = gre->pxm;
i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;
pr_info(
"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
_socket_to_node[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}
/* check socket -> node values */
/* Set socket -> node values */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
lnid = nid;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
i = sockid - minsock;
if (nid != _socket_to_node[i]) {
pr_warn(
"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
i, sockid, gre->type, _socket_to_node[i], nid);
_socket_to_node[i] = nid;
}
_socket_to_node[sockid - minsock] = nid;
pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
sockid, apicid, nid);
}
/* Setup physical blade to pnode translation from GAM Range Table */

View File

@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
/*
* Set xfeatures (aka XSTATE_BV) bit for a feature that we want
* to take out of its "init state". This will ensure that an
* XRSTOR actually restores the state.
*/
static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
int xstate_feature_mask)
{
xsave->header.xfeatures |= xstate_feature_mask;
}
/*
* This function is safe to call whether the FPU is in use or not.
*
* Note that this only works on the current task.
*
* Inputs:
* @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
* XFEATURE_MASK_SSE, etc...)
* @xsave_state_ptr: a pointer to a copy of the state that you would
* like written in to the current task's FPU xsave state. This pointer
* must not be located in the current tasks's xsave area.
* Output:
* address of the state in the xsave area or NULL if the state
* is not present or is in its 'init state'.
*/
static void fpu__xfeature_set_state(int xstate_feature_mask,
void *xstate_feature_src, size_t len)
{
struct xregs_state *xsave = &current->thread.fpu.state.xsave;
struct fpu *fpu = &current->thread.fpu;
void *dst;
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
return;
}
/*
* Tell the FPU code that we need the FPU state to be in
* 'fpu' (not in the registers), and that we need it to
* be stable while we write to it.
*/
fpu__current_fpstate_write_begin();
/*
* This method *WILL* *NOT* work for compact-format
* buffers. If the 'xstate_feature_mask' is unset in
* xcomp_bv then we may need to move other feature state
* "up" in the buffer.
*/
if (xsave->header.xcomp_bv & xstate_feature_mask) {
WARN_ON_ONCE(1);
goto out;
}
/* find the location in the xsave buffer of the desired state */
dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
/*
* Make sure that the pointer being passed in did not
* come from the xsave buffer itself.
*/
WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
/* put the caller-provided data in the location */
memcpy(dst, xstate_feature_src, len);
/*
* Mark the xfeature so that the CPU knows there is state
* in the buffer now.
*/
fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
out:
/*
* We are done writing to the 'fpu'. Reenable preeption
* and (possibly) move the fpstate back in to the fpregs.
*/
fpu__current_fpstate_write_end();
}
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
* This will go out and modify the XSAVE buffer so that PKRU is
* set to a particular state for access to 'pkey'.
*
* PKRU state does affect kernel access to user memory. We do
* not modfiy PKRU *itself* here, only the XSAVE state that will
* be restored in to PKRU when we return back to userspace.
* This will go out and modify PKRU register to set the access
* rights for @pkey to @init_val.
*/
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
struct pkru_state *old_pkru_state;
struct pkru_state new_pkru_state;
u32 old_pkru;
int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
u32 new_pkru_bits = 0;
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
/*
* For most XSAVE components, this would be an arduous task:
* brining fpstate up to date with fpregs, updating fpstate,
* then re-populating fpregs. But, for components that are
* never lazily managed, we can just access the fpregs
* directly. PKRU is never managed lazily, so we can just
* manipulate it directly. Make sure it stays that way.
*/
WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
/* Shift the bits in to the correct place in PKRU for pkey: */
new_pkru_bits <<= pkey_shift;
/* Locate old copy of the state in the xsave buffer: */
old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
/* Get old PKRU and mask off any old bits in place: */
old_pkru = read_pkru();
old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
/*
* When state is not in the buffer, it is in the init
* state, set it manually. Otherwise, copy out the old
* state.
*/
if (!old_pkru_state)
new_pkru_state.pkru = 0;
else
new_pkru_state.pkru = old_pkru_state->pkru;
/* Mask off any old bits in place: */
new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
/* Set the newly-requested bits: */
new_pkru_state.pkru |= new_pkru_bits;
/*
* We could theoretically live without zeroing pkru.pad.
* The current XSAVE feature state definition says that
* only bytes 0->3 are used. But we do not want to
* chance leaking kernel stack out to userspace in case a
* memcpy() of the whole xsave buffer was done.
*
* They're in the same cacheline anyway.
*/
new_pkru_state.pad = 0;
fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
/* Write old part along with new part: */
write_pkru(old_pkru | new_pkru_bits);
return 0;
}

View File

@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
/* Initialize 32bit specific setup functions */
x86_init.resources.reserve_resources = i386_reserve_resources;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
reserve_bios_regions();
}
asmlinkage __visible void __init i386_start_kernel(void)

View File

@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
copy_bootdata(__va(real_mode_data));
x86_early_init_platform_quirks();
reserve_bios_regions();
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_INTEL_MID:

View File

@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
irq_stats(j)->irq_tlb_count);
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)

View File

@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
kernel_randomize_memory();
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
max_possible_pfn = max_pfn;
/*
* Define random base addresses for memory sections after max_pfn is
* defined and before each memory section base is used.
*/
kernel_randomize_memory();
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
efi_find_mirror();
}
reserve_bios_regions();
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
early_trap_pf_init();
setup_real_mode();
/*
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
* with the current CR4 value. This may not be necessary, but
* auditing all the early-boot CR4 manipulation would be needed to
* rule it out.
*/
if (boot_cpu_data.cpuid_level >= 0)
/* A CPU has %cr4 if and only if it has CPUID. */
mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
mmu_cr4_features = __read_cr4();
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
}
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,