Merge branch 'x86/urgent' into x86/mm, to pick up dependent fix

Signed-off-by: Ingo Molnar <mingo@kernel.org>
此提交包含在:
Ingo Molnar
2018-12-17 18:48:25 +01:00
當前提交 02117e42db
共有 792 個檔案被更改,包括 8949 行新增4850 行删除

查看文件

@@ -55,10 +55,10 @@ struct addr_marker {
enum address_markers_idx {
USER_SPACE_NR = 0,
KERNEL_SPACE_NR,
LOW_KERNEL_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
LDT_NR,
#endif
LOW_KERNEL_NR,
VMALLOC_START_NR,
VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
@@ -66,9 +66,6 @@ enum address_markers_idx {
KASAN_SHADOW_END_NR,
#endif
CPU_ENTRY_AREA_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
LDT_NR,
#endif
#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
#endif
@@ -512,11 +509,11 @@ static inline bool is_hypervisor_range(int idx)
{
#ifdef CONFIG_X86_64
/*
* ffff800000000000 - ffff87ffffffffff is reserved for
* the hypervisor.
* A hole in the beginning of kernel address space reserved
* for a hypervisor.
*/
return (idx >= pgd_index(__PAGE_OFFSET) - 16) &&
(idx < pgd_index(__PAGE_OFFSET));
return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) &&
(idx < pgd_index(GUARD_HOLE_END_ADDR));
#else
return false;
#endif

查看文件

@@ -285,20 +285,16 @@ static void cpa_flush_all(unsigned long cache)
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
static bool __cpa_flush_range(unsigned long start, int numpages, int cache)
static bool __inv_flush_all(int cache)
{
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
WARN_ON(PAGE_ALIGN(start) != start);
if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
cpa_flush_all(cache);
return true;
}
flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
return !cache;
return false;
}
static void cpa_flush_range(unsigned long start, int numpages, int cache)
@@ -306,7 +302,14 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
unsigned int i, level;
unsigned long addr;
if (__cpa_flush_range(start, numpages, cache))
WARN_ON(PAGE_ALIGN(start) != start);
if (__inv_flush_all(cache))
return;
flush_tlb_kernel_range(start, start + PAGE_SIZE * numpages);
if (!cache)
return;
/*
@@ -332,7 +335,12 @@ static void cpa_flush_array(unsigned long baddr, unsigned long *start,
{
unsigned int i, level;
if (__cpa_flush_range(baddr, numpages, cache))
if (__inv_flush_all(cache))
return;
flush_tlb_all();
if (!cache)
return;
/*

查看文件

@@ -519,8 +519,13 @@ static u64 sanitize_phys(u64 address)
* for a "decoy" virtual address (bit 63 clear) passed to
* set_memory_X(). __pa() on a "decoy" address results in a
* physical address with bit 63 set.
*
* Decoy addresses are not present for 32-bit builds, see
* set_mce_nospec().
*/
return address & __PHYSICAL_MASK;
if (IS_ENABLED(CONFIG_X86_64))
return address & __PHYSICAL_MASK;
return address;
}
/*
@@ -546,7 +551,11 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
start = sanitize_phys(start);
end = sanitize_phys(end);
BUG_ON(start >= end); /* end is exclusive */
if (start >= end) {
WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__,
start, end - 1, cattr_name(req_type));
return -EINVAL;
}
if (!pat_enabled()) {
/* This is identical to page table setting without PAT */

查看文件

@@ -7,7 +7,6 @@
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/ptrace.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -30,6 +29,12 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
/*
* Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
* stored in cpu_tlb_state.last_user_mm_ibpb.
*/
#define LAST_USER_MM_IBPB 0x1UL
/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
@@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
}
}
static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
return (unsigned long)next->mm | ibpb;
}
static void cond_ibpb(struct task_struct *next)
{
if (!next || !next->mm)
return;
/*
* Check if the current (previous) task has access to the memory
* of the @tsk (next) task. If access is denied, make sure to
* issue a IBPB to stop user->user Spectre-v2 attacks.
*
* Note: __ptrace_may_access() returns 0 or -ERRNO.
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
* same process. Using the mm pointer instead of mm->context.ctx_id
* opens a hypothetical hole vs. mm_struct reuse, which is more or
* less impossible to control by an attacker. Aside of that it
* would only affect the first schedule so the theoretically
* exposed data is not really interesting.
*/
return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
if (static_branch_likely(&switch_mm_cond_ibpb)) {
unsigned long prev_mm, next_mm;
/*
* This is a bit more complex than the always mode because
* it has to handle two cases:
*
* 1) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB set to a user space task
* (potential victim) which has TIF_SPEC_IB not set.
*
* 2) Switch from a user space task (potential attacker)
* which has TIF_SPEC_IB not set to a user space task
* (potential victim) which has TIF_SPEC_IB set.
*
* This could be done by unconditionally issuing IBPB when
* a task which has TIF_SPEC_IB set is either scheduled in
* or out. Though that results in two flushes when:
*
* - the same user space task is scheduled out and later
* scheduled in again and only a kernel thread ran in
* between.
*
* - a user space task belonging to the same process is
* scheduled in after a kernel thread ran in between
*
* - a user space task belonging to the same process is
* scheduled in immediately.
*
* Optimize this with reasonably small overhead for the
* above cases. Mangle the TIF_SPEC_IB bit into the mm
* pointer of the incoming task which is stored in
* cpu_tlbstate.last_user_mm_ibpb for comparison.
*/
next_mm = mm_mangle_tif_spec_ib(next);
prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
/*
* Issue IBPB only if the mm's are different and one or
* both have the IBPB bit set.
*/
if (next_mm != prev_mm &&
(next_mm | prev_mm) & LAST_USER_MM_IBPB)
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
}
if (static_branch_unlikely(&switch_mm_always_ibpb)) {
/*
* Only flush when switching to a user space task with a
* different context than the user space task which ran
* last on this CPU.
*/
if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
indirect_branch_prediction_barrier();
this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
}
}
}
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
new_asid = prev_asid;
need_flush = true;
} else {
u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
/*
* Avoid user/user BTB poisoning by flushing the branch
* predictor when switching between processes. This stops
* one process from doing Spectre-v2 attacks on another.
*
* As an optimization, flush indirect branches only when
* switching into a processes that can't be ptrace by the
* current one (as in such case, attacker has much more
* convenient way how to tamper with the next process than
* branch buffer poisoning).
*/
if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
ibpb_needed(tsk, last_ctx_id))
indirect_branch_prediction_barrier();
cond_ibpb(tsk);
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
/*
* Record last user mm's context id, so we can avoid
* flushing branch buffer with IBPB if we switch back
* to the same user.
*/
if (next != &init_mm)
this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
/* Make sure we write CR3 before loaded_mm. */
barrier();
@@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);