Merge tag 'v5.8-rc6' into x86/cpu, to refresh the branch before adding new commits
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -2316,12 +2316,12 @@ static int mp_irqdomain_create(int ioapic)
 	ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
 						 (void *)(long)ioapic);

-	/* Release fw handle if it was allocated above */
-	if (!cfg->dev)
-		irq_domain_free_fwnode(fn);
-
-	if (!ip->irqdomain)
+	if (!ip->irqdomain) {
+		/* Release fw handle if it was allocated above */
+		if (!cfg->dev)
+			irq_domain_free_fwnode(fn);
 		return -ENOMEM;
+	}

 	ip->irqdomain->parent = parent;
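Note for context (not part of the diff): this hunk and the msi/hpet/vector hunks below all converge on one ownership rule - irq_domain_free_fwnode() is only called when domain creation fails, because a live irq_domain keeps using its fwnode handle. A minimal sketch of that pattern, using the real irqdomain APIs but an invented example_setup()/example_ops pair:

    #include <linux/irqdomain.h>

    /* Illustrative only - not taken from the patch. */
    static struct irq_domain *example_setup(const struct irq_domain_ops *example_ops,
                                            unsigned int nr_irqs)
    {
            struct fwnode_handle *fn;
            struct irq_domain *d;

            fn = irq_domain_alloc_named_fwnode("EXAMPLE");
            if (!fn)
                    return NULL;

            d = irq_domain_create_linear(fn, nr_irqs, example_ops, NULL);
            if (!d) {
                    /* Creation failed: nothing references the handle, so free it. */
                    irq_domain_free_fwnode(fn);
                    return NULL;
            }

            /* Success: the domain keeps using 'fn'; do NOT free it here. */
            return d;
    }

The hunks below simply move the existing irq_domain_free_fwnode() calls onto the failure paths so the handle is no longer freed while the domain still points at it.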
@@ -263,12 +263,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
 		msi_default_domain =
 			pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
 						  parent);
-		irq_domain_free_fwnode(fn);
 	}
-	if (!msi_default_domain)
+	if (!msi_default_domain) {
+		irq_domain_free_fwnode(fn);
 		pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
-	else
+	} else {
 		msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
+	}
 }

 #ifdef CONFIG_IRQ_REMAP
@@ -301,7 +302,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
 	if (!fn)
 		return NULL;
 	d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
-	irq_domain_free_fwnode(fn);
+	if (!d)
+		irq_domain_free_fwnode(fn);
 	return d;
 }
 #endif
@@ -364,7 +366,8 @@ static struct irq_domain *dmar_get_irq_domain(void)
 	if (fn) {
 		dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
 						    x86_vector_domain);
-		irq_domain_free_fwnode(fn);
+		if (!dmar_domain)
+			irq_domain_free_fwnode(fn);
 	}
 out:
 	mutex_unlock(&dmar_lock);
@@ -489,7 +492,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
 	}

 	d = msi_create_irq_domain(fn, domain_info, parent);
-	irq_domain_free_fwnode(fn);
+	if (!d) {
+		irq_domain_free_fwnode(fn);
+		kfree(domain_info);
+	}
 	return d;
 }
@@ -446,12 +446,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
 	trace_vector_activate(irqd->irq, apicd->is_managed,
 			      apicd->can_reserve, reserve);

-	/* Nothing to do for fixed assigned vectors */
-	if (!apicd->can_reserve && !apicd->is_managed)
-		return 0;
-
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	if (reserve || irqd_is_managed_and_shutdown(irqd))
+	if (!apicd->can_reserve && !apicd->is_managed)
+		assign_irq_vector_any_locked(irqd);
+	else if (reserve || irqd_is_managed_and_shutdown(irqd))
 		vector_assign_managed_shutdown(irqd);
 	else if (apicd->is_managed)
 		ret = activate_managed(irqd);
@@ -709,7 +707,6 @@ int __init arch_early_irq_init(void)
 	x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
 						   NULL);
 	BUG_ON(x86_vector_domain == NULL);
-	irq_domain_free_fwnode(fn);
 	irq_set_default_host(x86_vector_domain);

 	arch_init_msi_domain(x86_vector_domain);
@@ -775,20 +772,10 @@ void lapic_offline(void)
 static int apic_set_affinity(struct irq_data *irqd,
 			     const struct cpumask *dest, bool force)
 {
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
 	int err;

-	/*
-	 * Core code can call here for inactive interrupts. For inactive
-	 * interrupts which use managed or reservation mode there is no
-	 * point in going through the vector assignment right now as the
-	 * activation will assign a vector which fits the destination
-	 * cpumask. Let the core code store the destination mask and be
-	 * done with it.
-	 */
-	if (!irqd_is_activated(irqd) &&
-	    (apicd->is_managed || apicd->can_reserve))
-		return IRQ_SET_MASK_OK;
+	if (WARN_ON_ONCE(!irqd_is_activated(irqd)))
+		return -EIO;

 	raw_spin_lock(&vector_lock);
 	cpumask_and(vector_searchmask, dest, cpu_online_mask);
@@ -3,6 +3,7 @@
 #include <linux/sched.h>
+#include <linux/sched/clock.h>

 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/e820/api.h>
 #include <asm/mtrr.h>
@@ -347,6 +347,9 @@ out:
 	cr4_clear_bits(X86_CR4_UMIP);
 }

+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+	X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
 static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
 static unsigned long cr4_pinned_bits __ro_after_init;

@@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0);

 void native_write_cr4(unsigned long val)
 {
-	unsigned long bits_missing = 0;
+	unsigned long bits_changed = 0;

 set_register:
 	asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));

 	if (static_branch_likely(&cr_pinning)) {
-		if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
-			bits_missing = ~val & cr4_pinned_bits;
-			val |= bits_missing;
+		if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+			bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+			val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
 			goto set_register;
 		}
-		/* Warn after we've set the missing bits. */
-		WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
-			  bits_missing);
+		/* Warn after we've corrected the changed bits. */
+		WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+			  bits_changed);
 	}
 }
 #if IS_MODULE(CONFIG_LKDTM)
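For readers unfamiliar with CR4 pinning, the native_write_cr4() hunk above boils down to a small mask-and-repair check: take a snapshot of the security-critical bits once at boot, and on every later write force those bits back to the snapshot and warn. A standalone sketch in plain C (the defines mirror the real SMEP/SMAP bit positions, everything else is illustrative):

    #include <stdio.h>

    #define PIN_SMEP (1ul << 20)
    #define PIN_SMAP (1ul << 21)

    static const unsigned long pinned_mask = PIN_SMEP | PIN_SMAP;
    static unsigned long pinned_bits;        /* snapshot taken once at "boot" */

    static unsigned long write_cr4_checked(unsigned long val)
    {
            /* Any pinned bit that differs from the snapshot is forced back. */
            if ((val & pinned_mask) != pinned_bits) {
                    unsigned long changed = (val & pinned_mask) ^ pinned_bits;

                    val = (val & ~pinned_mask) | pinned_bits;
                    printf("pinned CR4 bits changed: 0x%lx - repaired\n", changed);
            }
            return val;
    }

    int main(void)
    {
            unsigned long cr4 = PIN_SMEP | PIN_SMAP;

            pinned_bits = cr4 & pinned_mask;   /* setup_cr_pinning() equivalent */
            cr4 = write_cr4_checked(cr4 & ~PIN_SMAP); /* attempt to clear a pinned bit */
            return cr4 == (PIN_SMEP | PIN_SMAP) ? 0 : 1;
    }

The hunks below apply the same cr4_pinned_mask/cr4_pinned_bits pair in cr4_init() and setup_cr_pinning().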
@@ -419,7 +422,7 @@ void cr4_init(void)
 	if (boot_cpu_has(X86_FEATURE_PCID))
 		cr4 |= X86_CR4_PCIDE;
 	if (static_branch_likely(&cr_pinning))
-		cr4 |= cr4_pinned_bits;
+		cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;

 	__write_cr4(cr4);

@@ -434,10 +437,7 @@ void cr4_init(void)
  */
 static void __init setup_cr_pinning(void)
 {
-	unsigned long mask;
-
-	mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
-	cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+	cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
 	static_key_enable(&cr_pinning.key);
 }

@@ -81,8 +81,4 @@ extern void update_srbds_msr(void);

 extern u64 x86_read_arch_cap_msr(void);

-#ifdef CONFIG_IA32_FEAT_CTL
-void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
-#endif
-
 #endif /* ARCH_X86_CPU_H */
@@ -49,6 +49,13 @@ enum split_lock_detect_state {
 static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
 static u64 msr_test_ctrl_cache __ro_after_init;

+/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
 /*
  * Processors which have self-snooping capability can handle conflicting
  * memory type across CPUs by snooping its own cache. However, there exists
@@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on)

 static void split_lock_init(void)
 {
-	split_lock_verify_msr(sld_state != sld_off);
+	if (cpu_model_supports_sld)
+		split_lock_verify_msr(sld_state != sld_off);
 }

 static void split_lock_warn(unsigned long ip)
@@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
 		return;
 	}

+	cpu_model_supports_sld = true;
 	split_lock_setup();
 }
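Context for the split-lock hunks above: split lock detection is controlled through MSR_TEST_CTRL, which must never be written on CPU models that were not explicitly enumerated - hence the new cpu_model_supports_sld gate. A hedged sketch of what the guarded MSR update amounts to (MSR_TEST_CTRL and MSR_TEST_CTRL_SPLIT_LOCK_DETECT are the real msr-index.h names; the wrapper function itself is invented for illustration and is not the kernel's sld_update_msr()):

    #include <asm/msr.h>
    #include <asm/msr-index.h>

    static void example_sld_toggle(bool on)
    {
            u64 ctrl;

            if (!cpu_model_supports_sld)      /* never touch the MSR on unknown models */
                    return;

            rdmsrl(MSR_TEST_CTRL, ctrl);
            if (on)
                    ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
            else
                    ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
            wrmsrl(MSR_TEST_CTRL, ctrl);
    }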
@@ -1083,7 +1083,7 @@ static noinstr bool mce_check_crashing_cpu(void)
 {
 	unsigned int cpu = smp_processor_id();

-	if (cpu_is_offline(cpu) ||
+	if (arch_cpu_is_offline(cpu) ||
 	    (crashing_cpu != -1 && crashing_cpu != cpu)) {
 		u64 mcgstatus;

@@ -1901,6 +1901,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;

 static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
 {
+	WARN_ON_ONCE(user_mode(regs));
+
 	/*
 	 * Only required when from kernel mode. See
 	 * mce_check_crashing_cpu() for details.
@@ -1954,7 +1956,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
 }
 #else
 /* 32bit unified entry point */
-DEFINE_IDTENTRY_MCE(exc_machine_check)
+DEFINE_IDTENTRY_RAW(exc_machine_check)
 {
 	unsigned long dr7;

@@ -981,10 +981,10 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)

 		c->x86_cache_max_rmid  = ecx;
 		c->x86_cache_occ_scale = ebx;
-		if (c->x86_vendor == X86_VENDOR_INTEL)
-			c->x86_cache_mbm_width_offset = eax & 0xff;
-		else
-			c->x86_cache_mbm_width_offset = -1;
+		c->x86_cache_mbm_width_offset = eax & 0xff;
+
+		if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
+			c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
 	}
 }
@@ -37,6 +37,7 @@
 #define MBA_IS_LINEAR			0x4
 #define MBA_MAX_MBPS			U32_MAX
 #define MAX_MBA_BW_AMD			0x800
+#define MBM_CNTR_WIDTH_OFFSET_AMD	20

 #define RMID_VAL_ERROR			BIT_ULL(63)
 #define RMID_VAL_UNAVAIL		BIT_ULL(62)
@@ -1117,6 +1117,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
 	_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
 	if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
 		_r_cdp = NULL;
+		_d_cdp = NULL;
 		ret = -EINVAL;
 	}

@@ -18,12 +18,6 @@
  */
 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);

-u32 get_umwait_control_msr(void)
-{
-	return umwait_control_cached;
-}
-EXPORT_SYMBOL_GPL(get_umwait_control_msr);
-
 /*
  * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
  * hardware or BIOS before kernel boot.
@@ -2,6 +2,7 @@
 #include <linux/sched.h>
+#include <linux/sched/clock.h>

 #include <asm/cpu.h>
 #include <asm/cpufeature.h>

 #include "cpu.h"
@@ -106,7 +106,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 	bad_ip = user_mode(regs) &&
 		__chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX);

-	if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue,
+	if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue,
 			OPCODE_BUFSIZE)) {
 		printk("%sCode: Bad RIP value.\n", loglvl);
 	} else {
@@ -101,6 +101,12 @@ void kernel_fpu_begin(void)
 		copy_fpregs_to_fpstate(&current->thread.fpu);
 	}
 	__cpu_invalidate_fpregs_state();
+
+	if (boot_cpu_has(X86_FEATURE_XMM))
+		ldmxcsr(MXCSR_DEFAULT);
+
+	if (boot_cpu_has(X86_FEATURE_FPU))
+		asm volatile ("fninit");
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_begin);
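The kernel_fpu_begin() hunk initializes SSE/x87 state because code running between kernel_fpu_begin() and kernel_fpu_end() should not inherit whatever control state user space left behind. MXCSR_DEFAULT is 0x1f80 (all exceptions masked, all flags clear). A user-space illustration of what "reset MXCSR to its default" means - this is only an analogue of the ldmxcsr(MXCSR_DEFAULT) call, not kernel code:

    #include <stdio.h>
    #include <xmmintrin.h>

    #define MXCSR_DEFAULT 0x1f80u   /* power-on default: exceptions masked, flags clear */

    int main(void)
    {
            unsigned int before = _mm_getcsr();

            _mm_setcsr(MXCSR_DEFAULT);      /* the ldmxcsr(MXCSR_DEFAULT) equivalent */
            printf("MXCSR: 0x%x -> 0x%x\n", before, _mm_getcsr());
            return 0;
    }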
@@ -86,7 +86,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code)
 	 * sure what we read is what we expected it to be before modifying it.
 	 */
 	/* read the text we want to modify */
-	if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
+	if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) {
 		WARN_ON(1);
 		return -EFAULT;
 	}
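From here on, most hunks are mechanical renames coming from the v5.8-rc6 maccess rework: probe_kernel_read() became copy_from_kernel_nofault() and probe_kernel_write() became copy_to_kernel_nofault(), with unchanged semantics (0 on success, -EFAULT if the kernel address cannot be accessed safely). A minimal sketch of the new spelling - the peek_text() wrapper is invented purely for illustration:

    #include <linux/uaccess.h>  /* copy_from_kernel_nofault(), copy_to_kernel_nofault() */

    /* Illustrative helper, not from the patch: read kernel text without risking an oops. */
    static int peek_text(void *dst, unsigned long ip, size_t len)
    {
            if (copy_from_kernel_nofault(dst, (void *)ip, len))
                    return -EFAULT;     /* address not mapped or not readable */
            return 0;
    }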
@@ -355,7 +355,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

 	/* Copy ftrace_caller onto the trampoline memory */
-	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
+	ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size);
 	if (WARN_ON(ret < 0))
 		goto fail;

@@ -363,13 +363,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)

 	/* The trampoline ends with ret(q) */
 	retq = (unsigned long)ftrace_stub;
-	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+	ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
 	if (WARN_ON(ret < 0))
 		goto fail;

 	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
 		ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller);
-		ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+		ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE);
 		if (WARN_ON(ret < 0))
 			goto fail;
 	}
@@ -506,7 +506,7 @@ static void *addr_from_call(void *ptr)
 	union text_poke_insn call;
 	int ret;

-	ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE);
+	ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE);
 	if (WARN_ON_ONCE(ret < 0))
 		return NULL;

@@ -732,11 +732,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 	int err;

 	bpt->type = BP_BREAKPOINT;
-	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+	err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
 				BREAK_INSTR_SIZE);
 	if (err)
 		return err;
-	err = probe_kernel_write((char *)bpt->bpt_addr,
+	err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
 				 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
 	if (!err)
 		return err;
@@ -768,7 +768,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 		return 0;

 knl_write:
-	return probe_kernel_write((char *)bpt->bpt_addr,
+	return copy_to_kernel_nofault((char *)bpt->bpt_addr,
 			(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }

@@ -243,7 +243,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
 	 * Fortunately, we know that the original code is the ideal 5-byte
 	 * long NOP.
 	 */
-	if (probe_kernel_read(buf, (void *)addr,
+	if (copy_from_kernel_nofault(buf, (void *)addr,
 		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
 		return 0UL;

@@ -346,7 +346,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
 		return 0;

 	/* This can access kernel text if given address is not recovered */
-	if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE))
+	if (copy_from_kernel_nofault(dest, (void *)recovered_insn,
+			MAX_INSN_SIZE))
 		return 0;

 	kernel_insn_init(insn, dest, MAX_INSN_SIZE);
@@ -753,16 +754,11 @@ asm(
 NOKPROBE_SYMBOL(kretprobe_trampoline);
 STACK_FRAME_NON_STANDARD(kretprobe_trampoline);

-static struct kprobe kretprobe_kprobe = {
-	.addr = (void *)kretprobe_trampoline,
-};
-
 /*
  * Called from kretprobe_trampoline
  */
 __used __visible void *trampoline_handler(struct pt_regs *regs)
 {
-	struct kprobe_ctlblk *kcb;
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
 	struct hlist_node *tmp;
@@ -772,16 +768,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
 	void *frame_pointer;
 	bool skipped = false;

-	preempt_disable();
-
 	/*
 	 * Set a dummy kprobe for avoiding kretprobe recursion.
 	 * Since kretprobe never run in kprobe handler, kprobe must not
 	 * be running at this point.
 	 */
-	kcb = get_kprobe_ctlblk();
-	__this_cpu_write(current_kprobe, &kretprobe_kprobe);
-	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	kprobe_busy_begin();

 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -857,7 +849,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
 			__this_cpu_write(current_kprobe, &ri->rp->kp);
 			ri->ret_addr = correct_ret_addr;
 			ri->rp->handler(ri, regs);
-			__this_cpu_write(current_kprobe, &kretprobe_kprobe);
+			__this_cpu_write(current_kprobe, &kprobe_busy);
 		}

 		recycle_rp_inst(ri, &empty_rp);
@@ -873,8 +865,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)

 	kretprobe_hash_unlock(current, &flags);

-	__this_cpu_write(current_kprobe, NULL);
-	preempt_enable();
+	kprobe_busy_end();

 	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
@@ -56,7 +56,7 @@ found:
 	 * overwritten by jump destination address. In this case, original
 	 * bytes must be recovered from op->optinsn.copied_insn buffer.
 	 */
-	if (probe_kernel_read(buf, (void *)addr,
+	if (copy_from_kernel_nofault(buf, (void *)addr,
 		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
 		return 0UL;

@@ -29,6 +29,8 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable_areas.h>

+#include <xen/xen.h>
+
 /* This is a multiple of PAGE_SIZE. */
 #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

@@ -543,6 +545,28 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 	return bytecount;
 }

+static bool allow_16bit_segments(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_16BIT))
+		return false;
+
+#ifdef CONFIG_XEN_PV
+	/*
+	 * Xen PV does not implement ESPFIX64, which means that 16-bit
+	 * segments will not work correctly. Until either Xen PV implements
+	 * ESPFIX64 and can signal this fact to the guest or unless someone
+	 * provides compelling evidence that allowing broken 16-bit segments
+	 * is worthwhile, disallow 16-bit segments under Xen PV.
+	 */
+	if (xen_pv_domain()) {
+		pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+		return false;
+	}
+#endif
+
+	return true;
+}
+
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
@@ -574,7 +598,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 		/* The user wants to clear the entry. */
 		memset(&ldt, 0, sizeof(ldt));
 	} else {
-		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+		if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
 			error = -EINVAL;
 			goto out;
 		}
@@ -478,7 +478,7 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);

 DEFINE_IDTENTRY_RAW(exc_nmi)
 {
-	if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
+	if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
 		return;

 	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
@@ -324,7 +324,8 @@ struct paravirt_patch_template pv_ops = {
 	.cpu.swapgs		= native_swapgs,

 #ifdef CONFIG_X86_IOPL_IOPERM
-	.cpu.update_io_bitmap		= native_tss_update_io_bitmap,
+	.cpu.invalidate_io_bitmap	= native_tss_invalidate_io_bitmap,
+	.cpu.update_io_bitmap		= native_tss_update_io_bitmap,
 #endif

 	.cpu.start_context_switch	= paravirt_nop,
@@ -94,12 +94,12 @@ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short
 }

 static bool probe_list(struct pci_dev *pdev, unsigned short vendor,
-		       const unsigned char *rom_list)
+		       const void *rom_list)
 {
 	unsigned short device;

 	do {
-		if (probe_kernel_address(rom_list, device) != 0)
+		if (get_kernel_nofault(device, rom_list) != 0)
 			device = 0;

 		if (device && match_id(pdev, vendor, device))
@@ -119,19 +119,19 @@ static struct resource *find_oprom(struct pci_dev *pdev)
 	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
 		struct resource *res = &adapter_rom_resources[i];
 		unsigned short offset, vendor, device, list, rev;
-		const unsigned char *rom;
+		const void *rom;

 		if (res->end == 0)
 			break;

 		rom = isa_bus_to_virt(res->start);
-		if (probe_kernel_address(rom + 0x18, offset) != 0)
+		if (get_kernel_nofault(offset, rom + 0x18) != 0)
 			continue;

-		if (probe_kernel_address(rom + offset + 0x4, vendor) != 0)
+		if (get_kernel_nofault(vendor, rom + offset + 0x4) != 0)
 			continue;

-		if (probe_kernel_address(rom + offset + 0x6, device) != 0)
+		if (get_kernel_nofault(device, rom + offset + 0x6) != 0)
 			continue;

 		if (match_id(pdev, vendor, device)) {
@@ -139,8 +139,8 @@ static struct resource *find_oprom(struct pci_dev *pdev)
 			break;
 		}

-		if (probe_kernel_address(rom + offset + 0x8, list) == 0 &&
-		    probe_kernel_address(rom + offset + 0xc, rev) == 0 &&
+		if (get_kernel_nofault(list, rom + offset + 0x8) == 0 &&
+		    get_kernel_nofault(rev, rom + offset + 0xc) == 0 &&
 		    rev >= 3 && list &&
 		    probe_list(pdev, vendor, rom + offset + list)) {
 			oprom = res;
@@ -183,14 +183,14 @@ static int __init romsignature(const unsigned char *rom)
 	const unsigned short * const ptr = (const unsigned short *)rom;
 	unsigned short sig;

-	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
+	return get_kernel_nofault(sig, ptr) == 0 && sig == ROMSIGNATURE;
 }

 static int __init romchecksum(const unsigned char *rom, unsigned long length)
 {
 	unsigned char sum, c;

-	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
+	for (sum = 0; length && get_kernel_nofault(c, rom++) == 0; length--)
 		sum += c;
 	return !length && !sum;
 }
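The probe_roms.c hunks use the other half of the same rename: probe_kernel_address(ptr, val) became get_kernel_nofault(val, ptr). Note the swapped argument order - the destination lvalue now comes first, matching get_user(). A hedged sketch of the new form (the helper and the EXAMPLE_ROMSIGNATURE define are illustrative, not from the patch):

    #include <linux/uaccess.h>      /* get_kernel_nofault() */

    #define EXAMPLE_ROMSIGNATURE 0xaa55     /* illustrative copy of the BIOS ROM signature */

    /* Illustrative helper: safely read a 16-bit value that may not be mapped. */
    static bool rom_signature_ok(const void *rom)
    {
            unsigned short sig;

            /* old spelling: probe_kernel_address(rom, sig) */
            return get_kernel_nofault(sig, rom) == 0 && sig == EXAMPLE_ROMSIGNATURE;
    }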
@@ -211,7 +211,7 @@ void __init probe_roms(void)

 		video_rom_resource.start = start;

-		if (probe_kernel_address(rom + 2, c) != 0)
+		if (get_kernel_nofault(c, rom + 2) != 0)
 			continue;

 		/* 0 < length <= 0x7f * 512, historically */
@@ -249,7 +249,7 @@ void __init probe_roms(void)
 		if (!romsignature(rom))
 			continue;

-		if (probe_kernel_address(rom + 2, c) != 0)
+		if (get_kernel_nofault(c, rom + 2) != 0)
 			continue;

 		/* 0 < length <= 0x7f * 512, historically */
@@ -322,20 +322,6 @@ void arch_setup_new_exec(void)
 }

 #ifdef CONFIG_X86_IOPL_IOPERM
-static inline void tss_invalidate_io_bitmap(struct tss_struct *tss)
-{
-	/*
-	 * Invalidate the I/O bitmap by moving io_bitmap_base outside the
-	 * TSS limit so any subsequent I/O access from user space will
-	 * trigger a #GP.
-	 *
-	 * This is correct even when VMEXIT rewrites the TSS limit
-	 * to 0x67 as the only requirement is that the base points
-	 * outside the limit.
-	 */
-	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
-}
-
 static inline void switch_to_bitmap(unsigned long tifp)
 {
 	/*
@@ -346,7 +332,7 @@ static inline void switch_to_bitmap(unsigned long tifp)
 	 * user mode.
 	 */
 	if (tifp & _TIF_IO_BITMAP)
-		tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw));
+		tss_invalidate_io_bitmap();
 }

 static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
@@ -380,7 +366,7 @@ void native_tss_update_io_bitmap(void)
 	u16 *base = &tss->x86_tss.io_bitmap_base;

 	if (!test_thread_flag(TIF_IO_BITMAP)) {
-		tss_invalidate_io_bitmap(tss);
+		native_tss_invalidate_io_bitmap();
 		return;
 	}

@@ -84,17 +84,16 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
 		local_irq_disable();
 }

-int is_valid_bugaddr(unsigned long addr)
+__always_inline int is_valid_bugaddr(unsigned long addr)
 {
-	unsigned short ud;
-
 	if (addr < TASK_SIZE_MAX)
 		return 0;

-	if (probe_kernel_address((unsigned short *)addr, ud))
-		return 0;
-
-	return ud == INSN_UD0 || ud == INSN_UD2;
+	/*
+	 * We got #UD, if the text isn't readable we'd have gotten
+	 * a different exception.
+	 */
+	return *(unsigned short *)addr == INSN_UD2;
 }

 static nokprobe_inline int
@@ -216,40 +215,45 @@ static inline void handle_invalid_op(struct pt_regs *regs)
 			  ILL_ILLOPN, error_get_trap_addr(regs));
 }

+static noinstr bool handle_bug(struct pt_regs *regs)
+{
+	bool handled = false;
+
+	if (!is_valid_bugaddr(regs->ip))
+		return handled;
+
+	/*
+	 * All lies, just get the WARN/BUG out.
+	 */
+	instrumentation_begin();
+	/*
+	 * Since we're emulating a CALL with exceptions, restore the interrupt
+	 * state to what it was at the exception site.
+	 */
+	if (regs->flags & X86_EFLAGS_IF)
+		raw_local_irq_enable();
+	if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
+		regs->ip += LEN_UD2;
+		handled = true;
+	}
+	if (regs->flags & X86_EFLAGS_IF)
+		raw_local_irq_disable();
+	instrumentation_end();
+
+	return handled;
+}
+
 DEFINE_IDTENTRY_RAW(exc_invalid_op)
 {
 	bool rcu_exit;

 	/*
-	 * Handle BUG/WARN like NMIs instead of like normal idtentries:
-	 * if we bugged/warned in a bad RCU context, for example, the last
-	 * thing we want is to BUG/WARN again in the idtentry code, ad
-	 * infinitum.
+	 * We use UD2 as a short encoding for 'CALL __WARN', as such
+	 * handle it before exception entry to avoid recursive WARN
+	 * in case exception entry is the one triggering WARNs.
 	 */
-	if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) {
-		enum bug_trap_type type;
-
-		nmi_enter();
-		instrumentation_begin();
-		trace_hardirqs_off_finish();
-		type = report_bug(regs->ip, regs);
-		if (regs->flags & X86_EFLAGS_IF)
-			trace_hardirqs_on_prepare();
-		instrumentation_end();
-		nmi_exit();
-
-		if (type == BUG_TRAP_TYPE_WARN) {
-			/* Skip the ud2. */
-			regs->ip += LEN_UD2;
-			return;
-		}
-
-		/*
-		 * Else, if this was a BUG and report_bug returns or if this
-		 * was just a normal #UD, we want to continue onward and
-		 * crash.
-		 */
-	}
+	if (!user_mode(regs) && handle_bug(regs))
+		return;

 	rcu_exit = idtentry_enter_cond_rcu(regs);
 	instrumentation_begin();
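The handle_bug() rework above relies on WARN() being emitted as a bare ud2 instruction: report_bug() identifies the warning site, and the handler simply advances the instruction pointer by LEN_UD2 (2 bytes) so execution resumes after the warning. A self-contained user-space analogue of that "trap, skip the ud2, continue" technique (x86-64 Linux only; obviously not the kernel code, just an illustration of the mechanism):

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <ucontext.h>

    #define UD2_LEN 2   /* ud2 is a two-byte opcode: 0f 0b */

    static void sigill_handler(int sig, siginfo_t *si, void *ctx)
    {
            ucontext_t *uc = ctx;

            /* The regs->ip += LEN_UD2 analogue: resume right after the ud2. */
            uc->uc_mcontext.gregs[REG_RIP] += UD2_LEN;
    }

    int main(void)
    {
            struct sigaction sa = { .sa_sigaction = sigill_handler, .sa_flags = SA_SIGINFO };

            sigaction(SIGILL, &sa, NULL);
            asm volatile("ud2");        /* stand-in for a WARN() site */
            puts("execution resumed after the trap");
            return 0;
    }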
@@ -299,6 +303,8 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check)

 	do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
 		error_code, BUS_ADRALN, NULL);
+
+	local_irq_disable();
 }

 #ifdef CONFIG_VMAP_STACK
@@ -488,7 +494,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
 	u8 insn_buf[MAX_INSN_SIZE];
 	struct insn insn;

-	if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+	if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip,
+			MAX_INSN_SIZE))
 		return GP_NO_HINT;

 	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
@@ -690,13 +697,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 		(struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;

 	/* Copy the IRET target to the temporary storage. */
-	memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
+	__memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);

 	/* Copy the remainder of the stack from the current stack. */
-	memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+	__memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));

 	/* Update the entry stack */
-	memcpy(new_stack, &tmp, sizeof(tmp));
+	__memcpy(new_stack, &tmp, sizeof(tmp));

 	BUG_ON(!user_mode(&new_stack->regs));
 	return new_stack;
@@ -864,6 +871,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 	instrumentation_begin();
 	trace_hardirqs_off_finish();

+	/*
+	 * If something gets miswired and we end up here for a user mode
+	 * #DB, we will malfunction.
+	 */
+	WARN_ON_ONCE(user_mode(regs));
+
 	/*
 	 * Catch SYSENTER with TF set and clear DR_STEP. If this hit a
 	 * watchpoint at the same time then that will still be handled.
@@ -882,6 +895,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
 static __always_inline void exc_debug_user(struct pt_regs *regs,
 					   unsigned long dr6)
 {
+	/*
+	 * If something gets miswired and we end up here for a kernel mode
+	 * #DB, we will malfunction.
+	 */
+	WARN_ON_ONCE(!user_mode(regs));
+
 	idtentry_enter_user(regs);
 	instrumentation_begin();

@@ -912,7 +931,7 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
 }
 #else
 /* 32 bit does not have separate entry points. */
-DEFINE_IDTENTRY_DEBUG(exc_debug)
+DEFINE_IDTENTRY_RAW(exc_debug)
 {
 	unsigned long dr6, dr7;
