Merge drm/drm-next into drm-intel-next-queued
Catching up with 5.3-rc* Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
This commit is contained in:
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o := n
|
||||
|
||||
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_test_nx.o := y
|
||||
OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
|
||||
|
||||
ifdef CONFIG_FRAME_POINTER
|
||||
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
|
||||
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o
|
||||
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
|
||||
|
||||
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
|
||||
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
|
||||
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
|
||||
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
|
||||
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
|
||||
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
|
||||
|
@@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
|
||||
c->x86_stepping >= 0x0e))
|
||||
flags->bm_check = 1;
|
||||
}
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
|
||||
/*
|
||||
* All Zhaoxin CPUs that support C3 share cache.
|
||||
* And caches should not be flushed by software while
|
||||
* entering C3 type state.
|
||||
*/
|
||||
flags->bm_check = 1;
|
||||
/*
|
||||
* On all recent Zhaoxin platforms, ARB_DISABLE is a nop.
|
||||
* So, set bm_control to zero to indicate that ARB_DISABLE
|
||||
* is not required while entering C3 type state.
|
||||
*/
|
||||
flags->bm_control = 0;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
|
||||
|
||||
|
@@ -14,6 +14,7 @@
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/bsearch.h>
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/sections.h>
|
||||
@@ -277,7 +278,7 @@ static inline bool is_jmp(const u8 opcode)
|
||||
}
|
||||
|
||||
static void __init_or_module
|
||||
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
|
||||
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
|
||||
{
|
||||
u8 *next_rip, *tgt_rip;
|
||||
s32 n_dspl, o_dspl;
|
||||
@@ -286,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
|
||||
if (a->replacementlen != 5)
|
||||
return;
|
||||
|
||||
o_dspl = *(s32 *)(insnbuf + 1);
|
||||
o_dspl = *(s32 *)(insn_buff + 1);
|
||||
|
||||
/* next_rip of the replacement JMP */
|
||||
next_rip = repl_insn + a->replacementlen;
|
||||
@@ -312,9 +313,9 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
|
||||
two_byte_jmp:
|
||||
n_dspl -= 2;
|
||||
|
||||
insnbuf[0] = 0xeb;
|
||||
insnbuf[1] = (s8)n_dspl;
|
||||
add_nops(insnbuf + 2, 3);
|
||||
insn_buff[0] = 0xeb;
|
||||
insn_buff[1] = (s8)n_dspl;
|
||||
add_nops(insn_buff + 2, 3);
|
||||
|
||||
repl_len = 2;
|
||||
goto done;
|
||||
@@ -322,8 +323,8 @@ two_byte_jmp:
|
||||
five_byte_jmp:
|
||||
n_dspl -= 5;
|
||||
|
||||
insnbuf[0] = 0xe9;
|
||||
*(s32 *)&insnbuf[1] = n_dspl;
|
||||
insn_buff[0] = 0xe9;
|
||||
*(s32 *)&insn_buff[1] = n_dspl;
|
||||
|
||||
repl_len = 5;
|
||||
|
||||
@@ -370,7 +371,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
|
||||
{
|
||||
struct alt_instr *a;
|
||||
u8 *instr, *replacement;
|
||||
u8 insnbuf[MAX_PATCH_LEN];
|
||||
u8 insn_buff[MAX_PATCH_LEN];
|
||||
|
||||
DPRINTK("alt table %px, -> %px", start, end);
|
||||
/*
|
||||
@@ -383,11 +384,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
|
||||
* order.
|
||||
*/
|
||||
for (a = start; a < end; a++) {
|
||||
int insnbuf_sz = 0;
|
||||
int insn_buff_sz = 0;
|
||||
|
||||
instr = (u8 *)&a->instr_offset + a->instr_offset;
|
||||
replacement = (u8 *)&a->repl_offset + a->repl_offset;
|
||||
BUG_ON(a->instrlen > sizeof(insnbuf));
|
||||
BUG_ON(a->instrlen > sizeof(insn_buff));
|
||||
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
|
||||
if (!boot_cpu_has(a->cpuid)) {
|
||||
if (a->padlen > 1)
|
||||
@@ -405,8 +406,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
|
||||
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
|
||||
DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
|
||||
|
||||
memcpy(insnbuf, replacement, a->replacementlen);
|
||||
insnbuf_sz = a->replacementlen;
|
||||
memcpy(insn_buff, replacement, a->replacementlen);
|
||||
insn_buff_sz = a->replacementlen;
|
||||
|
||||
/*
|
||||
* 0xe8 is a relative jump; fix the offset.
|
||||
@@ -414,24 +415,24 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
|
||||
* Instruction length is checked before the opcode to avoid
|
||||
* accessing uninitialized bytes for zero-length replacements.
|
||||
*/
|
||||
if (a->replacementlen == 5 && *insnbuf == 0xe8) {
|
||||
*(s32 *)(insnbuf + 1) += replacement - instr;
|
||||
if (a->replacementlen == 5 && *insn_buff == 0xe8) {
|
||||
*(s32 *)(insn_buff + 1) += replacement - instr;
|
||||
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
|
||||
*(s32 *)(insnbuf + 1),
|
||||
(unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
|
||||
*(s32 *)(insn_buff + 1),
|
||||
(unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
|
||||
}
|
||||
|
||||
if (a->replacementlen && is_jmp(replacement[0]))
|
||||
recompute_jump(a, instr, replacement, insnbuf);
|
||||
recompute_jump(a, instr, replacement, insn_buff);
|
||||
|
||||
if (a->instrlen > a->replacementlen) {
|
||||
add_nops(insnbuf + a->replacementlen,
|
||||
add_nops(insn_buff + a->replacementlen,
|
||||
a->instrlen - a->replacementlen);
|
||||
insnbuf_sz += a->instrlen - a->replacementlen;
|
||||
insn_buff_sz += a->instrlen - a->replacementlen;
|
||||
}
|
||||
DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
|
||||
DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
|
||||
|
||||
text_poke_early(instr, insnbuf, insnbuf_sz);
|
||||
text_poke_early(instr, insn_buff, insn_buff_sz);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -593,33 +594,119 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
|
||||
struct paravirt_patch_site *end)
|
||||
{
|
||||
struct paravirt_patch_site *p;
|
||||
char insnbuf[MAX_PATCH_LEN];
|
||||
char insn_buff[MAX_PATCH_LEN];
|
||||
|
||||
for (p = start; p < end; p++) {
|
||||
unsigned int used;
|
||||
|
||||
BUG_ON(p->len > MAX_PATCH_LEN);
|
||||
/* prep the buffer with the original instructions */
|
||||
memcpy(insnbuf, p->instr, p->len);
|
||||
used = pv_ops.init.patch(p->instrtype, insnbuf,
|
||||
(unsigned long)p->instr, p->len);
|
||||
memcpy(insn_buff, p->instr, p->len);
|
||||
used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
|
||||
|
||||
BUG_ON(used > p->len);
|
||||
|
||||
/* Pad the rest with nops */
|
||||
add_nops(insnbuf + used, p->len - used);
|
||||
text_poke_early(p->instr, insnbuf, p->len);
|
||||
add_nops(insn_buff + used, p->len - used);
|
||||
text_poke_early(p->instr, insn_buff, p->len);
|
||||
}
|
||||
}
|
||||
extern struct paravirt_patch_site __start_parainstructions[],
|
||||
__stop_parainstructions[];
|
||||
#endif /* CONFIG_PARAVIRT */
|
||||
|
||||
/*
|
||||
* Self-test for the INT3 based CALL emulation code.
|
||||
*
|
||||
* This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
|
||||
* properly and that there is a stack gap between the INT3 frame and the
|
||||
* previous context. Without this gap doing a virtual PUSH on the interrupted
|
||||
* stack would corrupt the INT3 IRET frame.
|
||||
*
|
||||
* See entry_{32,64}.S for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We define the int3_magic() function in assembly to control the calling
|
||||
* convention such that we can 'call' it from assembly.
|
||||
*/
|
||||
|
||||
extern void int3_magic(unsigned int *ptr); /* defined in asm */
|
||||
|
||||
asm (
|
||||
" .pushsection .init.text, \"ax\", @progbits\n"
|
||||
" .type int3_magic, @function\n"
|
||||
"int3_magic:\n"
|
||||
" movl $1, (%" _ASM_ARG1 ")\n"
|
||||
" ret\n"
|
||||
" .size int3_magic, .-int3_magic\n"
|
||||
" .popsection\n"
|
||||
);
|
||||
|
||||
extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
|
||||
|
||||
static int __init
|
||||
int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
|
||||
{
|
||||
struct die_args *args = data;
|
||||
struct pt_regs *regs = args->regs;
|
||||
|
||||
if (!regs || user_mode(regs))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (val != DIE_INT3)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
int3_emulate_call(regs, (unsigned long)&int3_magic);
|
||||
return NOTIFY_STOP;
|
||||
}
|
||||
|
||||
static void __init int3_selftest(void)
|
||||
{
|
||||
static __initdata struct notifier_block int3_exception_nb = {
|
||||
.notifier_call = int3_exception_notify,
|
||||
.priority = INT_MAX-1, /* last */
|
||||
};
|
||||
unsigned int val = 0;
|
||||
|
||||
BUG_ON(register_die_notifier(&int3_exception_nb));
|
||||
|
||||
/*
|
||||
* Basically: int3_magic(&val); but really complicated :-)
|
||||
*
|
||||
* Stick the address of the INT3 instruction into int3_selftest_ip,
|
||||
* then trigger the INT3, padded with NOPs to match a CALL instruction
|
||||
* length.
|
||||
*/
|
||||
asm volatile ("1: int3; nop; nop; nop; nop\n\t"
|
||||
".pushsection .init.data,\"aw\"\n\t"
|
||||
".align " __ASM_SEL(4, 8) "\n\t"
|
||||
".type int3_selftest_ip, @object\n\t"
|
||||
".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
|
||||
"int3_selftest_ip:\n\t"
|
||||
__ASM_SEL(.long, .quad) " 1b\n\t"
|
||||
".popsection\n\t"
|
||||
: ASM_CALL_CONSTRAINT
|
||||
: __ASM_SEL_RAW(a, D) (&val)
|
||||
: "memory");
|
||||
|
||||
BUG_ON(val != 1);
|
||||
|
||||
unregister_die_notifier(&int3_exception_nb);
|
||||
}
|
||||
|
||||
void __init alternative_instructions(void)
|
||||
{
|
||||
/* The patching is not fully atomic, so try to avoid local interruptions
|
||||
that might execute the to be patched code.
|
||||
Other CPUs are not running. */
|
||||
int3_selftest();
|
||||
|
||||
/*
|
||||
* The patching is not fully atomic, so try to avoid local
|
||||
* interruptions that might execute the to be patched code.
|
||||
* Other CPUs are not running.
|
||||
*/
|
||||
stop_nmi();
|
||||
|
||||
/*
|
||||
@@ -644,10 +731,11 @@ void __init alternative_instructions(void)
|
||||
_text, _etext);
|
||||
}
|
||||
|
||||
if (!uniproc_patched || num_possible_cpus() == 1)
|
||||
if (!uniproc_patched || num_possible_cpus() == 1) {
|
||||
free_init_pages("SMP alternatives",
|
||||
(unsigned long)__smp_locks,
|
||||
(unsigned long)__smp_locks_end);
|
||||
}
|
||||
#endif
|
||||
|
||||
apply_paravirt(__parainstructions, __parainstructions_end);
|
||||
@@ -848,81 +936,133 @@ static void do_sync_core(void *info)
|
||||
sync_core();
|
||||
}
|
||||
|
||||
static bool bp_patching_in_progress;
|
||||
static void *bp_int3_handler, *bp_int3_addr;
|
||||
static struct bp_patching_desc {
|
||||
struct text_poke_loc *vec;
|
||||
int nr_entries;
|
||||
} bp_patching;
|
||||
|
||||
static int patch_cmp(const void *key, const void *elt)
|
||||
{
|
||||
struct text_poke_loc *tp = (struct text_poke_loc *) elt;
|
||||
|
||||
if (key < tp->addr)
|
||||
return -1;
|
||||
if (key > tp->addr)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(patch_cmp);
|
||||
|
||||
int poke_int3_handler(struct pt_regs *regs)
|
||||
{
|
||||
struct text_poke_loc *tp;
|
||||
unsigned char int3 = 0xcc;
|
||||
void *ip;
|
||||
|
||||
/*
|
||||
* Having observed our INT3 instruction, we now must observe
|
||||
* bp_patching_in_progress.
|
||||
* bp_patching.nr_entries.
|
||||
*
|
||||
* in_progress = TRUE INT3
|
||||
* nr_entries != 0 INT3
|
||||
* WMB RMB
|
||||
* write INT3 if (in_progress)
|
||||
* write INT3 if (nr_entries)
|
||||
*
|
||||
* Idem for bp_int3_handler.
|
||||
* Idem for other elements in bp_patching.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
if (likely(!bp_patching_in_progress))
|
||||
if (likely(!bp_patching.nr_entries))
|
||||
return 0;
|
||||
|
||||
if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
|
||||
if (user_mode(regs))
|
||||
return 0;
|
||||
|
||||
/* set up the specified breakpoint handler */
|
||||
regs->ip = (unsigned long) bp_int3_handler;
|
||||
/*
|
||||
* Discount the sizeof(int3). See text_poke_bp_batch().
|
||||
*/
|
||||
ip = (void *) regs->ip - sizeof(int3);
|
||||
|
||||
/*
|
||||
* Skip the binary search if there is a single member in the vector.
|
||||
*/
|
||||
if (unlikely(bp_patching.nr_entries > 1)) {
|
||||
tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
|
||||
sizeof(struct text_poke_loc),
|
||||
patch_cmp);
|
||||
if (!tp)
|
||||
return 0;
|
||||
} else {
|
||||
tp = bp_patching.vec;
|
||||
if (tp->addr != ip)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* set up the specified breakpoint detour */
|
||||
regs->ip = (unsigned long) tp->detour;
|
||||
|
||||
return 1;
|
||||
}
|
||||
NOKPROBE_SYMBOL(poke_int3_handler);
|
||||
|
||||
/**
|
||||
* text_poke_bp() -- update instructions on live kernel on SMP
|
||||
* @addr: address to patch
|
||||
* @opcode: opcode of new instruction
|
||||
* @len: length to copy
|
||||
* @handler: address to jump to when the temporary breakpoint is hit
|
||||
* text_poke_bp_batch() -- update instructions on live kernel on SMP
|
||||
* @tp: vector of instructions to patch
|
||||
* @nr_entries: number of entries in the vector
|
||||
*
|
||||
* Modify multi-byte instruction by using int3 breakpoint on SMP.
|
||||
* We completely avoid stop_machine() here, and achieve the
|
||||
* synchronization using int3 breakpoint.
|
||||
*
|
||||
* The way it is done:
|
||||
* - add a int3 trap to the address that will be patched
|
||||
* - For each entry in the vector:
|
||||
* - add a int3 trap to the address that will be patched
|
||||
* - sync cores
|
||||
* - update all but the first byte of the patched range
|
||||
* - For each entry in the vector:
|
||||
* - update all but the first byte of the patched range
|
||||
* - sync cores
|
||||
* - replace the first byte (int3) by the first byte of
|
||||
* replacing opcode
|
||||
* - For each entry in the vector:
|
||||
* - replace the first byte (int3) by the first byte of
|
||||
* replacing opcode
|
||||
* - sync cores
|
||||
*/
|
||||
void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
|
||||
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
|
||||
{
|
||||
int patched_all_but_first = 0;
|
||||
unsigned char int3 = 0xcc;
|
||||
|
||||
bp_int3_handler = handler;
|
||||
bp_int3_addr = (u8 *)addr + sizeof(int3);
|
||||
bp_patching_in_progress = true;
|
||||
unsigned int i;
|
||||
|
||||
lockdep_assert_held(&text_mutex);
|
||||
|
||||
bp_patching.vec = tp;
|
||||
bp_patching.nr_entries = nr_entries;
|
||||
|
||||
/*
|
||||
* Corresponding read barrier in int3 notifier for making sure the
|
||||
* in_progress and handler are correctly ordered wrt. patching.
|
||||
* nr_entries and handler are correctly ordered wrt. patching.
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
text_poke(addr, &int3, sizeof(int3));
|
||||
/*
|
||||
* First step: add a int3 trap to the address that will be patched.
|
||||
*/
|
||||
for (i = 0; i < nr_entries; i++)
|
||||
text_poke(tp[i].addr, &int3, sizeof(int3));
|
||||
|
||||
on_each_cpu(do_sync_core, NULL, 1);
|
||||
|
||||
if (len - sizeof(int3) > 0) {
|
||||
/* patch all but the first byte */
|
||||
text_poke((char *)addr + sizeof(int3),
|
||||
(const char *) opcode + sizeof(int3),
|
||||
len - sizeof(int3));
|
||||
/*
|
||||
* Second step: update all but the first byte of the patched range.
|
||||
*/
|
||||
for (i = 0; i < nr_entries; i++) {
|
||||
if (tp[i].len - sizeof(int3) > 0) {
|
||||
text_poke((char *)tp[i].addr + sizeof(int3),
|
||||
(const char *)tp[i].opcode + sizeof(int3),
|
||||
tp[i].len - sizeof(int3));
|
||||
patched_all_but_first++;
|
||||
}
|
||||
}
|
||||
|
||||
if (patched_all_but_first) {
|
||||
/*
|
||||
* According to Intel, this core syncing is very likely
|
||||
* not necessary and we'd be safe even without it. But
|
||||
@@ -931,14 +1071,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
|
||||
on_each_cpu(do_sync_core, NULL, 1);
|
||||
}
|
||||
|
||||
/* patch the first byte */
|
||||
text_poke(addr, opcode, sizeof(int3));
|
||||
/*
|
||||
* Third step: replace the first byte (int3) by the first byte of
|
||||
* replacing opcode.
|
||||
*/
|
||||
for (i = 0; i < nr_entries; i++)
|
||||
text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
|
||||
|
||||
on_each_cpu(do_sync_core, NULL, 1);
|
||||
/*
|
||||
* sync_core() implies an smp_mb() and orders this store against
|
||||
* the writing of the new instruction.
|
||||
*/
|
||||
bp_patching_in_progress = false;
|
||||
bp_patching.vec = NULL;
|
||||
bp_patching.nr_entries = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* text_poke_bp() -- update instructions on live kernel on SMP
|
||||
* @addr: address to patch
|
||||
* @opcode: opcode of new instruction
|
||||
* @len: length to copy
|
||||
* @handler: address to jump to when the temporary breakpoint is hit
|
||||
*
|
||||
* Update a single instruction with the vector in the stack, avoiding
|
||||
* dynamically allocated memory. This function should be used when it is
|
||||
* not possible to allocate memory.
|
||||
*/
|
||||
void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
|
||||
{
|
||||
struct text_poke_loc tp = {
|
||||
.detour = handler,
|
||||
.addr = addr,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
if (len > POKE_MAX_OPCODE_SIZE) {
|
||||
WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy((void *)tp.opcode, opcode, len);
|
||||
|
||||
text_poke_bp_batch(&tp, 1);
|
||||
}
|
||||
|
@@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Shared support code for AMD K8 northbridges and derivates.
|
||||
* Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
|
||||
* Copyright 2006 Andi Kleen, SUSE Labs.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
@@ -71,7 +72,7 @@ static const struct pci_device_id hygon_root_ids[] = {
|
||||
{}
|
||||
};
|
||||
|
||||
const struct pci_device_id hygon_nb_misc_ids[] = {
|
||||
static const struct pci_device_id hygon_nb_misc_ids[] = {
|
||||
{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
|
||||
{}
|
||||
};
|
||||
|
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
|
||||
/*
|
||||
* Debug level, exported for io_apic.c
|
||||
*/
|
||||
unsigned int apic_verbosity;
|
||||
int apic_verbosity;
|
||||
|
||||
int pic_mode;
|
||||
|
||||
@@ -195,7 +195,7 @@ static struct resource lapic_resource = {
|
||||
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
|
||||
};
|
||||
|
||||
unsigned int lapic_timer_frequency = 0;
|
||||
unsigned int lapic_timer_period = 0;
|
||||
|
||||
static void apic_pm_activate(void);
|
||||
|
||||
@@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
|
||||
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
|
||||
return 0;
|
||||
|
||||
__setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
|
||||
__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
|
||||
|
||||
static int __init lapic_init_clockevent(void)
|
||||
{
|
||||
if (!lapic_timer_frequency)
|
||||
if (!lapic_timer_period)
|
||||
return -1;
|
||||
|
||||
/* Calculate the scaled math multiplication factor */
|
||||
lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
|
||||
lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
|
||||
TICK_NSEC, lapic_clockevent.shift);
|
||||
lapic_clockevent.max_delta_ns =
|
||||
clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
|
||||
@@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool __init apic_needs_pit(void)
|
||||
{
|
||||
/*
|
||||
* If the frequencies are not known, PIT is required for both TSC
|
||||
* and apic timer calibration.
|
||||
*/
|
||||
if (!tsc_khz || !cpu_khz)
|
||||
return true;
|
||||
|
||||
/* Is there an APIC at all? */
|
||||
if (!boot_cpu_has(X86_FEATURE_APIC))
|
||||
return true;
|
||||
|
||||
/* Deadline timer is based on TSC so no further PIT action required */
|
||||
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
|
||||
return false;
|
||||
|
||||
/* APIC timer disabled? */
|
||||
if (disable_apic_timer)
|
||||
return true;
|
||||
/*
|
||||
* The APIC timer frequency is known already, no PIT calibration
|
||||
* required. If unknown, let the PIT be initialized.
|
||||
*/
|
||||
return lapic_timer_period == 0;
|
||||
}
|
||||
|
||||
static int __init calibrate_APIC_clock(void)
|
||||
{
|
||||
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
|
||||
@@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void)
|
||||
*/
|
||||
if (!lapic_init_clockevent()) {
|
||||
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
|
||||
lapic_timer_frequency);
|
||||
lapic_timer_period);
|
||||
/*
|
||||
* Direct calibration methods must have an always running
|
||||
* local APIC timer, no need for broadcast timer.
|
||||
@@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void)
|
||||
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
|
||||
&delta, &deltatsc);
|
||||
|
||||
lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
|
||||
lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
|
||||
lapic_init_clockevent();
|
||||
|
||||
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
|
||||
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
|
||||
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
|
||||
lapic_timer_frequency);
|
||||
lapic_timer_period);
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_TSC)) {
|
||||
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
|
||||
@@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void)
|
||||
|
||||
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
|
||||
"%u.%04u MHz.\n",
|
||||
lapic_timer_frequency / (1000000 / HZ),
|
||||
lapic_timer_frequency % (1000000 / HZ));
|
||||
lapic_timer_period / (1000000 / HZ),
|
||||
lapic_timer_period % (1000000 / HZ));
|
||||
|
||||
/*
|
||||
* Do a sanity check on the APIC calibration result
|
||||
*/
|
||||
if (lapic_timer_frequency < (1000000 / HZ)) {
|
||||
if (lapic_timer_period < (1000000 / HZ)) {
|
||||
local_irq_enable();
|
||||
pr_warning("APIC frequency too slow, disabling apic timer\n");
|
||||
return -1;
|
||||
@@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void)
|
||||
apic_write(APIC_LVT1, value);
|
||||
}
|
||||
|
||||
static void __init apic_bsp_setup(bool upmode);
|
||||
|
||||
/* Init the interrupt delivery mode for the BSP */
|
||||
void __init apic_intr_mode_init(void)
|
||||
{
|
||||
@@ -1464,7 +1493,8 @@ static void apic_pending_intr_clear(void)
|
||||
if (queued) {
|
||||
if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
|
||||
ntsc = rdtsc();
|
||||
max_loops = (cpu_khz << 10) - (ntsc - tsc);
|
||||
max_loops = (long long)cpu_khz << 10;
|
||||
max_loops -= ntsc - tsc;
|
||||
} else {
|
||||
max_loops--;
|
||||
}
|
||||
@@ -2040,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
|
||||
entering_irq();
|
||||
trace_spurious_apic_entry(vector);
|
||||
|
||||
/*
|
||||
* Check if this really is a spurious interrupt and ACK it
|
||||
* if it is a vectored one. Just in case...
|
||||
* Spurious interrupts should not be ACKed.
|
||||
*/
|
||||
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
|
||||
if (v & (1 << (vector & 0x1f)))
|
||||
ack_APIC_irq();
|
||||
|
||||
inc_irq_stat(irq_spurious_count);
|
||||
|
||||
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
|
||||
pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
|
||||
"should never happen.\n", vector, smp_processor_id());
|
||||
/*
|
||||
* If this is a spurious interrupt then do not acknowledge
|
||||
*/
|
||||
if (vector == SPURIOUS_APIC_VECTOR) {
|
||||
/* See SDM vol 3 */
|
||||
pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
|
||||
smp_processor_id());
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If it is a vectored one, verify it's set in the ISR. If set,
|
||||
* acknowledge it.
|
||||
*/
|
||||
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
|
||||
if (v & (1 << (vector & 0x1f))) {
|
||||
pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
|
||||
vector, smp_processor_id());
|
||||
ack_APIC_irq();
|
||||
} else {
|
||||
pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
|
||||
vector, smp_processor_id());
|
||||
}
|
||||
out:
|
||||
trace_spurious_apic_exit(vector);
|
||||
exiting_irq();
|
||||
}
|
||||
@@ -2415,11 +2456,8 @@ static void __init apic_bsp_up_setup(void)
|
||||
/**
|
||||
* apic_bsp_setup - Setup function for local apic and io-apic
|
||||
* @upmode: Force UP mode (for APIC_init_uniprocessor)
|
||||
*
|
||||
* Returns:
|
||||
* apic_id of BSP APIC
|
||||
*/
|
||||
void __init apic_bsp_setup(bool upmode)
|
||||
static void __init apic_bsp_setup(bool upmode)
|
||||
{
|
||||
connect_bsp_APIC();
|
||||
if (upmode)
|
||||
|
@@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
if (cpu < BITS_PER_LONG)
|
||||
clear_bit(cpu, &mask);
|
||||
__clear_bit(cpu, &mask);
|
||||
|
||||
_flat_send_IPI_mask(mask, vector);
|
||||
}
|
||||
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector)
|
||||
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
|
||||
|
||||
if (cpu < BITS_PER_LONG)
|
||||
clear_bit(cpu, &mask);
|
||||
__clear_bit(cpu, &mask);
|
||||
|
||||
_flat_send_IPI_mask(mask, vector);
|
||||
}
|
||||
|
@@ -58,6 +58,7 @@
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/dma.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/time.h>
|
||||
#include <asm/i8259.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/irq_remapping.h>
|
||||
@@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Interrupt shutdown masks the ioapic pin, but the interrupt might already
|
||||
* be in flight, but not yet serviced by the target CPU. That means
|
||||
* __synchronize_hardirq() would return and claim that everything is calmed
|
||||
* down. So free_irq() would proceed and deactivate the interrupt and free
|
||||
* resources.
|
||||
*
|
||||
* Once the target CPU comes around to service it it will find a cleared
|
||||
* vector and complain. While the spurious interrupt is harmless, the full
|
||||
* release of resources might prevent the interrupt from being acknowledged
|
||||
* which keeps the hardware in a weird state.
|
||||
*
|
||||
* Verify that the corresponding Remote-IRR bits are clear.
|
||||
*/
|
||||
static int ioapic_irq_get_chip_state(struct irq_data *irqd,
|
||||
enum irqchip_irq_state which,
|
||||
bool *state)
|
||||
{
|
||||
struct mp_chip_data *mcd = irqd->chip_data;
|
||||
struct IO_APIC_route_entry rentry;
|
||||
struct irq_pin_list *p;
|
||||
|
||||
if (which != IRQCHIP_STATE_ACTIVE)
|
||||
return -EINVAL;
|
||||
|
||||
*state = false;
|
||||
raw_spin_lock(&ioapic_lock);
|
||||
for_each_irq_pin(p, mcd->irq_2_pin) {
|
||||
rentry = __ioapic_read_entry(p->apic, p->pin);
|
||||
/*
|
||||
* The remote IRR is only valid in level trigger mode. It's
|
||||
* meaning is undefined for edge triggered interrupts and
|
||||
* irrelevant because the IO-APIC treats them as fire and
|
||||
* forget.
|
||||
*/
|
||||
if (rentry.irr && rentry.trigger) {
|
||||
*state = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&ioapic_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct irq_chip ioapic_chip __read_mostly = {
|
||||
.name = "IO-APIC",
|
||||
.irq_startup = startup_ioapic_irq,
|
||||
@@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
|
||||
.irq_eoi = ioapic_ack_level,
|
||||
.irq_set_affinity = ioapic_set_affinity,
|
||||
.irq_retrigger = irq_chip_retrigger_hierarchy,
|
||||
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE,
|
||||
};
|
||||
|
||||
@@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
|
||||
.irq_eoi = ioapic_ir_ack_level,
|
||||
.irq_set_affinity = ioapic_set_affinity,
|
||||
.irq_retrigger = irq_chip_retrigger_hierarchy,
|
||||
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE,
|
||||
};
|
||||
|
||||
@@ -2083,6 +2130,9 @@ static inline void __init check_timer(void)
|
||||
unsigned long flags;
|
||||
int no_pin1 = 0;
|
||||
|
||||
if (!global_clock_event)
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
/*
|
||||
|
@@ -1,3 +1,4 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Support of MSI, HPET and DMAR interrupts.
|
||||
*
|
||||
@@ -5,10 +6,6 @@
|
||||
* Moved from arch/x86/kernel/apic/io_apic.c.
|
||||
* Jiang Liu <jiang.liu@linux.intel.com>
|
||||
* Convert to hierarchical irqdomain
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/interrupt.h>
|
||||
@@ -373,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
|
||||
return d;
|
||||
}
|
||||
|
||||
int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
|
||||
int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
|
||||
int dev_num)
|
||||
{
|
||||
struct irq_alloc_info info;
|
||||
|
||||
init_irq_alloc_info(&info, NULL);
|
||||
info.type = X86_IRQ_ALLOC_TYPE_HPET;
|
||||
info.hpet_data = dev;
|
||||
info.hpet_data = hc;
|
||||
info.hpet_id = hpet_dev_id(domain);
|
||||
info.hpet_index = dev_num;
|
||||
|
||||
|
@@ -1,3 +1,4 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Local APIC related interfaces to support IOAPIC, MSI, etc.
|
||||
*
|
||||
@@ -5,10 +6,6 @@
|
||||
* Moved from arch/x86/kernel/apic/io_apic.c.
|
||||
* Jiang Liu <jiang.liu@linux.intel.com>
|
||||
* Enable support of hierarchical irqdomains
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
@@ -343,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd)
|
||||
trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
|
||||
apicd->prev_cpu);
|
||||
|
||||
per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
|
||||
per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
|
||||
irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
|
||||
apicd->vector = 0;
|
||||
|
||||
@@ -352,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd)
|
||||
if (!vector)
|
||||
return;
|
||||
|
||||
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
|
||||
per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
|
||||
irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
|
||||
apicd->prev_vector = 0;
|
||||
apicd->move_in_progress = 0;
|
||||
|
@@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
|
||||
cpumask_copy(tmpmsk, mask);
|
||||
/* If IPI should not be sent to self, clear current CPU */
|
||||
if (apic_dest != APIC_DEST_ALLINC)
|
||||
cpumask_clear_cpu(smp_processor_id(), tmpmsk);
|
||||
__cpumask_clear_cpu(smp_processor_id(), tmpmsk);
|
||||
|
||||
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
|
||||
for_each_cpu(cpu, tmpmsk) {
|
||||
|
@@ -38,7 +38,6 @@ static void __used common(void)
|
||||
#endif
|
||||
|
||||
BLANK();
|
||||
OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
|
||||
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
|
||||
|
||||
BLANK();
|
||||
@@ -77,6 +76,7 @@ static void __used common(void)
|
||||
BLANK();
|
||||
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
|
||||
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
|
||||
OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
|
||||
#endif
|
||||
|
||||
BLANK();
|
||||
|
@@ -24,6 +24,7 @@ obj-y += match.o
|
||||
obj-y += bugs.o
|
||||
obj-y += aperfmperf.o
|
||||
obj-y += cpuid-deps.o
|
||||
obj-y += umwait.o
|
||||
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
|
||||
@@ -38,6 +39,7 @@ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
|
||||
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
|
||||
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
|
||||
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
|
||||
obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o
|
||||
|
||||
obj-$(CONFIG_X86_MCE) += mce/
|
||||
obj-$(CONFIG_MTRR) += mtrr/
|
||||
@@ -47,6 +49,7 @@ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
|
||||
|
||||
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
|
||||
obj-$(CONFIG_ACRN_GUEST) += acrn.o
|
||||
|
||||
ifdef CONFIG_X86_FEATURE_NAMES
|
||||
quiet_cmd_mkcapflags = MKCAP $@
|
||||
@@ -54,8 +57,7 @@ quiet_cmd_mkcapflags = MKCAP $@
|
||||
|
||||
cpufeature = $(src)/../../include/asm/cpufeatures.h
|
||||
|
||||
targets += capflags.c
|
||||
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
|
||||
$(call if_changed,mkcapflags)
|
||||
endif
|
||||
clean-files += capflags.c
|
||||
targets += capflags.c
|
||||
|
69
arch/x86/kernel/cpu/acrn.c
Normal file
69
arch/x86/kernel/cpu/acrn.c
Normal file
@@ -0,0 +1,69 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ACRN detection support
|
||||
*
|
||||
* Copyright (C) 2019 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* Jason Chen CJ <jason.cj.chen@intel.com>
|
||||
* Zhao Yakui <yakui.zhao@intel.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/interrupt.h>
|
||||
#include <asm/acrn.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
static uint32_t __init acrn_detect(void)
|
||||
{
|
||||
return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
|
||||
}
|
||||
|
||||
static void __init acrn_init_platform(void)
|
||||
{
|
||||
/* Setup the IDT for ACRN hypervisor callback */
|
||||
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
|
||||
}
|
||||
|
||||
static bool acrn_x2apic_available(void)
|
||||
{
|
||||
/*
|
||||
* x2apic is not supported for now. Future enablement will have to check
|
||||
* X86_FEATURE_X2APIC to determine whether x2apic is supported in the
|
||||
* guest.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
static void (*acrn_intr_handler)(void);
|
||||
|
||||
__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
/*
|
||||
* The hypervisor requires that the APIC EOI should be acked.
|
||||
* If the APIC EOI is not acked, the APIC ISR bit for the
|
||||
* HYPERVISOR_CALLBACK_VECTOR will not be cleared and then it
|
||||
* will block the interrupt whose vector is lower than
|
||||
* HYPERVISOR_CALLBACK_VECTOR.
|
||||
*/
|
||||
entering_ack_irq();
|
||||
inc_irq_stat(irq_hv_callback_count);
|
||||
|
||||
if (acrn_intr_handler)
|
||||
acrn_intr_handler();
|
||||
|
||||
exiting_irq();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
const __initconst struct hypervisor_x86 x86_hyper_acrn = {
|
||||
.name = "ACRN",
|
||||
.detect = acrn_detect,
|
||||
.type = X86_HYPER_ACRN,
|
||||
.init.init_platform = acrn_init_platform,
|
||||
.init.x2apic_available = acrn_x2apic_available,
|
||||
};
|
@@ -13,6 +13,7 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
@@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu)
|
||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
return 0;
|
||||
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
||||
return 0;
|
||||
|
||||
aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
|
||||
return per_cpu(samples.khz, cpu);
|
||||
}
|
||||
@@ -101,9 +105,12 @@ void arch_freq_prepare_all(void)
|
||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
return;
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
for_each_online_cpu(cpu) {
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
||||
continue;
|
||||
if (!aperfmperf_snapshot_cpu(cpu, now, false))
|
||||
wait = true;
|
||||
}
|
||||
|
||||
if (wait)
|
||||
msleep(APERFMPERF_REFRESH_DELAY_MS);
|
||||
@@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu)
|
||||
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
|
||||
return 0;
|
||||
|
||||
if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
|
||||
return 0;
|
||||
|
||||
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
|
||||
return per_cpu(samples.khz, cpu);
|
||||
|
||||
|
@@ -835,6 +835,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
|
||||
* bit in the mask to allow guests to use the mitigation even in the
|
||||
* case where the host does not enable it.
|
||||
*/
|
||||
if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
|
||||
static_cpu_has(X86_FEATURE_AMD_SSBD)) {
|
||||
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have three CPU feature flags that are in play here:
|
||||
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
|
||||
@@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
|
||||
x86_amd_ssb_disable();
|
||||
} else {
|
||||
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
|
||||
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
|
||||
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
|
||||
}
|
||||
}
|
||||
|
@@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
|
||||
if (c->x86 < 0x17) {
|
||||
/* LLC is at the node level. */
|
||||
per_cpu(cpu_llc_id, cpu) = node_id;
|
||||
} else if (c->x86 == 0x17 &&
|
||||
c->x86_model >= 0 && c->x86_model <= 0x1F) {
|
||||
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
|
||||
/*
|
||||
* LLC is at the core complex level.
|
||||
* Core complex ID is ApicId[3] for these processors.
|
||||
|
@@ -366,6 +366,77 @@ out:
|
||||
cr4_clear_bits(X86_CR4_UMIP);
|
||||
}
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
|
||||
static unsigned long cr4_pinned_bits __ro_after_init;
|
||||
|
||||
void native_write_cr0(unsigned long val)
|
||||
{
|
||||
unsigned long bits_missing = 0;
|
||||
|
||||
set_register:
|
||||
asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
|
||||
|
||||
if (static_branch_likely(&cr_pinning)) {
|
||||
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
|
||||
bits_missing = X86_CR0_WP;
|
||||
val |= bits_missing;
|
||||
goto set_register;
|
||||
}
|
||||
/* Warn after we've set the missing bits. */
|
||||
WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(native_write_cr0);
|
||||
|
||||
void native_write_cr4(unsigned long val)
|
||||
{
|
||||
unsigned long bits_missing = 0;
|
||||
|
||||
set_register:
|
||||
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
|
||||
|
||||
if (static_branch_likely(&cr_pinning)) {
|
||||
if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
|
||||
bits_missing = ~val & cr4_pinned_bits;
|
||||
val |= bits_missing;
|
||||
goto set_register;
|
||||
}
|
||||
/* Warn after we've set the missing bits. */
|
||||
WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
|
||||
bits_missing);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(native_write_cr4);
|
||||
|
||||
void cr4_init(void)
|
||||
{
|
||||
unsigned long cr4 = __read_cr4();
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PCID))
|
||||
cr4 |= X86_CR4_PCIDE;
|
||||
if (static_branch_likely(&cr_pinning))
|
||||
cr4 |= cr4_pinned_bits;
|
||||
|
||||
__write_cr4(cr4);
|
||||
|
||||
/* Initialize cr4 shadow for this CPU. */
|
||||
this_cpu_write(cpu_tlbstate.cr4, cr4);
|
||||
}
|
||||
|
||||
/*
|
||||
* Once CPU feature detection is finished (and boot params have been
|
||||
* parsed), record any of the sensitive CR bits that are set, and
|
||||
* enable CR pinning.
|
||||
*/
|
||||
static void __init setup_cr_pinning(void)
|
||||
{
|
||||
unsigned long mask;
|
||||
|
||||
mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
|
||||
cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
|
||||
static_key_enable(&cr_pinning.key);
|
||||
}
|
||||
|
||||
/*
|
||||
* Protection Keys are not available in 32-bit mode.
|
||||
*/
|
||||
@@ -801,6 +872,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
static void init_cqm(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
|
||||
c->x86_cache_max_rmid = -1;
|
||||
c->x86_cache_occ_scale = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* will be overridden if occupancy monitoring exists */
|
||||
c->x86_cache_max_rmid = cpuid_ebx(0xf);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
|
||||
cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
|
||||
cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
|
||||
cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
c->x86_cache_max_rmid = ecx;
|
||||
c->x86_cache_occ_scale = ebx;
|
||||
}
|
||||
}
|
||||
|
||||
void get_cpu_cap(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
@@ -823,6 +918,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
|
||||
c->x86_capability[CPUID_7_0_EBX] = ebx;
|
||||
c->x86_capability[CPUID_7_ECX] = ecx;
|
||||
c->x86_capability[CPUID_7_EDX] = edx;
|
||||
|
||||
/* Check valid sub-leaf index before accessing it */
|
||||
if (eax >= 1) {
|
||||
cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
|
||||
c->x86_capability[CPUID_7_1_EAX] = eax;
|
||||
}
|
||||
}
|
||||
|
||||
/* Extended state features: level 0x0000000d */
|
||||
@@ -832,33 +933,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
|
||||
c->x86_capability[CPUID_D_1_EAX] = eax;
|
||||
}
|
||||
|
||||
/* Additional Intel-defined flags: level 0x0000000F */
|
||||
if (c->cpuid_level >= 0x0000000F) {
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=0 */
|
||||
cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
|
||||
c->x86_capability[CPUID_F_0_EDX] = edx;
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
|
||||
/* will be overridden if occupancy monitoring exists */
|
||||
c->x86_cache_max_rmid = ebx;
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
|
||||
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
|
||||
c->x86_capability[CPUID_F_1_EDX] = edx;
|
||||
|
||||
if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
|
||||
((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
|
||||
(cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
|
||||
c->x86_cache_max_rmid = ecx;
|
||||
c->x86_cache_occ_scale = ebx;
|
||||
}
|
||||
} else {
|
||||
c->x86_cache_max_rmid = -1;
|
||||
c->x86_cache_occ_scale = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* AMD-defined flags: level 0x80000001 */
|
||||
eax = cpuid_eax(0x80000000);
|
||||
c->extended_cpuid_level = eax;
|
||||
@@ -889,6 +963,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
|
||||
|
||||
init_scattered_cpuid_features(c);
|
||||
init_speculation_control(c);
|
||||
init_cqm(c);
|
||||
|
||||
/*
|
||||
* Clear/Set all flags overridden by options, after probe.
|
||||
@@ -1299,6 +1374,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
|
||||
cpu, apicid, c->initial_apicid);
|
||||
}
|
||||
BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
|
||||
BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
|
||||
#else
|
||||
c->logical_proc_id = 0;
|
||||
#endif
|
||||
@@ -1464,6 +1540,7 @@ void __init identify_boot_cpu(void)
|
||||
enable_sep_cpu();
|
||||
#endif
|
||||
cpu_detect_tlb(&boot_cpu_data);
|
||||
setup_cr_pinning();
|
||||
}
|
||||
|
||||
void identify_secondary_cpu(struct cpuinfo_x86 *c)
|
||||
@@ -1698,12 +1775,6 @@ void cpu_init(void)
|
||||
|
||||
wait_for_master_cpu(cpu);
|
||||
|
||||
/*
|
||||
* Initialize the CR4 shadow before doing anything that could
|
||||
* try to read it.
|
||||
*/
|
||||
cr4_init_shadow();
|
||||
|
||||
if (cpu)
|
||||
load_ucode_ap();
|
||||
|
||||
@@ -1798,12 +1869,6 @@ void cpu_init(void)
|
||||
|
||||
wait_for_master_cpu(cpu);
|
||||
|
||||
/*
|
||||
* Initialize the CR4 shadow before doing anything that could
|
||||
* try to read it.
|
||||
*/
|
||||
cr4_init_shadow();
|
||||
|
||||
show_ucode_info_early();
|
||||
|
||||
pr_info("Initializing CPU#%d\n", cpu);
|
||||
|
@@ -20,6 +20,7 @@ struct cpuid_dep {
|
||||
* but it's difficult to tell that to the init reference checker.
|
||||
*/
|
||||
static const struct cpuid_dep cpuid_deps[] = {
|
||||
{ X86_FEATURE_FXSR, X86_FEATURE_FPU },
|
||||
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
|
||||
@@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = {
|
||||
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_CMOV, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_MMX, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
|
||||
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
|
||||
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
||||
@@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = {
|
||||
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
|
||||
{ X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
|
||||
{ X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
|
||||
{ X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
|
||||
{}
|
||||
};
|
||||
|
||||
|
@@ -26,13 +26,6 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/hypervisor.h>
|
||||
|
||||
extern const struct hypervisor_x86 x86_hyper_vmware;
|
||||
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
|
||||
extern const struct hypervisor_x86 x86_hyper_xen_pv;
|
||||
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
|
||||
extern const struct hypervisor_x86 x86_hyper_kvm;
|
||||
extern const struct hypervisor_x86 x86_hyper_jailhouse;
|
||||
|
||||
static const __initconst struct hypervisor_x86 * const hypervisors[] =
|
||||
{
|
||||
#ifdef CONFIG_XEN_PV
|
||||
@@ -49,11 +42,22 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
|
||||
#ifdef CONFIG_JAILHOUSE_GUEST
|
||||
&x86_hyper_jailhouse,
|
||||
#endif
|
||||
#ifdef CONFIG_ACRN_GUEST
|
||||
&x86_hyper_acrn,
|
||||
#endif
|
||||
};
|
||||
|
||||
enum x86_hypervisor_type x86_hyper_type;
|
||||
EXPORT_SYMBOL(x86_hyper_type);
|
||||
|
||||
bool __initdata nopv;
|
||||
static __init int parse_nopv(char *arg)
|
||||
{
|
||||
nopv = true;
|
||||
return 0;
|
||||
}
|
||||
early_param("nopv", parse_nopv);
|
||||
|
||||
static inline const struct hypervisor_x86 * __init
|
||||
detect_hypervisor_vendor(void)
|
||||
{
|
||||
@@ -61,6 +65,9 @@ detect_hypervisor_vendor(void)
|
||||
uint32_t pri, max_pri = 0;
|
||||
|
||||
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
|
||||
if (unlikely(nopv) && !(*p)->ignore_nopv)
|
||||
continue;
|
||||
|
||||
pri = (*p)->detect();
|
||||
if (pri > max_pri) {
|
||||
max_pri = pri;
|
||||
|
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Processors which have self-snooping capability can handle conflicting
|
||||
* memory type across CPUs by snooping its own cache. However, there exists
|
||||
* CPU models in which having conflicting memory types still leads to
|
||||
* unpredictable behavior, machine check errors, or hangs. Clear this
|
||||
* feature to prevent its use on machines with known erratas.
|
||||
*/
|
||||
static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
|
||||
{
|
||||
switch (c->x86_model) {
|
||||
case INTEL_FAM6_CORE_YONAH:
|
||||
case INTEL_FAM6_CORE2_MEROM:
|
||||
case INTEL_FAM6_CORE2_MEROM_L:
|
||||
case INTEL_FAM6_CORE2_PENRYN:
|
||||
case INTEL_FAM6_CORE2_DUNNINGTON:
|
||||
case INTEL_FAM6_NEHALEM:
|
||||
case INTEL_FAM6_NEHALEM_G:
|
||||
case INTEL_FAM6_NEHALEM_EP:
|
||||
case INTEL_FAM6_NEHALEM_EX:
|
||||
case INTEL_FAM6_WESTMERE:
|
||||
case INTEL_FAM6_WESTMERE_EP:
|
||||
case INTEL_FAM6_SANDYBRIDGE:
|
||||
setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
|
||||
}
|
||||
}
|
||||
|
||||
static bool ring3mwait_disabled __read_mostly;
|
||||
|
||||
static int __init ring3mwait_disable(char *__unused)
|
||||
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
|
||||
}
|
||||
|
||||
check_mpx_erratum(c);
|
||||
check_memory_type_self_snoop_errata(c);
|
||||
|
||||
/*
|
||||
* Get the number of SMT siblings early from the extended topology
|
||||
|
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
|
||||
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
|
||||
};
|
||||
|
||||
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
|
||||
{
|
||||
[0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
|
||||
};
|
||||
|
||||
static const char *smca_get_name(enum smca_bank_types t)
|
||||
{
|
||||
if (t >= N_SMCA_BANK_TYPES)
|
||||
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
|
||||
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
|
||||
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
|
||||
|
||||
/* Map of banks that have more than MCA_MISC0 available. */
|
||||
static DEFINE_PER_CPU(u32, smca_misc_banks_map);
|
||||
|
||||
static void amd_threshold_interrupt(void);
|
||||
static void amd_deferred_error_interrupt(void);
|
||||
|
||||
@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
|
||||
}
|
||||
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
|
||||
|
||||
static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
|
||||
{
|
||||
u32 low, high;
|
||||
|
||||
/*
|
||||
* For SMCA enabled processors, BLKPTR field of the first MISC register
|
||||
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
|
||||
*/
|
||||
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
|
||||
return;
|
||||
|
||||
if (!(low & MCI_CONFIG_MCAX))
|
||||
return;
|
||||
|
||||
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
|
||||
return;
|
||||
|
||||
if (low & MASK_BLKPTR_LO)
|
||||
per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
|
||||
|
||||
}
|
||||
|
||||
static void smca_configure(unsigned int bank, unsigned int cpu)
|
||||
{
|
||||
unsigned int i, hwid_mcatype;
|
||||
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
|
||||
wrmsr(smca_config, low, high);
|
||||
}
|
||||
|
||||
smca_set_misc_banks_map(bank, cpu);
|
||||
|
||||
/* Return early if this bank was already initialized. */
|
||||
if (smca_banks[bank].hwid)
|
||||
return;
|
||||
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
|
||||
wrmsr(MSR_CU_DEF_ERR, low, high);
|
||||
}
|
||||
|
||||
static u32 smca_get_block_address(unsigned int bank, unsigned int block)
|
||||
static u32 smca_get_block_address(unsigned int bank, unsigned int block,
|
||||
unsigned int cpu)
|
||||
{
|
||||
u32 low, high;
|
||||
u32 addr = 0;
|
||||
|
||||
if (smca_get_bank_type(bank) == SMCA_RESERVED)
|
||||
return addr;
|
||||
|
||||
if (!block)
|
||||
return MSR_AMD64_SMCA_MCx_MISC(bank);
|
||||
|
||||
/* Check our cache first: */
|
||||
if (smca_bank_addrs[bank][block] != -1)
|
||||
return smca_bank_addrs[bank][block];
|
||||
if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* For SMCA enabled processors, BLKPTR field of the first MISC register
|
||||
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
|
||||
*/
|
||||
if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
|
||||
goto out;
|
||||
|
||||
if (!(low & MCI_CONFIG_MCAX))
|
||||
goto out;
|
||||
|
||||
if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
|
||||
(low & MASK_BLKPTR_LO))
|
||||
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
|
||||
|
||||
out:
|
||||
smca_bank_addrs[bank][block] = addr;
|
||||
return addr;
|
||||
return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
|
||||
}
|
||||
|
||||
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
|
||||
unsigned int bank, unsigned int block)
|
||||
unsigned int bank, unsigned int block,
|
||||
unsigned int cpu)
|
||||
{
|
||||
u32 addr = 0, offset = 0;
|
||||
|
||||
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
|
||||
if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
|
||||
return addr;
|
||||
|
||||
if (mce_flags.smca)
|
||||
return smca_get_block_address(bank, block);
|
||||
return smca_get_block_address(bank, block, cpu);
|
||||
|
||||
/* Fall back to method we used for older processors: */
|
||||
switch (block) {
|
||||
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
|
||||
/* cpu init entry point, called from mce.c with preempt off */
|
||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
unsigned int bank, block, cpu = smp_processor_id();
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
int offset = -1;
|
||||
|
||||
for (bank = 0; bank < mca_cfg.banks; ++bank) {
|
||||
|
||||
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
|
||||
if (mce_flags.smca)
|
||||
smca_configure(bank, cpu);
|
||||
|
||||
disable_err_thresholding(c, bank);
|
||||
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
address = get_block_address(address, low, high, bank, block);
|
||||
address = get_block_address(address, low, high, bank, block, cpu);
|
||||
if (!address)
|
||||
break;
|
||||
|
||||
@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank)
	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
		log_error_deferred(bank);
}

@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
	struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
	unsigned int bank, cpu = smp_processor_id();

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;

@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
	u32 low, high;
	int err;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
	if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
		return 0;

	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
	if (err)
		goto out_free;
recurse:
	address = get_block_address(address, low, high, bank, ++block);
	address = get_block_address(address, low, high, bank, ++block, cpu);
	if (!address)
		return 0;

@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
{
	unsigned int bank;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
	for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
	if (bp)
		return 0;

	bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
	bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
		     GFP_KERNEL);
	if (!bp)
		return -ENOMEM;

	per_cpu(threshold_banks, cpu) = bp;

	for (bank = 0; bank < mca_cfg.banks; ++bank) {
	for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
@@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex);

DEFINE_PER_CPU(unsigned, mce_exception_count);

struct mce_bank *mce_banks __read_mostly;
DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);

struct mce_bank {
	u64 ctl;			/* subevents to enable */
	bool init;			/* initialise bank? */
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);

#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
struct mce_bank_dev {
	struct device_attribute attr;	/* device attribute */
	char attrname[ATTR_LEN];	/* attribute name */
	u8 bank;			/* bank number */
};
static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];

struct mce_vendor_flags mce_flags __read_mostly;

struct mca_config mca_cfg __read_mostly = {
|
||||
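/*
 * Illustrative sketch (not part of the patch): the per-CPU array above
 * replaces the single global mce_banks allocation. CPU-local code reaches it
 * via this_cpu_ptr()/this_cpu_read(), while code acting on behalf of another
 * CPU uses per_cpu(). A minimal, hypothetical helper assuming only the
 * declarations from this hunk:
 */
static u64 example_bank_ctl(unsigned int cpu, unsigned int bank)
{
	/* Banks beyond this CPU's advertised count simply do not exist. */
	if (bank >= per_cpu(mce_num_banks, cpu))
		return 0;

	return per_cpu(mce_banks_array, cpu)[bank].ctl;
}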
@@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
|
||||
*/
|
||||
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
bool error_seen = false;
|
||||
struct mce m;
|
||||
int i;
|
||||
@@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
||||
if (flags & MCP_TIMESTAMP)
|
||||
m.tsc = rdtsc();
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
if (!mce_banks[i].ctl || !test_bit(i, *b))
|
||||
continue;
|
||||
|
||||
@@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
|
||||
char *tmp;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
m->status = mce_rdmsrl(msr_ops.status(i));
|
||||
if (!(m->status & MCI_STATUS_VAL))
|
||||
continue;
|
||||
@@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
if (test_bit(i, toclear))
|
||||
mce_wrmsrl(msr_ops.status(i), 0);
|
||||
}
|
||||
@@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
|
||||
unsigned long *toclear, unsigned long *valid_banks,
|
||||
int no_way_out, int *worst)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
struct mca_config *cfg = &mca_cfg;
|
||||
int severity, i;
|
||||
|
||||
for (i = 0; i < cfg->banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
__clear_bit(i, toclear);
|
||||
if (!test_bit(i, valid_banks))
|
||||
continue;
|
||||
@@ -1330,7 +1348,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
||||
local_irq_enable();
|
||||
|
||||
if (kill_it || do_memory_failure(&m))
|
||||
force_sig(SIGBUS, current);
|
||||
force_sig(SIGBUS);
|
||||
local_irq_disable();
|
||||
ist_end_non_atomic();
|
||||
} else {
|
||||
@@ -1463,27 +1481,29 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);

static int __mcheck_cpu_mce_banks_init(void)
static void __mcheck_cpu_mce_banks_init(void)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
	u8 n_banks = this_cpu_read(mce_num_banks);
	int i;

	mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
	if (!mce_banks)
		return -ENOMEM;

	for (i = 0; i < MAX_NR_BANKS; i++) {
	for (i = 0; i < n_banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		/*
		 * Init them all, __mcheck_cpu_apply_quirks() is going to apply
		 * the required vendor quirks before
		 * __mcheck_cpu_init_clear_banks() does the final bank setup.
		 */
		b->ctl = -1ULL;
		b->init = 1;
	}
	return 0;
}

/*
 * Initialize Machine Checks for a CPU.
 */
static int __mcheck_cpu_cap_init(void)
static void __mcheck_cpu_cap_init(void)
{
	u64 cap;
	u8 b;
@@ -1491,25 +1511,23 @@ static int __mcheck_cpu_cap_init(void)
	rdmsrl(MSR_IA32_MCG_CAP, cap);

	b = cap & MCG_BANKCNT_MASK;
	if (WARN_ON_ONCE(b > MAX_NR_BANKS))

	if (b > MAX_NR_BANKS) {
		pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
			smp_processor_id(), MAX_NR_BANKS, b);
		b = MAX_NR_BANKS;
	}

	mca_cfg.banks = max(mca_cfg.banks, b);

	if (!mce_banks) {
		int err = __mcheck_cpu_mce_banks_init();
		if (err)
			return err;
	}

	this_cpu_write(mce_num_banks, b);

	__mcheck_cpu_mce_banks_init();

	/* Use accurate RIP reporting if available. */
	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;

	if (cap & MCG_SER_P)
		mca_cfg.ser = 1;

	return 0;
}
|
||||
|
||||
static void __mcheck_cpu_init_generic(void)
|
||||
@@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void)
|
||||
|
||||
static void __mcheck_cpu_init_clear_banks(void)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
|
||||
if (!b->init)
|
||||
@@ -1548,6 +1567,33 @@ static void __mcheck_cpu_init_clear_banks(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Do a final check to see if there are any unused/RAZ banks.
|
||||
*
|
||||
* This must be done after the banks have been initialized and any quirks have
|
||||
* been applied.
|
||||
*
|
||||
* Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
|
||||
* Otherwise, a user who disables a bank will not be able to re-enable it
|
||||
* without a system reboot.
|
||||
*/
|
||||
static void __mcheck_cpu_check_banks(void)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
u64 msrval;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
|
||||
if (!b->init)
|
||||
continue;
|
||||
|
||||
rdmsrl(msr_ops.ctl(i), msrval);
|
||||
b->init = !!msrval;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
|
||||
* EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
|
||||
@@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
|
||||
/* Add per CPU specific workarounds here */
|
||||
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
struct mca_config *cfg = &mca_cfg;
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
|
||||
@@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
||||
|
||||
/* This should be disabled by the BIOS, but isn't always */
|
||||
if (c->x86_vendor == X86_VENDOR_AMD) {
|
||||
if (c->x86 == 15 && cfg->banks > 4) {
|
||||
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
|
||||
/*
|
||||
* disable GART TBL walk error reporting, which
|
||||
* trips off incorrectly with the IOMMU & 3ware
|
||||
@@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
||||
* Various K7s with broken bank 0 around. Always disable
|
||||
* by default.
|
||||
*/
|
||||
if (c->x86 == 6 && cfg->banks > 0)
|
||||
if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
|
||||
mce_banks[0].ctl = 0;
|
||||
|
||||
/*
|
||||
@@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
||||
* valid event later, merely don't write CTL0.
|
||||
*/
|
||||
|
||||
if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
|
||||
if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
|
||||
mce_banks[0].init = 0;
|
||||
|
||||
/*
|
||||
@@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
|
||||
if (!mce_available(c))
|
||||
return;
|
||||
|
||||
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
|
||||
__mcheck_cpu_cap_init();
|
||||
|
||||
if (__mcheck_cpu_apply_quirks(c) < 0) {
|
||||
mca_cfg.disabled = 1;
|
||||
return;
|
||||
}
|
||||
@@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
|
||||
__mcheck_cpu_init_generic();
|
||||
__mcheck_cpu_init_vendor(c);
|
||||
__mcheck_cpu_init_clear_banks();
|
||||
__mcheck_cpu_check_banks();
|
||||
__mcheck_cpu_setup_timer();
|
||||
}
|
||||
|
||||
@@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg)
|
||||
|
||||
void mce_disable_bank(int bank)
|
||||
{
|
||||
if (bank >= mca_cfg.banks) {
|
||||
if (bank >= this_cpu_read(mce_num_banks)) {
|
||||
pr_warn(FW_BUG
|
||||
"Ignoring request to disable invalid MCA bank %d.\n",
|
||||
bank);
|
||||
@@ -1949,9 +1999,10 @@ int __init mcheck_init(void)
|
||||
*/
|
||||
static void mce_disable_error_reporting(void)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
|
||||
if (b->init)
|
||||
@@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = {
|
||||
|
||||
DEFINE_PER_CPU(struct device *, mce_device);
|
||||
|
||||
static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
|
||||
static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
|
||||
{
|
||||
return container_of(attr, struct mce_bank, attr);
|
||||
return container_of(attr, struct mce_bank_dev, attr);
|
||||
}
|
||||
|
||||
static ssize_t show_bank(struct device *s, struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
|
||||
u8 bank = attr_to_bank(attr)->bank;
|
||||
struct mce_bank *b;
|
||||
|
||||
if (bank >= per_cpu(mce_num_banks, s->id))
|
||||
return -EINVAL;
|
||||
|
||||
b = &per_cpu(mce_banks_array, s->id)[bank];
|
||||
|
||||
if (!b->init)
|
||||
return -ENODEV;
|
||||
|
||||
return sprintf(buf, "%llx\n", b->ctl);
|
||||
}
|
||||
|
||||
static ssize_t set_bank(struct device *s, struct device_attribute *attr,
|
||||
const char *buf, size_t size)
|
||||
{
|
||||
u8 bank = attr_to_bank(attr)->bank;
|
||||
struct mce_bank *b;
|
||||
u64 new;
|
||||
|
||||
if (kstrtou64(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
attr_to_bank(attr)->ctl = new;
|
||||
if (bank >= per_cpu(mce_num_banks, s->id))
|
||||
return -EINVAL;
|
||||
|
||||
b = &per_cpu(mce_banks_array, s->id)[bank];
|
||||
|
||||
if (!b->init)
|
||||
return -ENODEV;
|
||||
|
||||
b->ctl = new;
|
||||
mce_restart();
|
||||
|
||||
return size;
|
||||
@@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev)
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
/* Per cpu device init. All of the cpus still share the same ctrl bank: */
|
||||
/* Per CPU device init. All of the CPUs still share the same bank device: */
|
||||
static int mce_device_create(unsigned int cpu)
|
||||
{
|
||||
struct device *dev;
|
||||
@@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu)
|
||||
if (err)
|
||||
goto error;
|
||||
}
|
||||
for (j = 0; j < mca_cfg.banks; j++) {
|
||||
err = device_create_file(dev, &mce_banks[j].attr);
|
||||
for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
|
||||
err = device_create_file(dev, &mce_bank_devs[j].attr);
|
||||
if (err)
|
||||
goto error2;
|
||||
}
|
||||
@@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu)
|
||||
return 0;
|
||||
error2:
|
||||
while (--j >= 0)
|
||||
device_remove_file(dev, &mce_banks[j].attr);
|
||||
device_remove_file(dev, &mce_bank_devs[j].attr);
|
||||
error:
|
||||
while (--i >= 0)
|
||||
device_remove_file(dev, mce_device_attrs[i]);
|
||||
@@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu)
|
||||
for (i = 0; mce_device_attrs[i]; i++)
|
||||
device_remove_file(dev, mce_device_attrs[i]);
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++)
|
||||
device_remove_file(dev, &mce_banks[i].attr);
|
||||
for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
|
||||
device_remove_file(dev, &mce_bank_devs[i].attr);
|
||||
|
||||
device_unregister(dev);
|
||||
cpumask_clear_cpu(cpu, mce_device_initialized);
|
||||
@@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void)
|
||||
|
||||
static void mce_reenable_cpu(void)
|
||||
{
|
||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||
int i;
|
||||
|
||||
if (!mce_available(raw_cpu_ptr(&cpu_info)))
|
||||
@@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void)
|
||||
|
||||
if (!cpuhp_tasks_frozen)
|
||||
cmci_reenable();
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
|
||||
if (b->init)
|
||||
@@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < mca_cfg.banks; i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
for (i = 0; i < MAX_NR_BANKS; i++) {
|
||||
struct mce_bank_dev *b = &mce_bank_devs[i];
|
||||
struct device_attribute *a = &b->attr;
|
||||
|
||||
b->bank = i;
|
||||
|
||||
sysfs_attr_init(&a->attr);
|
||||
a->attr.name = b->attrname;
|
||||
snprintf(b->attrname, ATTR_LEN, "bank%d", i);
|
||||
@@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
			 "%llu\n");

static int __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void)
{
	struct dentry *dmce, *ffake_panic;
	struct dentry *dmce;

	dmce = mce_get_debugfs_dir();
	if (!dmce)
		return -ENOMEM;
	ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
						 NULL, &fake_panic_fops);
	if (!ffake_panic)
		return -ENOMEM;

	return 0;
	debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
				   &fake_panic_fops);
}
#else
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
static void __init mcheck_debugfs_init(void) { }
#endif

DEFINE_STATIC_KEY_FALSE(mcsafe_key);
@@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key);

static int __init mcheck_late_init(void)
{
	pr_info("Using %d MCE banks\n", mca_cfg.banks);

	if (mca_cfg.recovery)
		static_branch_inc(&mcsafe_key);

|
@@ -645,7 +645,6 @@ static const struct file_operations readme_fops = {
|
||||
|
||||
static struct dfs_node {
|
||||
char *name;
|
||||
struct dentry *d;
|
||||
const struct file_operations *fops;
|
||||
umode_t perm;
|
||||
} dfs_fls[] = {
|
||||
@@ -659,49 +658,23 @@ static struct dfs_node {
|
||||
{ .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
|
||||
};
|
||||
|
||||
static int __init debugfs_init(void)
|
||||
static void __init debugfs_init(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
dfs_inj = debugfs_create_dir("mce-inject", NULL);
|
||||
if (!dfs_inj)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
|
||||
dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
|
||||
dfs_fls[i].perm,
|
||||
dfs_inj,
|
||||
&i_mce,
|
||||
dfs_fls[i].fops);
|
||||
|
||||
if (!dfs_fls[i].d)
|
||||
goto err_dfs_add;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_dfs_add:
|
||||
while (i-- > 0)
|
||||
debugfs_remove(dfs_fls[i].d);
|
||||
|
||||
debugfs_remove(dfs_inj);
|
||||
dfs_inj = NULL;
|
||||
|
||||
return -ENODEV;
|
||||
for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
|
||||
debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
|
||||
&i_mce, dfs_fls[i].fops);
|
||||
}
|
||||
|
||||
static int __init inject_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
err = debugfs_init();
|
||||
if (err) {
|
||||
free_cpumask_var(mce_inject_cpumask);
|
||||
return err;
|
||||
}
|
||||
debugfs_init();
|
||||
|
||||
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
|
||||
mce_register_injector_chain(&inject_nb);
|
||||
|
@@ -22,17 +22,8 @@ enum severity_level {
|
||||
|
||||
extern struct blocking_notifier_head x86_mce_decoder_chain;
|
||||
|
||||
#define ATTR_LEN 16
|
||||
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
|
||||
|
||||
/* One object for each MCE bank, shared by all CPUs */
|
||||
struct mce_bank {
|
||||
u64 ctl; /* subevents to enable */
|
||||
unsigned char init; /* initialise bank? */
|
||||
struct device_attribute attr; /* device attribute */
|
||||
char attrname[ATTR_LEN]; /* attribute name */
|
||||
};
|
||||
|
||||
struct mce_evt_llist {
|
||||
struct llist_node llnode;
|
||||
struct mce mce;
|
||||
@@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void);
|
||||
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
|
||||
struct dentry *mce_get_debugfs_dir(void);
|
||||
|
||||
extern struct mce_bank *mce_banks;
|
||||
extern mce_banks_t mce_banks_ce_disabled;
|
||||
|
||||
#ifdef CONFIG_X86_MCE_INTEL
|
||||
@@ -128,7 +118,6 @@ struct mca_config {
|
||||
bios_cmci_threshold : 1,
|
||||
__reserved : 59;
|
||||
|
||||
u8 banks;
|
||||
s8 bootlog;
|
||||
int tolerant;
|
||||
int monarch_timeout;
|
||||
@@ -137,6 +126,7 @@ struct mca_config {
|
||||
};
|
||||
|
||||
extern struct mca_config mca_cfg;
|
||||
DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
|
||||
|
||||
struct mce_vendor_flags {
|
||||
/*
|
||||
|
@@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = {

static int __init severities_debugfs_init(void)
{
	struct dentry *dmce, *fsev;
	struct dentry *dmce;

	dmce = mce_get_debugfs_dir();
	if (!dmce)
		goto err_out;

	fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
				   &severities_coverage_fops);
	if (!fsev)
		goto err_out;

	debugfs_create_file("severities-coverage", 0444, dmce, NULL,
			    &severities_coverage_fops);
	return 0;

err_out:
	return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
|
@@ -59,7 +59,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE];

/*
 * Microcode patch container file is prepended to the initrd in cpio
 * format. See Documentation/x86/microcode.txt
 * format. See Documentation/x86/microcode.rst
 */
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
|
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
|
||||
.resume = mc_bp_resume,
|
||||
};
|
||||
|
||||
static int mc_cpu_online(unsigned int cpu)
|
||||
static int mc_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
struct device *dev;
|
||||
|
||||
dev = get_cpu_device(cpu);
|
||||
microcode_update_cpu(cpu);
|
||||
pr_debug("CPU%d added\n", cpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mc_cpu_online(unsigned int cpu)
|
||||
{
|
||||
struct device *dev = get_cpu_device(cpu);
|
||||
|
||||
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
|
||||
pr_err("Failed to create group for CPU%d\n", cpu);
|
||||
@@ -872,7 +875,9 @@ int __init microcode_init(void)
|
||||
goto out_ucode_group;
|
||||
|
||||
register_syscore_ops(&mc_syscore_ops);
|
||||
cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
|
||||
cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
|
||||
mc_cpu_starting, NULL);
|
||||
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
|
||||
mc_cpu_online, mc_cpu_down_prep);
|
||||
|
||||
pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
|
||||
|
@@ -4,6 +4,8 @@

# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
set -e

IN=$1
OUT=$2

|
@@ -17,6 +17,7 @@
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/i8253.h>
|
||||
#include <linux/random.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/hyperv-tlfs.h>
|
||||
@@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
|
||||
inc_irq_stat(hyperv_stimer0_count);
|
||||
if (hv_stimer0_handler)
|
||||
hv_stimer0_handler();
|
||||
add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
|
||||
ack_APIC_irq();
|
||||
|
||||
exiting_irq();
|
||||
@@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
|
||||
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
|
||||
{
|
||||
*vector = HYPERV_STIMER0_VECTOR;
|
||||
*irq = 0; /* Unused on x86/x64 */
|
||||
*irq = -1; /* Unused on x86/x64 */
|
||||
hv_stimer0_handler = handler;
|
||||
return 0;
|
||||
}
|
||||
@@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void)
|
||||
|
||||
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
|
||||
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
|
||||
lapic_timer_frequency = hv_lapic_frequency;
|
||||
lapic_timer_period = hv_lapic_frequency;
|
||||
pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
|
||||
lapic_timer_frequency);
|
||||
lapic_timer_period);
|
||||
}
|
||||
|
||||
register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
|
||||
|
@@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);
	wbinvd();

	/*
	 * Cache flushing is the most time-consuming step when programming
	 * the MTRRs. Fortunately, as per the Intel Software Development
	 * Manual, we can skip it if the processor supports cache self-
	 * snooping.
	 */
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (boot_cpu_has(X86_FEATURE_PGE)) {
@@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)

	/* Disable MTRRs, and set the default type to uncached */
	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
	wbinvd();

	/* Again, only flush caches if we have to. */
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}

static void post_set(void) __releases(set_atomicity_lock)
|
@@ -431,11 +431,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
#else
	register unsigned int line_size asm("esi");
	register unsigned int size asm("edi");
#ifdef CONFIG_X86_64
	register void *mem_r asm("rbx");
#else
	register void *mem_r asm("ebx");
#endif /* CONFIG_X86_64 */
	register void *mem_r asm(_ASM_BX);
#endif /* CONFIG_KASAN */

	/*
@@ -1503,7 +1499,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
	 * may be scheduled elsewhere and invalidate entries in the
	 * pseudo-locked region.
	 */
	if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}
|
@@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
|
||||
struct seq_file *seq, void *v)
|
||||
{
|
||||
struct rdt_resource *r = of->kn->parent->priv;
|
||||
u32 sw_shareable = 0, hw_shareable = 0;
|
||||
u32 exclusive = 0, pseudo_locked = 0;
|
||||
/*
|
||||
* Use unsigned long even though only 32 bits are used to ensure
|
||||
* test_bit() is used safely.
|
||||
*/
|
||||
unsigned long sw_shareable = 0, hw_shareable = 0;
|
||||
unsigned long exclusive = 0, pseudo_locked = 0;
|
||||
struct rdt_domain *dom;
|
||||
int i, hwb, swb, excl, psl;
|
||||
enum rdtgrp_mode mode;
|
||||
@@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
|
||||
}
|
||||
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
|
||||
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
|
||||
hwb = test_bit(i, (unsigned long *)&hw_shareable);
|
||||
swb = test_bit(i, (unsigned long *)&sw_shareable);
|
||||
excl = test_bit(i, (unsigned long *)&exclusive);
|
||||
psl = test_bit(i, (unsigned long *)&pseudo_locked);
|
||||
hwb = test_bit(i, &hw_shareable);
|
||||
swb = test_bit(i, &sw_shareable);
|
||||
excl = test_bit(i, &exclusive);
|
||||
psl = test_bit(i, &pseudo_locked);
|
||||
if (hwb && swb)
|
||||
seq_putc(seq, 'X');
|
||||
else if (hwb && !swb)
|
||||
@@ -2100,8 +2104,7 @@ static int rdt_init_fs_context(struct fs_context *fc)
|
||||
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
|
||||
fc->fs_private = &ctx->kfc;
|
||||
fc->ops = &rdt_fs_context_ops;
|
||||
if (fc->user_ns)
|
||||
put_user_ns(fc->user_ns);
|
||||
put_user_ns(fc->user_ns);
|
||||
fc->user_ns = get_user_ns(&init_user_ns);
|
||||
fc->global = true;
|
||||
return 0;
|
||||
@@ -2484,28 +2487,21 @@ out_destroy:
 * modification to the CBM if the default does not satisfy the
 * requirements.
 */
static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
{
	/*
	 * Convert the u32 _val to an unsigned long required by all the bit
	 * operations within this function. No more than 32 bits of this
	 * converted value can be accessed because all bit operations are
	 * additionally provided with cbm_len that is initialized during
	 * hardware enumeration using five bits from the EAX register and
	 * thus never can exceed 32 bits.
	 */
	unsigned long *val = (unsigned long *)_val;
	unsigned int cbm_len = r->cache.cbm_len;
	unsigned long first_bit, zero_bit;
	unsigned long val = _val;

	if (*val == 0)
		return;
	if (!val)
		return 0;

	first_bit = find_first_bit(val, cbm_len);
	zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
	first_bit = find_first_bit(&val, cbm_len);
	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

	/* Clear any remaining bits to ensure contiguous region */
	bitmap_clear(val, zero_bit, cbm_len - zero_bit);
	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
	return (u32)val;
}

/*
@@ -2563,7 +2559,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
	 * Force the initial CBM to be valid, user can
	 * modify the CBM based on system availability.
	 */
	cbm_ensure_valid(&d->new_ctrl, r);
	d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
	/*
	 * Assign the u32 CBM to an unsigned long to ensure that
	 * bitmap_weight() does not access out-of-bound memory.
|
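/*
 * Illustrative sketch (not part of the patch): returning the fixed-up CBM by
 * value lets the helper work on a local unsigned long, so find_first_bit()
 * and friends never index past the caller's u32. Worked example, assuming
 * cbm_len = 7:
 *
 *	cbm_ensure_valid(0x36, r)	-> first_bit = 1, zero_bit = 3
 *					-> returns 0x06 (0b110), the contiguous
 *					   run below the first hole
 */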
@@ -26,6 +26,10 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_APERFMPERF,	CPUID_ECX, 0, 0x00000006, 0 },
	{ X86_FEATURE_EPB,		CPUID_ECX, 3, 0x00000006, 0 },
	{ X86_FEATURE_CQM_LLC,		CPUID_EDX, 1, 0x0000000f, 0 },
	{ X86_FEATURE_CQM_OCCUP_LLC,	CPUID_EDX, 0, 0x0000000f, 1 },
	{ X86_FEATURE_CQM_MBM_TOTAL,	CPUID_EDX, 1, 0x0000000f, 1 },
	{ X86_FEATURE_CQM_MBM_LOCAL,	CPUID_EDX, 2, 0x0000000f, 1 },
	{ X86_FEATURE_CAT_L3,		CPUID_EBX, 1, 0x00000010, 0 },
	{ X86_FEATURE_CAT_L2,		CPUID_EBX, 2, 0x00000010, 0 },
	{ X86_FEATURE_CDP_L3,		CPUID_ECX, 2, 0x00000010, 1 },
|
@@ -15,33 +15,66 @@
|
||||
/* leaf 0xb SMT level */
|
||||
#define SMT_LEVEL 0
|
||||
|
||||
/* leaf 0xb sub-leaf types */
|
||||
/* extended topology sub-leaf types */
|
||||
#define INVALID_TYPE 0
|
||||
#define SMT_TYPE 1
|
||||
#define CORE_TYPE 2
|
||||
#define DIE_TYPE 5
|
||||
|
||||
#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
|
||||
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
|
||||
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int __max_die_per_package __read_mostly = 1;
|
||||
EXPORT_SYMBOL(__max_die_per_package);
|
||||
|
||||
/*
|
||||
* Check if given CPUID extended toplogy "leaf" is implemented
|
||||
*/
|
||||
static int check_extended_topology_leaf(int leaf)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Return best CPUID Extended Toplogy Leaf supported
|
||||
*/
|
||||
static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (c->cpuid_level >= 0x1f) {
|
||||
if (check_extended_topology_leaf(0x1f) == 0)
|
||||
return 0x1f;
|
||||
}
|
||||
|
||||
if (c->cpuid_level >= 0xb) {
|
||||
if (check_extended_topology_leaf(0xb) == 0)
|
||||
return 0xb;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
int detect_extended_topology_early(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
int leaf;
|
||||
|
||||
if (c->cpuid_level < 0xb)
|
||||
return -1;
|
||||
|
||||
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
/*
|
||||
* check if the cpuid leaf 0xb is actually implemented.
|
||||
*/
|
||||
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
|
||||
leaf = detect_extended_topology_leaf(c);
|
||||
if (leaf < 0)
|
||||
return -1;
|
||||
|
||||
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
|
||||
|
||||
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
|
||||
/*
|
||||
* initial apic id, which also represents 32-bit extended x2apic id.
|
||||
*/
|
||||
@@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for extended topology enumeration cpuid leaf 0xb and if it
|
||||
* Check for extended topology enumeration cpuid leaf, and if it
|
||||
* exists, use it for populating initial_apicid and cpu topology
|
||||
* detection.
|
||||
*/
|
||||
@@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int eax, ebx, ecx, edx, sub_index;
|
||||
unsigned int ht_mask_width, core_plus_mask_width;
|
||||
unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
|
||||
unsigned int core_select_mask, core_level_siblings;
|
||||
unsigned int die_select_mask, die_level_siblings;
|
||||
int leaf;
|
||||
|
||||
if (detect_extended_topology_early(c) < 0)
|
||||
leaf = detect_extended_topology_leaf(c);
|
||||
if (leaf < 0)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Populate HT related information from sub-leaf level 0.
|
||||
*/
|
||||
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
|
||||
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
|
||||
c->initial_apicid = edx;
|
||||
core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
|
||||
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
|
||||
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
|
||||
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
|
||||
|
||||
sub_index = 1;
|
||||
do {
|
||||
cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
|
||||
cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
/*
|
||||
* Check for the Core type in the implemented sub leaves.
|
||||
@@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
|
||||
if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
|
||||
core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
|
||||
core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
|
||||
break;
|
||||
die_level_siblings = core_level_siblings;
|
||||
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
|
||||
}
|
||||
if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
|
||||
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
|
||||
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
|
||||
}
|
||||
|
||||
sub_index++;
|
||||
} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
|
||||
|
||||
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
|
||||
die_select_mask = (~(-1 << die_plus_mask_width)) >>
|
||||
core_plus_mask_width;
|
||||
|
||||
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
|
||||
& core_select_mask;
|
||||
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
|
||||
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
|
||||
ht_mask_width) & core_select_mask;
|
||||
c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
|
||||
core_plus_mask_width) & die_select_mask;
|
||||
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
|
||||
die_plus_mask_width);
|
||||
/*
|
||||
* Reinit the apicid, now that we have extended initial_apicid.
|
||||
*/
|
||||
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
|
||||
|
||||
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
|
||||
__max_die_per_package = (die_level_siblings / core_level_siblings);
|
||||
#endif
|
||||
return 0;
|
||||
}
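/*
 * Illustrative sketch (not part of the patch): the select masks above carve
 * the APIC ID into SMT, core and die fields. With, say, ht_mask_width = 1,
 * core_plus_mask_width = 5 and die_plus_mask_width = 7:
 *
 *	core_select_mask = (~(-1 << 5)) >> 1 = 0x0f	(APIC ID bits 4:1)
 *	die_select_mask  = (~(-1 << 7)) >> 5 = 0x03	(APIC ID bits 6:5)
 *
 * so cpu_core_id is (apicid >> 1) & 0x0f, cpu_die_id is (apicid >> 5) & 0x03,
 * and phys_proc_id keeps the bits from position 7 upwards.
 */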
|
||||
|
200
arch/x86/kernel/cpu/umwait.c
Normal file
@@ -0,0 +1,200 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include <asm/msr.h>
|
||||
|
||||
#define UMWAIT_C02_ENABLE 0
|
||||
|
||||
#define UMWAIT_CTRL_VAL(max_time, c02_disable) \
|
||||
(((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \
|
||||
((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
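/*
 * Illustrative note (not part of the patch): in IA32_UMWAIT_CONTROL, bit 0
 * disables C0.2, bit 1 is reserved and bits 31:2 hold the maximum wait time
 * in TSC quanta, so the default cached below works out to:
 *
 *	UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE)
 *		= (100000 & ~0x3) | 0 = 0x186a0		(C0.2 left enabled)
 */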
|
||||
|
||||
/*
|
||||
* Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
|
||||
* umwait max time is 100000 in TSC-quanta and C0.2 is enabled
|
||||
*/
|
||||
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
|
||||
|
||||
/*
|
||||
* Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
|
||||
* the sysfs write functions.
|
||||
*/
|
||||
static DEFINE_MUTEX(umwait_lock);
|
||||
|
||||
static void umwait_update_control_msr(void * unused)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The CPU hotplug callback sets the control MSR to the global control
|
||||
* value.
|
||||
*
|
||||
* Disable interrupts so the read of umwait_control_cached and the WRMSR
|
||||
* are protected against a concurrent sysfs write. Otherwise the sysfs
|
||||
* write could update the cached value after it had been read on this CPU
|
||||
* and issue the IPI before the old value had been written. The IPI would
|
||||
* interrupt, write the new value and after return from IPI the previous
|
||||
* value would be written by this CPU.
|
||||
*
|
||||
* With interrupts disabled the upcoming CPU either sees the new control
|
||||
* value or the IPI is updating this CPU to the new control value after
|
||||
* interrupts have been reenabled.
|
||||
*/
|
||||
static int umwait_cpu_online(unsigned int cpu)
|
||||
{
|
||||
local_irq_disable();
|
||||
umwait_update_control_msr(NULL);
|
||||
local_irq_enable();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
|
||||
* is the only active CPU at this time. The MSR is set up on the APs via the
|
||||
* CPU hotplug callback.
|
||||
*
|
||||
* This function is invoked on resume from suspend and hibernation. On
|
||||
* resume from suspend the restore should be not required, but we neither
|
||||
* trust the firmware nor does it matter if the same value is written
|
||||
* again.
|
||||
*/
|
||||
static void umwait_syscore_resume(void)
|
||||
{
|
||||
umwait_update_control_msr(NULL);
|
||||
}
|
||||
|
||||
static struct syscore_ops umwait_syscore_ops = {
|
||||
.resume = umwait_syscore_resume,
|
||||
};
|
||||
|
||||
/* sysfs interface */
|
||||
|
||||
/*
|
||||
* When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
|
||||
* Otherwise, C0.2 is enabled.
|
||||
*/
|
||||
static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
|
||||
{
|
||||
return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
|
||||
}
|
||||
|
||||
static inline u32 umwait_ctrl_max_time(u32 ctrl)
|
||||
{
|
||||
return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
|
||||
}
|
||||
|
||||
static inline void umwait_update_control(u32 maxtime, bool c02_enable)
|
||||
{
|
||||
u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
|
||||
|
||||
if (!c02_enable)
|
||||
ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
|
||||
|
||||
WRITE_ONCE(umwait_control_cached, ctrl);
|
||||
/* Propagate to all CPUs */
|
||||
on_each_cpu(umwait_update_control_msr, NULL, 1);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
u32 ctrl = READ_ONCE(umwait_control_cached);
|
||||
|
||||
return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
|
||||
}
|
||||
|
||||
static ssize_t enable_c02_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
bool c02_enable;
|
||||
u32 ctrl;
|
||||
int ret;
|
||||
|
||||
ret = kstrtobool(buf, &c02_enable);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&umwait_lock);
|
||||
|
||||
ctrl = READ_ONCE(umwait_control_cached);
|
||||
if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
|
||||
umwait_update_control(ctrl, c02_enable);
|
||||
|
||||
mutex_unlock(&umwait_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR_RW(enable_c02);
|
||||
|
||||
static ssize_t
|
||||
max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
|
||||
{
|
||||
u32 ctrl = READ_ONCE(umwait_control_cached);
|
||||
|
||||
return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
|
||||
}
|
||||
|
||||
static ssize_t max_time_store(struct device *kobj,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
u32 max_time, ctrl;
|
||||
int ret;
|
||||
|
||||
ret = kstrtou32(buf, 0, &max_time);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* bits[1:0] must be zero */
|
||||
if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&umwait_lock);
|
||||
|
||||
ctrl = READ_ONCE(umwait_control_cached);
|
||||
if (max_time != umwait_ctrl_max_time(ctrl))
|
||||
umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
|
||||
|
||||
mutex_unlock(&umwait_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR_RW(max_time);
|
||||
|
||||
static struct attribute *umwait_attrs[] = {
|
||||
&dev_attr_enable_c02.attr,
|
||||
&dev_attr_max_time.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group umwait_attr_group = {
|
||||
.attrs = umwait_attrs,
|
||||
.name = "umwait_control",
|
||||
};
|
||||
|
||||
static int __init umwait_init(void)
|
||||
{
|
||||
struct device *dev;
|
||||
int ret;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_WAITPKG))
|
||||
return -ENODEV;
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
|
||||
umwait_cpu_online, NULL);
|
||||
|
||||
register_syscore_ops(&umwait_syscore_ops);
|
||||
|
||||
/*
|
||||
* Add umwait control interface. Ignore failure, so at least the
|
||||
* default values are set up in case the machine manages to boot.
|
||||
*/
|
||||
dev = cpu_subsys.dev_root;
|
||||
return sysfs_create_group(&dev->kobj, &umwait_attr_group);
|
||||
}
|
||||
device_initcall(umwait_init);
|
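/*
 * Usage note (illustrative, not part of the patch): with the attribute group
 * above registered on the CPU subsystem root, the knobs should show up as
 * /sys/devices/system/cpu/umwait_control/max_time and
 * /sys/devices/system/cpu/umwait_control/enable_c02; a write to either one
 * updates the cached control value and propagates it to all CPUs via the
 * on_each_cpu() IPI helper.
 */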
@@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void)

#ifdef CONFIG_X86_LOCAL_APIC
		/* Skip lapic calibration since we know the bus frequency. */
		lapic_timer_frequency = ecx / HZ;
		lapic_timer_period = ecx / HZ;
		pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
			ecx);
#endif
|
167
arch/x86/kernel/cpu/zhaoxin.c
Normal file
@@ -0,0 +1,167 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
#define MSR_ZHAOXIN_FCR57 0x00001257
|
||||
|
||||
#define ACE_PRESENT (1 << 6)
|
||||
#define ACE_ENABLED (1 << 7)
|
||||
#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */
|
||||
|
||||
#define RNG_PRESENT (1 << 2)
|
||||
#define RNG_ENABLED (1 << 3)
|
||||
#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */
|
||||
|
||||
#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
|
||||
#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
|
||||
#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
|
||||
#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
|
||||
#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
|
||||
#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
|
||||
|
||||
static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 lo, hi;
|
||||
|
||||
/* Test for Extended Feature Flags presence */
|
||||
if (cpuid_eax(0xC0000000) >= 0xC0000001) {
|
||||
u32 tmp = cpuid_edx(0xC0000001);
|
||||
|
||||
/* Enable ACE unit, if present and disabled */
|
||||
if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
|
||||
rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
|
||||
/* Enable ACE unit */
|
||||
lo |= ACE_FCR;
|
||||
wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
|
||||
pr_info("CPU: Enabled ACE h/w crypto\n");
|
||||
}
|
||||
|
||||
/* Enable RNG unit, if present and disabled */
|
||||
if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
|
||||
rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
|
||||
/* Enable RNG unit */
|
||||
lo |= RNG_ENABLE;
|
||||
wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
|
||||
pr_info("CPU: Enabled h/w RNG\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Store Extended Feature Flags as word 5 of the CPU
|
||||
* capability bit array
|
||||
*/
|
||||
c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
|
||||
}
|
||||
|
||||
if (c->x86 >= 0x6)
|
||||
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
||||
|
||||
cpu_detect_cache_sizes(c);
|
||||
}
|
||||
|
||||
static void early_init_zhaoxin(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (c->x86 >= 0x6)
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
#ifdef CONFIG_X86_64
|
||||
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
|
||||
#endif
|
||||
if (c->x86_power & (1 << 8)) {
|
||||
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
||||
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
|
||||
}
|
||||
|
||||
if (c->cpuid_level >= 0x00000001) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
|
||||
/*
|
||||
* If HTT (EDX[28]) is set EBX[16:23] contain the number of
|
||||
* apicids which are reserved per package. Store the resulting
|
||||
* shift value for the package management code.
|
||||
*/
|
||||
if (edx & (1U << 28))
|
||||
c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
|
||||
|
||||
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
|
||||
msr_ctl = vmx_msr_high | vmx_msr_low;
|
||||
|
||||
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
|
||||
set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
|
||||
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
|
||||
set_cpu_cap(c, X86_FEATURE_VNMI);
|
||||
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
|
||||
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
|
||||
vmx_msr_low, vmx_msr_high);
|
||||
msr_ctl2 = vmx_msr_high | vmx_msr_low;
|
||||
if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
|
||||
(msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
|
||||
set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
|
||||
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
|
||||
set_cpu_cap(c, X86_FEATURE_EPT);
|
||||
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
|
||||
set_cpu_cap(c, X86_FEATURE_VPID);
|
||||
}
|
||||
}
|
||||
|
||||
static void init_zhaoxin(struct cpuinfo_x86 *c)
|
||||
{
|
||||
early_init_zhaoxin(c);
|
||||
init_intel_cacheinfo(c);
|
||||
detect_num_cpu_cores(c);
|
||||
#ifdef CONFIG_X86_32
|
||||
detect_ht(c);
|
||||
#endif
|
||||
|
||||
if (c->cpuid_level > 9) {
|
||||
unsigned int eax = cpuid_eax(10);
|
||||
|
||||
/*
|
||||
* Check for version and the number of counters
|
||||
* Version(eax[7:0]) can't be 0;
|
||||
* Counters(eax[15:8]) should be greater than 1;
|
||||
*/
|
||||
if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
|
||||
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
|
||||
}
|
||||
|
||||
if (c->x86 >= 0x6)
|
||||
init_zhaoxin_cap(c);
|
||||
#ifdef CONFIG_X86_64
|
||||
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
||||
#endif
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_VMX))
|
||||
zhaoxin_detect_vmx_virtcap(c);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static unsigned int
|
||||
zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size)
|
||||
{
|
||||
return size;
|
||||
}
|
||||
#endif
|
||||
|
||||
static const struct cpu_dev zhaoxin_cpu_dev = {
|
||||
.c_vendor = "zhaoxin",
|
||||
.c_ident = { " Shanghai " },
|
||||
.c_early_init = early_init_zhaoxin,
|
||||
.c_init = init_zhaoxin,
|
||||
#ifdef CONFIG_X86_32
|
||||
.legacy_cache_size = zhaoxin_size_cache,
|
||||
#endif
|
||||
.c_x86_vendor = X86_VENDOR_ZHAOXIN,
|
||||
};
|
||||
|
||||
cpu_dev_register(zhaoxin_cpu_dev);
|
@@ -56,7 +56,6 @@ struct crash_memmap_data {
|
||||
*/
|
||||
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
|
||||
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
|
||||
unsigned long crash_zero_bytes;
|
||||
|
||||
static inline void cpu_crash_vmclear_loaded_vmcss(void)
|
||||
{
|
||||
@@ -73,14 +72,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
|
||||
|
||||
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
struct pt_regs fixed_regs;
|
||||
|
||||
if (!user_mode(regs)) {
|
||||
crash_fixup_ss_esp(&fixed_regs, regs);
|
||||
regs = &fixed_regs;
|
||||
}
|
||||
#endif
|
||||
crash_save_cpu(regs, cpu);
|
||||
|
||||
/*
|
||||
@@ -181,6 +172,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KEXEC_FILE
|
||||
|
||||
static unsigned long crash_zero_bytes;
|
||||
|
||||
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
|
||||
{
|
||||
unsigned int *nr_ranges = arg;
|
||||
@@ -381,6 +375,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
|
||||
walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
|
||||
memmap_entry_callback);
|
||||
|
||||
/* Add e820 reserved ranges */
|
||||
cmd.type = E820_TYPE_RESERVED;
|
||||
flags = IORESOURCE_MEM;
|
||||
walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
|
||||
memmap_entry_callback);
|
||||
|
||||
/* Add crashk_low_res region */
|
||||
if (crashk_low_res.end) {
|
||||
ei.addr = crashk_low_res.start;
|
||||
|
@@ -86,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table,
			continue;
		if (entry->addr >= end || entry->addr + entry->size <= start)
			continue;
		return 1;
		return true;
	}
	return 0;
	return false;
}

bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
@@ -1063,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
	case E820_TYPE_NVS:		return IORES_DESC_ACPI_NV_STORAGE;
	case E820_TYPE_PMEM:		return IORES_DESC_PERSISTENT_MEMORY;
	case E820_TYPE_PRAM:		return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
	case E820_TYPE_RESERVED:	return IORES_DESC_RESERVED;
	case E820_TYPE_RESERVED_KERN:	/* Fall-through: */
	case E820_TYPE_RAM:		/* Fall-through: */
	case E820_TYPE_UNUSABLE:	/* Fall-through: */
	case E820_TYPE_RESERVED:	/* Fall-through: */
	default:			return IORES_DESC_NONE;
	}
}
|
@@ -1,7 +1,6 @@

// SPDX-License-Identifier: GPL-2.0-only
/*
 * EISA specific code
 *
 * This file is licensed under the GPL V2
 */
#include <linux/ioport.h>
#include <linux/eisa.h>
|
@@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
|
||||
*/
|
||||
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
|
||||
|
||||
static void kernel_fpu_disable(void)
|
||||
{
|
||||
WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
|
||||
this_cpu_write(in_kernel_fpu, true);
|
||||
}
|
||||
|
||||
static void kernel_fpu_enable(void)
|
||||
{
|
||||
WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
|
||||
this_cpu_write(in_kernel_fpu, false);
|
||||
}
|
||||
|
||||
static bool kernel_fpu_disabled(void)
|
||||
{
|
||||
return this_cpu_read(in_kernel_fpu);
|
||||
@@ -94,42 +82,33 @@ bool irq_fpu_usable(void)
|
||||
}
|
||||
EXPORT_SYMBOL(irq_fpu_usable);
|
||||
|
||||
static void __kernel_fpu_begin(void)
|
||||
{
|
||||
	struct fpu *fpu = &current->thread.fpu;
|
||||
|
||||
WARN_ON_FPU(!irq_fpu_usable());
|
||||
|
||||
kernel_fpu_disable();
|
||||
|
||||
if (!(current->flags & PF_KTHREAD)) {
|
||||
if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
|
||||
set_thread_flag(TIF_NEED_FPU_LOAD);
|
||||
/*
|
||||
* Ignore return value -- we don't care if reg state
|
||||
* is clobbered.
|
||||
*/
|
||||
copy_fpregs_to_fpstate(fpu);
|
||||
}
|
||||
}
|
||||
__cpu_invalidate_fpregs_state();
|
||||
}
|
||||
|
||||
static void __kernel_fpu_end(void)
|
||||
{
|
||||
kernel_fpu_enable();
|
||||
}
|
||||
|
||||
void kernel_fpu_begin(void)
|
||||
{
|
||||
preempt_disable();
|
||||
__kernel_fpu_begin();
|
||||
|
||||
WARN_ON_FPU(!irq_fpu_usable());
|
||||
WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
|
||||
|
||||
this_cpu_write(in_kernel_fpu, true);
|
||||
|
||||
if (!(current->flags & PF_KTHREAD) &&
|
||||
!test_thread_flag(TIF_NEED_FPU_LOAD)) {
|
||||
set_thread_flag(TIF_NEED_FPU_LOAD);
|
||||
/*
|
||||
* Ignore return value -- we don't care if reg state
|
||||
* is clobbered.
|
||||
*/
|
||||
		copy_fpregs_to_fpstate(&current->thread.fpu);
|
||||
}
|
||||
__cpu_invalidate_fpregs_state();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
|
||||
|
||||
void kernel_fpu_end(void)
|
||||
{
|
||||
__kernel_fpu_end();
|
||||
WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
|
||||
|
||||
this_cpu_write(in_kernel_fpu, false);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_fpu_end);
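/*
 * Illustrative sketch (not part of the patch): with __kernel_fpu_begin/end
 * folded into the exported functions, callers still wrap in-kernel SIMD use
 * the same way. A minimal, hypothetical user:
 */
static void example_simd_user(void)
{
	if (!irq_fpu_usable())
		return;		/* fall back to a scalar path instead */

	kernel_fpu_begin();
	/* ... SSE/AVX work on kernel buffers goes here ... */
	kernel_fpu_end();
}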
|
||||
@@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu)
|
||||
trace_x86_fpu_after_save(fpu);
|
||||
fpregs_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(fpu__save);
|
||||
|
||||
/*
|
||||
* Legacy x87 fpstate state init:
|
||||
|
@@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
|
||||
*/
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_FPU)) {
|
||||
/*
|
||||
* Disable xsave as we do not support it if i387
|
||||
* emulation is enabled.
|
||||
*/
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
|
||||
fpu_kernel_xstate_size = sizeof(struct swregs_state);
|
||||
} else {
|
||||
if (boot_cpu_has(X86_FEATURE_FXSR))
|
||||
@@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void)
|
||||
char *argptr = arg;
|
||||
int bit;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
if (cmdline_find_option_bool(boot_command_line, "no387"))
|
||||
#ifdef CONFIG_MATH_EMULATION
|
||||
setup_clear_cpu_cap(X86_FEATURE_FPU);
|
||||
#else
|
||||
pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
|
||||
#endif
|
||||
|
||||
if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
|
||||
if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_FXSR);
|
||||
setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XMM);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cmdline_find_option_bool(boot_command_line, "noxsave"))
|
||||
fpu__xstate_clear_all_cpu_caps();
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
|
||||
if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
|
||||
|
@@ -8,6 +8,8 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
|
||||
#include <asm/fpu/api.h>
|
||||
#include <asm/fpu/internal.h>
|
||||
@@ -67,15 +69,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
|
||||
*/
|
||||
unsigned int fpu_user_xstate_size;
|
||||
|
||||
/*
|
||||
* Clear all of the X86_FEATURE_* bits that are unavailable
|
||||
* when the CPU has no XSAVE support.
|
||||
*/
|
||||
void fpu__xstate_clear_all_cpu_caps(void)
|
||||
{
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return whether the system supports a given xfeature.
|
||||
*
|
||||
@@ -709,7 +702,7 @@ static void fpu__init_disable_system_xstate(void)
|
||||
{
|
||||
xfeatures_mask = 0;
|
||||
cr4_clear_bits(X86_CR4_OSXSAVE);
|
||||
fpu__xstate_clear_all_cpu_caps();
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1240,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_PID_ARCH_STATUS
|
||||
/*
|
||||
* Report the amount of time elapsed in millisecond since last AVX512
|
||||
* use in the task.
|
||||
*/
|
||||
static void avx512_status(struct seq_file *m, struct task_struct *task)
|
||||
{
|
||||
unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
|
||||
long delta;
|
||||
|
||||
if (!timestamp) {
|
||||
/*
|
||||
* Report -1 if no AVX512 usage
|
||||
*/
|
||||
delta = -1;
|
||||
} else {
|
||||
delta = (long)(jiffies - timestamp);
|
||||
/*
|
||||
* Cap to LONG_MAX if time difference > LONG_MAX
|
||||
*/
|
||||
if (delta < 0)
|
||||
delta = LONG_MAX;
|
||||
delta = jiffies_to_msecs(delta);
|
||||
}
|
||||
|
||||
seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
/*
|
||||
* Report architecture specific information
|
||||
*/
|
||||
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *task)
|
||||
{
|
||||
/*
|
||||
* Report AVX512 state if the processor and build option supported.
|
||||
*/
|
||||
if (cpu_feature_enabled(X86_FEATURE_AVX512F))
|
||||
avx512_status(m, task);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
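/*
 * Usage note (illustrative, not part of the patch): with this hook wired up,
 * reading /proc/<pid>/arch_status on an AVX-512 capable machine is expected
 * to yield a line such as
 *
 *	AVX512_elapsed_ms:	120
 *
 * and -1 when the task has never used AVX-512 state.
 */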
|
||||
|
@@ -22,6 +22,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/memory.h>
|
||||
|
||||
#include <trace/syscall.h>
|
||||
|
||||
@@ -34,16 +35,25 @@
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
|
||||
int ftrace_arch_code_modify_prepare(void)
|
||||
__acquires(&text_mutex)
|
||||
{
|
||||
/*
|
||||
* Need to grab text_mutex to prevent a race from module loading
|
||||
* and live kernel patching from changing the text permissions while
|
||||
* ftrace has it set to "read/write".
|
||||
*/
|
||||
mutex_lock(&text_mutex);
|
||||
set_kernel_text_rw();
|
||||
set_all_modules_text_rw();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ftrace_arch_code_modify_post_process(void)
|
||||
__releases(&text_mutex)
|
||||
{
|
||||
set_all_modules_text_ro();
|
||||
set_kernel_text_ro();
|
||||
mutex_unlock(&text_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -300,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
|
||||
|
||||
ip = regs->ip - INT3_INSN_SIZE;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (ftrace_location(ip)) {
|
||||
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
|
||||
return 1;
|
||||
@@ -312,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
|
||||
int3_emulate_call(regs, ftrace_update_func_call);
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
if (ftrace_location(ip) || is_ftrace_caller(ip)) {
|
||||
int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -370,7 +373,7 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
|
||||
return add_break(rec->ip, old);
|
||||
}
|
||||
|
||||
static int add_breakpoints(struct dyn_ftrace *rec, int enable)
|
||||
static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
|
||||
{
|
||||
unsigned long ftrace_addr;
|
||||
int ret;
|
||||
@@ -478,7 +481,7 @@ static int add_update_nop(struct dyn_ftrace *rec)
|
||||
return add_update_code(ip, new);
|
||||
}
|
||||
|
||||
static int add_update(struct dyn_ftrace *rec, int enable)
|
||||
static int add_update(struct dyn_ftrace *rec, bool enable)
|
||||
{
|
||||
unsigned long ftrace_addr;
|
||||
int ret;
|
||||
@@ -524,7 +527,7 @@ static int finish_update_nop(struct dyn_ftrace *rec)
|
||||
return ftrace_write(ip, new, 1);
|
||||
}
|
||||
|
||||
static int finish_update(struct dyn_ftrace *rec, int enable)
|
||||
static int finish_update(struct dyn_ftrace *rec, bool enable)
|
||||
{
|
||||
unsigned long ftrace_addr;
|
||||
int ret;
|
||||
|
@@ -9,6 +9,8 @@
|
||||
#include <asm/export.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/frame.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
# define function_hook __fentry__
|
||||
EXPORT_SYMBOL(__fentry__)
|
||||
@@ -89,26 +91,38 @@ END(ftrace_caller)
|
||||
|
||||
ENTRY(ftrace_regs_caller)
|
||||
/*
|
||||
* i386 does not save SS and ESP when coming from kernel.
|
||||
 * Instead, to get sp, &regs->sp is used (see ptrace.h).
|
||||
* Unfortunately, that means eflags must be at the same location
|
||||
* as the current return ip is. We move the return ip into the
|
||||
* regs->ip location, and move flags into the return ip location.
|
||||
* We're here from an mcount/fentry CALL, and the stack frame looks like:
|
||||
*
|
||||
* <previous context>
|
||||
* RET-IP
|
||||
*
|
||||
* The purpose of this function is to call out in an emulated INT3
|
||||
* environment with a stack frame like:
|
||||
*
|
||||
* <previous context>
|
||||
* gap / RET-IP
|
||||
* gap
|
||||
* gap
|
||||
* gap
|
||||
* pt_regs
|
||||
*
|
||||
* We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
|
||||
*/
|
||||
pushl $__KERNEL_CS
|
||||
pushl 4(%esp) /* Save the return ip */
|
||||
pushl $0 /* Load 0 into orig_ax */
|
||||
subl $3*4, %esp # RET-IP + 3 gaps
|
||||
pushl %ss # ss
|
||||
pushl %esp # points at ss
|
||||
addl $5*4, (%esp) # make it point at <previous context>
|
||||
pushfl # flags
|
||||
pushl $__KERNEL_CS # cs
|
||||
pushl 7*4(%esp) # ip <- RET-IP
|
||||
pushl $0 # orig_eax
|
||||
|
||||
pushl %gs
|
||||
pushl %fs
|
||||
pushl %es
|
||||
pushl %ds
|
||||
|
||||
pushl %eax
|
||||
|
||||
/* Get flags and place them into the return ip slot */
|
||||
pushf
|
||||
popl %eax
|
||||
movl %eax, 8*4(%esp)
|
||||
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
|
||||
pushl %ecx
|
||||
pushl %ebx
|
||||
|
||||
movl 12*4(%esp), %eax /* Load ip (1st parameter) */
|
||||
subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
|
||||
movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */
|
||||
movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
|
||||
pushl %esp /* Save pt_regs as 4th parameter */
|
||||
ENCODE_FRAME_POINTER
|
||||
|
||||
movl PT_EIP(%esp), %eax # 1st argument: IP
|
||||
subl $MCOUNT_INSN_SIZE, %eax
|
||||
movl 21*4(%esp), %edx # 2nd argument: parent ip
|
||||
movl function_trace_op, %ecx # 3rd argument: ftrace_pos
|
||||
pushl %esp # 4th argument: pt_regs
|
||||
|
||||
GLOBAL(ftrace_regs_call)
|
||||
call ftrace_stub
|
||||
|
||||
addl $4, %esp /* Skip pt_regs */
|
||||
addl $4, %esp # skip 4th argument
|
||||
|
||||
/* restore flags */
|
||||
push 14*4(%esp)
|
||||
popf
|
||||
/* place IP below the new SP */
|
||||
movl PT_OLDESP(%esp), %eax
|
||||
movl PT_EIP(%esp), %ecx
|
||||
movl %ecx, -4(%eax)
|
||||
|
||||
/* Move return ip back to its original location */
|
||||
movl 12*4(%esp), %eax
|
||||
movl %eax, 14*4(%esp)
|
||||
/* place EAX below that */
|
||||
movl PT_EAX(%esp), %ecx
|
||||
movl %ecx, -8(%eax)
|
||||
|
||||
popl %ebx
|
||||
popl %ecx
|
||||
@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
popl %eax
|
||||
popl %ds
|
||||
popl %es
|
||||
popl %fs
|
||||
popl %gs
|
||||
|
||||
/* use lea to not affect flags */
|
||||
lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */
|
||||
lea -8(%eax), %esp
|
||||
popl %eax
|
||||
|
||||
jmp .Lftrace_ret
|
||||
|
||||
|
@@ -9,6 +9,7 @@
|
||||
#include <asm/export.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
#include <asm/unwind_hints.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
.code64
|
||||
.section .entry.text, "ax"
|
||||
@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
|
||||
leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
|
||||
movq %rcx, RSP(%rsp)
|
||||
|
||||
ENCODE_FRAME_POINTER
|
||||
|
||||
/* regs go into 4th parameter */
|
||||
leaq (%rsp), %rcx
|
||||
|
||||
|
@@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
|
||||
|
||||
if (la57) {
|
||||
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
|
||||
p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
|
||||
physaddr);
|
||||
|
||||
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
|
||||
pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
|
||||
pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
|
||||
|
||||
i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
|
||||
p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
|
||||
p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
|
||||
i = physaddr >> P4D_SHIFT;
|
||||
p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
|
||||
p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
|
||||
} else {
|
||||
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
|
||||
pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
|
||||
pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
|
||||
}
|
||||
|
||||
i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
|
||||
pud[i + 0] = (pudval_t)pmd + pgtable_flags;
|
||||
pud[i + 1] = (pudval_t)pmd + pgtable_flags;
|
||||
i = physaddr >> PUD_SHIFT;
|
||||
pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
|
||||
pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
|
||||
|
||||
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
|
||||
/* Filter out unsupported __PAGE_KERNEL_* bits: */
|
||||
@@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
|
||||
pmd_entry += physaddr;
|
||||
|
||||
for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
|
||||
int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
|
||||
pmd[idx] = pmd_entry + i * PMD_SIZE;
|
||||
int idx = i + (physaddr >> PMD_SHIFT);
|
||||
|
||||
pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -29,9 +29,7 @@
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/paravirt.h>
|
||||
#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
|
||||
#else
|
||||
#define GET_CR2_INTO(reg) movq %cr2, reg
|
||||
#define INTERRUPT_RETURN iretq
|
||||
#endif
|
||||
|
||||
@@ -253,10 +251,10 @@ END(secondary_startup_64)
|
||||
* start_secondary() via .Ljump_to_C_code.
|
||||
*/
|
||||
ENTRY(start_cpu0)
|
||||
movq initial_stack(%rip), %rsp
|
||||
UNWIND_HINT_EMPTY
|
||||
movq initial_stack(%rip), %rsp
|
||||
jmp .Ljump_to_C_code
|
||||
ENDPROC(start_cpu0)
|
||||
END(start_cpu0)
|
||||
#endif
|
||||
|
||||
/* Both SMP bootup and ACPI suspend change these variables */
|
||||
@@ -323,7 +321,7 @@ early_idt_handler_common:
|
||||
|
||||
cmpq $14,%rsi /* Page fault? */
|
||||
jnz 10f
|
||||
GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
|
||||
GET_CR2_INTO(%rdi) /* can clobber %rax if pv */
|
||||
call early_make_pgtable
|
||||
andl %eax,%eax
|
||||
jz 20f /* All good */
|
||||
|
File diff suppressed because it is too large
@@ -8,6 +8,7 @@
|
||||
#include <linux/timex.h>
|
||||
#include <linux/i8253.h>
|
||||
|
||||
#include <asm/apic.h>
|
||||
#include <asm/hpet.h>
|
||||
#include <asm/time.h>
|
||||
#include <asm/smp.h>
|
||||
@@ -18,10 +19,32 @@
|
||||
*/
|
||||
struct clock_event_device *global_clock_event;
|
||||
|
||||
void __init setup_pit_timer(void)
|
||||
/*
|
||||
* Modern chipsets can disable the PIT clock which makes it unusable. It
|
||||
* would be possible to enable the clock but the registers are chipset
|
||||
* specific and not discoverable. Avoid the whack a mole game.
|
||||
*
|
||||
* These platforms have discoverable TSC/CPU frequencies but this also
|
||||
* requires to know the local APIC timer frequency as it normally is
|
||||
* calibrated against the PIT interrupt.
|
||||
*/
|
||||
static bool __init use_pit(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC))
|
||||
return true;
|
||||
|
||||
/* This also returns true when APIC is disabled */
|
||||
return apic_needs_pit();
|
||||
}
|
||||
|
||||
bool __init pit_timer_init(void)
|
||||
{
|
||||
if (!use_pit())
|
||||
return false;
|
||||
|
||||
clockevent_i8253_init(true);
|
||||
global_clock_event = &i8253_clockevent;
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_X86_64
|
||||
|
@@ -1,7 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Interrupt descriptor table related code
|
||||
*
|
||||
* This file is licensed under the GPL V2
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
@@ -320,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void)
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
|
||||
set_bit(i, system_vectors);
|
||||
set_intr_gate(i, spurious_interrupt);
|
||||
entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
|
||||
set_intr_gate(i, entry);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@@ -11,10 +11,11 @@ extern struct boot_params boot_params;
|
||||
static enum efi_secureboot_mode get_sb_mode(void)
|
||||
{
|
||||
efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
|
||||
efi_char16_t efi_SetupMode_name[] = L"SecureBoot";
|
||||
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
|
||||
efi_status_t status;
|
||||
unsigned long size;
|
||||
u8 secboot;
|
||||
u8 secboot, setupmode;
|
||||
|
||||
size = sizeof(secboot);
|
||||
|
||||
@@ -36,7 +37,14 @@ static enum efi_secureboot_mode get_sb_mode(void)
|
||||
return efi_secureboot_mode_unknown;
|
||||
}
|
||||
|
||||
if (secboot == 0) {
|
||||
size = sizeof(setupmode);
|
||||
status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
|
||||
NULL, &size, &setupmode);
|
||||
|
||||
if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
|
||||
setupmode = 0;
|
||||
|
||||
if (secboot == 0 || setupmode == 1) {
|
||||
pr_info("ima: secureboot mode disabled\n");
|
||||
return efi_secureboot_mode_disabled;
|
||||
}
|
||||
|
@@ -13,7 +13,22 @@
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/io.h>
|
||||
|
||||
int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
|
||||
#define IO_DELAY_TYPE_0X80 0
|
||||
#define IO_DELAY_TYPE_0XED 1
|
||||
#define IO_DELAY_TYPE_UDELAY 2
|
||||
#define IO_DELAY_TYPE_NONE 3
|
||||
|
||||
#if defined(CONFIG_IO_DELAY_0X80)
|
||||
#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0X80
|
||||
#elif defined(CONFIG_IO_DELAY_0XED)
|
||||
#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0XED
|
||||
#elif defined(CONFIG_IO_DELAY_UDELAY)
|
||||
#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_UDELAY
|
||||
#elif defined(CONFIG_IO_DELAY_NONE)
|
||||
#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_NONE
|
||||
#endif
|
||||
|
||||
int io_delay_type __read_mostly = DEFAULT_IO_DELAY_TYPE;
|
||||
|
||||
static int __initdata io_delay_override;
|
||||
|
||||
@@ -24,13 +39,13 @@ void native_io_delay(void)
|
||||
{
|
||||
switch (io_delay_type) {
|
||||
default:
|
||||
case CONFIG_IO_DELAY_TYPE_0X80:
|
||||
case IO_DELAY_TYPE_0X80:
|
||||
asm volatile ("outb %al, $0x80");
|
||||
break;
|
||||
case CONFIG_IO_DELAY_TYPE_0XED:
|
||||
case IO_DELAY_TYPE_0XED:
|
||||
asm volatile ("outb %al, $0xed");
|
||||
break;
|
||||
case CONFIG_IO_DELAY_TYPE_UDELAY:
|
||||
case IO_DELAY_TYPE_UDELAY:
|
||||
/*
|
||||
* 2 usecs is an upper-bound for the outb delay but
|
||||
* note that udelay doesn't have the bus-level
|
||||
@@ -39,7 +54,8 @@ void native_io_delay(void)
|
||||
* are shorter until calibrated):
|
||||
*/
|
||||
udelay(2);
|
||||
case CONFIG_IO_DELAY_TYPE_NONE:
|
||||
break;
|
||||
case IO_DELAY_TYPE_NONE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -47,9 +63,9 @@ EXPORT_SYMBOL(native_io_delay);
|
||||
|
||||
static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
|
||||
{
|
||||
if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
|
||||
if (io_delay_type == IO_DELAY_TYPE_0X80) {
|
||||
pr_notice("%s: using 0xed I/O delay port\n", id->ident);
|
||||
io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
|
||||
io_delay_type = IO_DELAY_TYPE_0XED;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -115,13 +131,13 @@ static int __init io_delay_param(char *s)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcmp(s, "0x80"))
|
||||
io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
|
||||
io_delay_type = IO_DELAY_TYPE_0X80;
|
||||
else if (!strcmp(s, "0xed"))
|
||||
io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
|
||||
io_delay_type = IO_DELAY_TYPE_0XED;
|
||||
else if (!strcmp(s, "udelay"))
|
||||
io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
|
||||
io_delay_type = IO_DELAY_TYPE_UDELAY;
|
||||
else if (!strcmp(s, "none"))
|
||||
io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
|
||||
io_delay_type = IO_DELAY_TYPE_NONE;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
|
@@ -135,7 +135,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
|
||||
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
|
||||
seq_puts(p, " Machine check polls\n");
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
|
||||
#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
|
||||
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
|
||||
seq_printf(p, "%*s: ", prec, "HYP");
|
||||
for_each_online_cpu(j)
|
||||
@@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
|
||||
if (!handle_irq(desc, regs)) {
|
||||
ack_APIC_irq();
|
||||
|
||||
if (desc != VECTOR_RETRIGGERED) {
|
||||
if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
|
||||
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
|
||||
__func__, smp_processor_id(),
|
||||
vector);
|
||||
|
@@ -65,8 +65,6 @@ static int sched_itmt_update_handler(struct ctl_table *table, int write,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned int zero;
|
||||
static unsigned int one = 1;
|
||||
static struct ctl_table itmt_kern_table[] = {
|
||||
{
|
||||
.procname = "sched_itmt_enabled",
|
||||
@@ -74,8 +72,8 @@ static struct ctl_table itmt_kern_table[] = {
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = sched_itmt_update_handler,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
@@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
|
||||
|
||||
static void __init jailhouse_timer_init(void)
|
||||
{
|
||||
lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
|
||||
lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
|
||||
}
|
||||
|
||||
static unsigned long jailhouse_get_tsc(void)
|
||||
@@ -203,7 +203,7 @@ bool jailhouse_paravirt(void)
|
||||
return jailhouse_cpuid_base() != 0;
|
||||
}
|
||||
|
||||
static bool jailhouse_x2apic_available(void)
|
||||
static bool __init jailhouse_x2apic_available(void)
|
||||
{
|
||||
/*
|
||||
* The x2APIC is only available if the root cell enabled it. Jailhouse
|
||||
@@ -217,4 +217,5 @@ const struct hypervisor_x86 x86_hyper_jailhouse __refconst = {
|
||||
.detect = jailhouse_detect,
|
||||
.init.init_platform = jailhouse_init_platform,
|
||||
.init.x2apic_available = jailhouse_x2apic_available,
|
||||
.ignore_nopv = true,
|
||||
};
|
||||
|
@@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line)
|
||||
BUG();
|
||||
}
|
||||
|
||||
static void __ref __jump_label_transform(struct jump_entry *entry,
|
||||
enum jump_label_type type,
|
||||
int init)
|
||||
static void __jump_label_set_jump_code(struct jump_entry *entry,
|
||||
enum jump_label_type type,
|
||||
union jump_code_union *code,
|
||||
int init)
|
||||
{
|
||||
union jump_code_union jmp;
|
||||
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
|
||||
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
|
||||
const void *expect, *code;
|
||||
const void *expect;
|
||||
int line;
|
||||
|
||||
jmp.jump = 0xe9;
|
||||
jmp.offset = jump_entry_target(entry) -
|
||||
(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
|
||||
code->jump = 0xe9;
|
||||
code->offset = jump_entry_target(entry) -
|
||||
(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
|
||||
|
||||
if (type == JUMP_LABEL_JMP) {
|
||||
if (init) {
|
||||
expect = default_nop; line = __LINE__;
|
||||
} else {
|
||||
expect = ideal_nop; line = __LINE__;
|
||||
}
|
||||
|
||||
code = &jmp.code;
|
||||
if (init) {
|
||||
expect = default_nop; line = __LINE__;
|
||||
} else if (type == JUMP_LABEL_JMP) {
|
||||
expect = ideal_nop; line = __LINE__;
|
||||
} else {
|
||||
if (init) {
|
||||
expect = default_nop; line = __LINE__;
|
||||
} else {
|
||||
expect = &jmp.code; line = __LINE__;
|
||||
}
|
||||
|
||||
code = ideal_nop;
|
||||
expect = code->code; line = __LINE__;
|
||||
}
|
||||
|
||||
if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
|
||||
bug_at((void *)jump_entry_code(entry), line);
|
||||
|
||||
if (type == JUMP_LABEL_NOP)
|
||||
memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
|
||||
}
|
||||
|
||||
static void __ref __jump_label_transform(struct jump_entry *entry,
|
||||
enum jump_label_type type,
|
||||
int init)
|
||||
{
|
||||
union jump_code_union code;
|
||||
|
||||
__jump_label_set_jump_code(entry, type, &code, init);
|
||||
|
||||
/*
|
||||
* As long as only a single processor is running and the code is still
|
||||
* not marked as RO, text_poke_early() can be used; Checking that
|
||||
@@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
|
||||
* always nop being the 'currently valid' instruction
|
||||
*/
|
||||
if (init || system_state == SYSTEM_BOOTING) {
|
||||
text_poke_early((void *)jump_entry_code(entry), code,
|
||||
text_poke_early((void *)jump_entry_code(entry), &code,
|
||||
JUMP_LABEL_NOP_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
|
||||
text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
|
||||
(void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
|
||||
}
|
||||
|
||||
@@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry,
|
||||
mutex_unlock(&text_mutex);
|
||||
}
|
||||
|
||||
#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
|
||||
static struct text_poke_loc tp_vec[TP_VEC_MAX];
|
||||
static int tp_vec_nr;
|
||||
|
||||
bool arch_jump_label_transform_queue(struct jump_entry *entry,
|
||||
enum jump_label_type type)
|
||||
{
|
||||
struct text_poke_loc *tp;
|
||||
void *entry_code;
|
||||
|
||||
if (system_state == SYSTEM_BOOTING) {
|
||||
/*
|
||||
* Fallback to the non-batching mode.
|
||||
*/
|
||||
arch_jump_label_transform(entry, type);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* No more space in the vector, tell upper layer to apply
|
||||
* the queue before continuing.
|
||||
*/
|
||||
if (tp_vec_nr == TP_VEC_MAX)
|
||||
return false;
|
||||
|
||||
tp = &tp_vec[tp_vec_nr];
|
||||
|
||||
entry_code = (void *)jump_entry_code(entry);
|
||||
|
||||
/*
|
||||
* The INT3 handler will do a bsearch in the queue, so we need entries
|
||||
* to be sorted. We can survive an unsorted list by rejecting the entry,
|
||||
* forcing the generic jump_label code to apply the queue. Warning once,
|
||||
* to raise the attention to the case of an unsorted entry that is
|
||||
* better not happen, because, in the worst case we will perform in the
|
||||
* same way as we do without batching - with some more overhead.
|
||||
*/
|
||||
if (tp_vec_nr > 0) {
|
||||
int prev = tp_vec_nr - 1;
|
||||
struct text_poke_loc *prev_tp = &tp_vec[prev];
|
||||
|
||||
if (WARN_ON_ONCE(prev_tp->addr > entry_code))
|
||||
return false;
|
||||
}
|
||||
|
||||
__jump_label_set_jump_code(entry, type,
|
||||
(union jump_code_union *) &tp->opcode, 0);
|
||||
|
||||
tp->addr = entry_code;
|
||||
tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
|
||||
tp->len = JUMP_LABEL_NOP_SIZE;
|
||||
|
||||
tp_vec_nr++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void arch_jump_label_transform_apply(void)
|
||||
{
|
||||
if (!tp_vec_nr)
|
||||
return;
|
||||
|
||||
mutex_lock(&text_mutex);
|
||||
text_poke_bp_batch(tp_vec, tp_vec_nr);
|
||||
mutex_unlock(&text_mutex);
|
||||
|
||||
tp_vec_nr = 0;
|
||||
}
|
||||
|
||||
static enum {
|
||||
JL_STATE_START,
|
||||
JL_STATE_NO_UPDATE,
|
||||
|
@@ -67,33 +67,18 @@ static const struct file_operations fops_setup_data = {
|
||||
.llseek = default_llseek,
|
||||
};
|
||||
|
||||
static int __init
|
||||
static void __init
|
||||
create_setup_data_node(struct dentry *parent, int no,
|
||||
struct setup_data_node *node)
|
||||
{
|
||||
struct dentry *d, *type, *data;
|
||||
struct dentry *d;
|
||||
char buf[16];
|
||||
|
||||
sprintf(buf, "%d", no);
|
||||
d = debugfs_create_dir(buf, parent);
|
||||
if (!d)
|
||||
return -ENOMEM;
|
||||
|
||||
type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
|
||||
if (!type)
|
||||
goto err_dir;
|
||||
|
||||
data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
|
||||
if (!data)
|
||||
goto err_type;
|
||||
|
||||
return 0;
|
||||
|
||||
err_type:
|
||||
debugfs_remove(type);
|
||||
err_dir:
|
||||
debugfs_remove(d);
|
||||
return -ENOMEM;
|
||||
debugfs_create_x32("type", S_IRUGO, d, &node->type);
|
||||
debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
|
||||
}
|
||||
|
||||
static int __init create_setup_data_nodes(struct dentry *parent)
|
||||
@@ -106,8 +91,6 @@ static int __init create_setup_data_nodes(struct dentry *parent)
|
||||
int no = 0;
|
||||
|
||||
d = debugfs_create_dir("setup_data", parent);
|
||||
if (!d)
|
||||
return -ENOMEM;
|
||||
|
||||
pa_data = boot_params.hdr.setup_data;
|
||||
|
||||
@@ -128,19 +111,17 @@ static int __init create_setup_data_nodes(struct dentry *parent)
|
||||
node->paddr = pa_data;
|
||||
node->type = data->type;
|
||||
node->len = data->len;
|
||||
error = create_setup_data_node(d, no, node);
|
||||
create_setup_data_node(d, no, node);
|
||||
pa_data = data->next;
|
||||
|
||||
memunmap(data);
|
||||
if (error)
|
||||
goto err_dir;
|
||||
no++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_dir:
|
||||
debugfs_remove(d);
|
||||
debugfs_remove_recursive(d);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -151,35 +132,18 @@ static struct debugfs_blob_wrapper boot_params_blob = {
|
||||
|
||||
static int __init boot_params_kdebugfs_init(void)
|
||||
{
|
||||
struct dentry *dbp, *version, *data;
|
||||
int error = -ENOMEM;
|
||||
struct dentry *dbp;
|
||||
int error;
|
||||
|
||||
dbp = debugfs_create_dir("boot_params", arch_debugfs_dir);
|
||||
if (!dbp)
|
||||
return -ENOMEM;
|
||||
|
||||
version = debugfs_create_x16("version", S_IRUGO, dbp,
|
||||
&boot_params.hdr.version);
|
||||
if (!version)
|
||||
goto err_dir;
|
||||
|
||||
data = debugfs_create_blob("data", S_IRUGO, dbp,
|
||||
&boot_params_blob);
|
||||
if (!data)
|
||||
goto err_version;
|
||||
debugfs_create_x16("version", S_IRUGO, dbp, &boot_params.hdr.version);
|
||||
debugfs_create_blob("data", S_IRUGO, dbp, &boot_params_blob);
|
||||
|
||||
error = create_setup_data_nodes(dbp);
|
||||
if (error)
|
||||
goto err_data;
|
||||
debugfs_remove_recursive(dbp);
|
||||
|
||||
return 0;
|
||||
|
||||
err_data:
|
||||
debugfs_remove(data);
|
||||
err_version:
|
||||
debugfs_remove(version);
|
||||
err_dir:
|
||||
debugfs_remove(dbp);
|
||||
return error;
|
||||
}
|
||||
#endif /* CONFIG_DEBUG_BOOT_PARAMS */
|
||||
@@ -189,8 +153,6 @@ static int __init arch_kdebugfs_init(void)
|
||||
int error = 0;
|
||||
|
||||
arch_debugfs_dir = debugfs_create_dir("x86", NULL);
|
||||
if (!arch_debugfs_dir)
|
||||
return -ENOMEM;
|
||||
|
||||
#ifdef CONFIG_DEBUG_BOOT_PARAMS
|
||||
error = boot_params_kdebugfs_init();
|
||||
|
@@ -1,12 +1,10 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Kexec bzImage loader
|
||||
*
|
||||
* Copyright (C) 2014 Red Hat Inc.
|
||||
* Authors:
|
||||
* Vivek Goyal <vgoyal@redhat.com>
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "kexec-bzImage64: " fmt
|
||||
@@ -321,6 +319,11 @@ static int bzImage64_probe(const char *buf, unsigned long len)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!(header->xloadflags & XLF_5LEVEL) && pgtable_l5_enabled()) {
|
||||
pr_err("bzImage cannot handle 5-level paging mode.\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* I've got a bzImage */
|
||||
pr_debug("It's a relocatable bzImage64\n");
|
||||
ret = 0;
|
||||
@@ -416,7 +419,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
|
||||
efi_map_offset = params_cmdline_sz;
|
||||
efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
|
||||
|
||||
/* Copy setup header onto bootparams. Documentation/x86/boot.txt */
|
||||
/* Copy setup header onto bootparams. Documentation/x86/boot.rst */
|
||||
setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
|
||||
|
||||
/* Is there a limit on setup header size? */
|
||||
|
@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
switch (regno) {
|
||||
case GDB_SS:
|
||||
if (!user_mode(regs))
|
||||
*(unsigned long *)mem = __KERNEL_DS;
|
||||
break;
|
||||
case GDB_SP:
|
||||
if (!user_mode(regs))
|
||||
*(unsigned long *)mem = kernel_stack_pointer(regs);
|
||||
break;
|
||||
case GDB_GS:
|
||||
case GDB_FS:
|
||||
*(unsigned long *)mem = 0xFFFF;
|
||||
|
@@ -5,15 +5,10 @@
|
||||
/* Kprobes and Optprobes common header */
|
||||
|
||||
#include <asm/asm.h>
|
||||
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
|
||||
" mov %" _ASM_SP ", %" _ASM_BP "\n"
|
||||
#else
|
||||
# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
|
||||
#endif
|
||||
#include <asm/frame.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define SAVE_REGS_STRING \
|
||||
/* Skip cs, ip, orig_ax. */ \
|
||||
" subq $24, %rsp\n" \
|
||||
@@ -27,11 +22,13 @@
|
||||
" pushq %r10\n" \
|
||||
" pushq %r11\n" \
|
||||
" pushq %rbx\n" \
|
||||
SAVE_RBP_STRING \
|
||||
" pushq %rbp\n" \
|
||||
" pushq %r12\n" \
|
||||
" pushq %r13\n" \
|
||||
" pushq %r14\n" \
|
||||
" pushq %r15\n"
|
||||
" pushq %r15\n" \
|
||||
ENCODE_FRAME_POINTER
|
||||
|
||||
#define RESTORE_REGS_STRING \
|
||||
" popq %r15\n" \
|
||||
" popq %r14\n" \
|
||||
@@ -51,19 +48,22 @@
|
||||
/* Skip orig_ax, ip, cs */ \
|
||||
" addq $24, %rsp\n"
|
||||
#else
|
||||
|
||||
#define SAVE_REGS_STRING \
|
||||
/* Skip cs, ip, orig_ax and gs. */ \
|
||||
" subl $16, %esp\n" \
|
||||
" subl $4*4, %esp\n" \
|
||||
" pushl %fs\n" \
|
||||
" pushl %es\n" \
|
||||
" pushl %ds\n" \
|
||||
" pushl %eax\n" \
|
||||
SAVE_RBP_STRING \
|
||||
" pushl %ebp\n" \
|
||||
" pushl %edi\n" \
|
||||
" pushl %esi\n" \
|
||||
" pushl %edx\n" \
|
||||
" pushl %ecx\n" \
|
||||
" pushl %ebx\n"
|
||||
" pushl %ebx\n" \
|
||||
ENCODE_FRAME_POINTER
|
||||
|
||||
#define RESTORE_REGS_STRING \
|
||||
" popl %ebx\n" \
|
||||
" popl %ecx\n" \
|
||||
@@ -72,8 +72,8 @@
|
||||
" popl %edi\n" \
|
||||
" popl %ebp\n" \
|
||||
" popl %eax\n" \
|
||||
/* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
|
||||
" addl $24, %esp\n"
|
||||
/* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
|
||||
" addl $7*4, %esp\n"
|
||||
#endif
|
||||
|
||||
/* Ensure if the instruction can be boostable */
|
||||
|
@@ -56,7 +56,7 @@
|
||||
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
|
||||
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
|
||||
|
||||
#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
|
||||
#define stack_addr(regs) ((unsigned long *)regs->sp)
|
||||
|
||||
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
|
||||
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
|
||||
@@ -718,29 +718,27 @@ asm(
|
||||
".global kretprobe_trampoline\n"
|
||||
".type kretprobe_trampoline, @function\n"
|
||||
"kretprobe_trampoline:\n"
|
||||
#ifdef CONFIG_X86_64
|
||||
/* We don't bother saving the ss register */
|
||||
#ifdef CONFIG_X86_64
|
||||
" pushq %rsp\n"
|
||||
" pushfq\n"
|
||||
SAVE_REGS_STRING
|
||||
" movq %rsp, %rdi\n"
|
||||
" call trampoline_handler\n"
|
||||
/* Replace saved sp with true return address. */
|
||||
" movq %rax, 152(%rsp)\n"
|
||||
" movq %rax, 19*8(%rsp)\n"
|
||||
RESTORE_REGS_STRING
|
||||
" popfq\n"
|
||||
#else
|
||||
" pushf\n"
|
||||
" pushl %esp\n"
|
||||
" pushfl\n"
|
||||
SAVE_REGS_STRING
|
||||
" movl %esp, %eax\n"
|
||||
" call trampoline_handler\n"
|
||||
/* Move flags to cs */
|
||||
" movl 56(%esp), %edx\n"
|
||||
" movl %edx, 52(%esp)\n"
|
||||
/* Replace saved flags with true return address. */
|
||||
" movl %eax, 56(%esp)\n"
|
||||
/* Replace saved sp with true return address. */
|
||||
" movl %eax, 15*4(%esp)\n"
|
||||
RESTORE_REGS_STRING
|
||||
" popf\n"
|
||||
" popfl\n"
|
||||
#endif
|
||||
" ret\n"
|
||||
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
|
||||
@@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
|
||||
INIT_HLIST_HEAD(&empty_rp);
|
||||
kretprobe_hash_lock(current, &head, &flags);
|
||||
/* fixup registers */
|
||||
#ifdef CONFIG_X86_64
|
||||
regs->cs = __KERNEL_CS;
|
||||
/* On x86-64, we use pt_regs->sp for return address holder. */
|
||||
frame_pointer = &regs->sp;
|
||||
#else
|
||||
regs->cs = __KERNEL_CS | get_kernel_rpl();
|
||||
#ifdef CONFIG_X86_32
|
||||
regs->cs |= get_kernel_rpl();
|
||||
regs->gs = 0;
|
||||
/* On x86-32, we use pt_regs->flags for return address holder. */
|
||||
frame_pointer = &regs->flags;
|
||||
#endif
|
||||
/* We use pt_regs->sp for return address holder. */
|
||||
frame_pointer = &regs->sp;
|
||||
regs->ip = trampoline_address;
|
||||
regs->orig_ax = ~0UL;
|
||||
|
||||
@@ -813,7 +808,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
|
||||
continue;
|
||||
/*
|
||||
* Return probes must be pushed on this hash list correct
|
||||
* order (same as return order) so that it can be poped
|
||||
* order (same as return order) so that it can be popped
|
||||
* correctly. However, if we find it is pushed it incorrect
|
||||
* order, this means we find a function which should not be
|
||||
* probed, because the wrong order entry is pushed on the
|
||||
|
@@ -102,14 +102,15 @@ asm (
|
||||
"optprobe_template_call:\n"
|
||||
ASM_NOP5
|
||||
/* Move flags to rsp */
|
||||
" movq 144(%rsp), %rdx\n"
|
||||
" movq %rdx, 152(%rsp)\n"
|
||||
" movq 18*8(%rsp), %rdx\n"
|
||||
" movq %rdx, 19*8(%rsp)\n"
|
||||
RESTORE_REGS_STRING
|
||||
/* Skip flags entry */
|
||||
" addq $8, %rsp\n"
|
||||
" popfq\n"
|
||||
#else /* CONFIG_X86_32 */
|
||||
" pushf\n"
|
||||
" pushl %esp\n"
|
||||
" pushfl\n"
|
||||
SAVE_REGS_STRING
|
||||
" movl %esp, %edx\n"
|
||||
".global optprobe_template_val\n"
|
||||
@@ -118,9 +119,13 @@ asm (
|
||||
".global optprobe_template_call\n"
|
||||
"optprobe_template_call:\n"
|
||||
ASM_NOP5
|
||||
/* Move flags into esp */
|
||||
" movl 14*4(%esp), %edx\n"
|
||||
" movl %edx, 15*4(%esp)\n"
|
||||
RESTORE_REGS_STRING
|
||||
" addl $4, %esp\n" /* skip cs */
|
||||
" popf\n"
|
||||
/* Skip flags entry */
|
||||
" addl $4, %esp\n"
|
||||
" popfl\n"
|
||||
#endif
|
||||
".global optprobe_template_end\n"
|
||||
"optprobe_template_end:\n"
|
||||
@@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
|
||||
} else {
|
||||
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
|
||||
/* Save skipped registers */
|
||||
#ifdef CONFIG_X86_64
|
||||
regs->cs = __KERNEL_CS;
|
||||
#else
|
||||
regs->cs = __KERNEL_CS | get_kernel_rpl();
|
||||
#ifdef CONFIG_X86_32
|
||||
regs->cs |= get_kernel_rpl();
|
||||
regs->gs = 0;
|
||||
#endif
|
||||
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
|
||||
@@ -418,7 +422,7 @@ err:
|
||||
void arch_optimize_kprobes(struct list_head *oplist)
|
||||
{
|
||||
struct optimized_kprobe *op, *tmp;
|
||||
u8 insn_buf[RELATIVEJUMP_SIZE];
|
||||
u8 insn_buff[RELATIVEJUMP_SIZE];
|
||||
|
||||
list_for_each_entry_safe(op, tmp, oplist, list) {
|
||||
s32 rel = (s32)((long)op->optinsn.insn -
|
||||
@@ -430,10 +434,10 @@ void arch_optimize_kprobes(struct list_head *oplist)
|
||||
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
|
||||
RELATIVE_ADDR_SIZE);
|
||||
|
||||
insn_buf[0] = RELATIVEJUMP_OPCODE;
|
||||
*(s32 *)(&insn_buf[1]) = rel;
|
||||
insn_buff[0] = RELATIVEJUMP_OPCODE;
|
||||
*(s32 *)(&insn_buff[1]) = rel;
|
||||
|
||||
text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
|
||||
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
|
||||
op->optinsn.insn);
|
||||
|
||||
list_del_init(&op->list);
|
||||
@@ -443,12 +447,12 @@ void arch_optimize_kprobes(struct list_head *oplist)
|
||||
/* Replace a relative jump with a breakpoint (int3). */
|
||||
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
|
||||
{
|
||||
u8 insn_buf[RELATIVEJUMP_SIZE];
|
||||
u8 insn_buff[RELATIVEJUMP_SIZE];
|
||||
|
||||
/* Set int3 to first byte for kprobes */
|
||||
insn_buf[0] = BREAKPOINT_INSTRUCTION;
|
||||
memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
|
||||
text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
|
||||
insn_buff[0] = BREAKPOINT_INSTRUCTION;
|
||||
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
|
||||
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
|
||||
op->optinsn.insn);
|
||||
}
|
||||
|
||||
|
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
|
||||
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
|
||||
|
||||
dotraplinkage void
|
||||
do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
|
||||
{
|
||||
enum ctx_state prev_state;
|
||||
|
||||
switch (kvm_read_and_reset_pf_reason()) {
|
||||
default:
|
||||
do_page_fault(regs, error_code);
|
||||
do_page_fault(regs, error_code, address);
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_NOT_PRESENT:
|
||||
/* page is swapped out by the host. */
|
||||
prev_state = exception_enter();
|
||||
kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
|
||||
kvm_async_pf_task_wait((u32)address, !user_mode(regs));
|
||||
exception_exit(prev_state);
|
||||
break;
|
||||
case KVM_PV_REASON_PAGE_READY:
|
||||
rcu_irq_enter();
|
||||
kvm_async_pf_task_wake((u32)read_cr2());
|
||||
kvm_async_pf_task_wake((u32)address);
|
||||
rcu_irq_exit();
|
||||
break;
|
||||
}
|
||||
@@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void)
|
||||
pr_info("KVM setup pv IPIs\n");
|
||||
}
|
||||
|
||||
static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
native_send_call_func_ipi(mask);
|
||||
|
||||
/* Make sure other vCPUs get a chance to run if they need to. */
|
||||
for_each_cpu(cpu, mask) {
|
||||
if (vcpu_is_preempted(cpu)) {
|
||||
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
|
||||
{
|
||||
native_smp_prepare_cpus(max_cpus);
|
||||
@@ -638,6 +653,12 @@ static void __init kvm_guest_init(void)
|
||||
#ifdef CONFIG_SMP
|
||||
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
|
||||
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
|
||||
if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
|
||||
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
|
||||
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
|
||||
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
|
||||
pr_info("KVM setup pv sched yield\n");
|
||||
}
|
||||
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
|
||||
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
|
||||
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
|
||||
@@ -817,6 +838,7 @@ asm(
|
||||
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
|
||||
"setne %al;"
|
||||
"ret;"
|
||||
".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
|
||||
".popsection");
|
||||
|
||||
#endif
|
||||
|
@@ -1,9 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* handle transition of Linux booting another kernel
|
||||
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
@@ -1,9 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* handle transition of Linux booting another kernel
|
||||
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "kexec: " fmt
|
||||
@@ -18,6 +16,7 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/efi.h>
|
||||
|
||||
#include <asm/init.h>
|
||||
#include <asm/pgtable.h>
|
||||
@@ -29,6 +28,55 @@
|
||||
#include <asm/setup.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
/*
|
||||
* Used while adding mapping for ACPI tables.
|
||||
* Can be reused when other iomem regions need be mapped
|
||||
*/
|
||||
struct init_pgtable_data {
|
||||
struct x86_mapping_info *info;
|
||||
pgd_t *level4p;
|
||||
};
|
||||
|
||||
static int mem_region_callback(struct resource *res, void *arg)
|
||||
{
|
||||
struct init_pgtable_data *data = arg;
|
||||
unsigned long mstart, mend;
|
||||
|
||||
mstart = res->start;
|
||||
mend = mstart + resource_size(res) - 1;
|
||||
|
||||
return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend);
|
||||
}
|
||||
|
||||
static int
|
||||
map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p)
|
||||
{
|
||||
struct init_pgtable_data data;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
data.info = info;
|
||||
data.level4p = level4p;
|
||||
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
||||
|
||||
ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1,
|
||||
&data, mem_region_callback);
|
||||
if (ret && ret != -EINVAL)
|
||||
return ret;
|
||||
|
||||
/* ACPI tables could be located in ACPI Non-volatile Storage region */
|
||||
ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1,
|
||||
&data, mem_region_callback);
|
||||
if (ret && ret != -EINVAL)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC_FILE
|
||||
const struct kexec_file_ops * const kexec_file_loaders[] = {
|
||||
&kexec_bzImage64_ops,
|
||||
@@ -36,6 +84,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
static int
|
||||
map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p)
|
||||
{
|
||||
#ifdef CONFIG_EFI
|
||||
unsigned long mstart, mend;
|
||||
|
||||
if (!efi_enabled(EFI_BOOT))
|
||||
return 0;
|
||||
|
||||
mstart = (boot_params.efi_info.efi_systab |
|
||||
((u64)boot_params.efi_info.efi_systab_hi<<32));
|
||||
|
||||
if (efi_enabled(EFI_64BIT))
|
||||
mend = mstart + sizeof(efi_system_table_64_t);
|
||||
else
|
||||
mend = mstart + sizeof(efi_system_table_32_t);
|
||||
|
||||
if (!mstart)
|
||||
return 0;
|
||||
|
||||
return kernel_ident_mapping_init(info, level4p, mstart, mend);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_transition_pgtable(struct kimage *image)
|
||||
{
|
||||
free_page((unsigned long)image->arch.p4d);
|
||||
@@ -50,12 +123,13 @@ static void free_transition_pgtable(struct kimage *image)
|
||||
|
||||
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
||||
{
|
||||
pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
|
||||
unsigned long vaddr, paddr;
|
||||
int result = -ENOMEM;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
unsigned long vaddr, paddr;
|
||||
int result = -ENOMEM;
|
||||
|
||||
vaddr = (unsigned long)relocate_kernel;
|
||||
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
|
||||
@@ -92,7 +166,11 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
|
||||
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
|
||||
}
|
||||
pte = pte_offset_kernel(pmd, vaddr);
|
||||
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
|
||||
|
||||
if (sev_active())
|
||||
prot = PAGE_KERNEL_EXEC;
|
||||
|
||||
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
|
||||
return 0;
|
||||
err:
|
||||
return result;
|
||||
@@ -129,6 +207,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
level4p = (pgd_t *)__va(start_pgtable);
|
||||
clear_page(level4p);
|
||||
|
||||
if (sev_active()) {
|
||||
info.page_flag |= _PAGE_ENC;
|
||||
info.kernpg_flag |= _PAGE_ENC;
|
||||
}
|
||||
|
||||
if (direct_gbpages)
|
||||
info.direct_gbpages = true;
|
||||
|
||||
@@ -159,6 +242,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare EFI systab and ACPI tables for kexec kernel since they are
|
||||
* not covered by pfn_mapped.
|
||||
*/
|
||||
result = map_efi_systab(&info, level4p);
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
result = map_acpi_tables(&info, level4p);
|
||||
if (result)
|
||||
return result;
|
||||
|
||||
return init_transition_pgtable(image, level4p);
|
||||
}
|
||||
|
||||
@@ -559,8 +654,20 @@ void arch_kexec_unprotect_crashkres(void)
|
||||
kexec_mark_crashkres(false);
|
||||
}
|
||||
|
||||
/*
|
||||
* During a traditional boot under SME, SME will encrypt the kernel,
|
||||
* so the SME kexec kernel also needs to be un-encrypted in order to
|
||||
* replicate a normal SME boot.
|
||||
*
|
||||
* During a traditional boot under SEV, the kernel has already been
|
||||
* loaded encrypted, so the SEV kexec kernel needs to be encrypted in
|
||||
* order to replicate a normal SEV boot.
|
||||
*/
|
||||
int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
|
||||
{
|
||||
if (sev_active())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If SME is active we need to be sure that kexec pages are
|
||||
* not encrypted because when we boot to the new kernel the
|
||||
@@ -571,6 +678,9 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
|
||||
|
||||
void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
|
||||
{
|
||||
if (sev_active())
|
||||
return;
|
||||
|
||||
/*
|
||||
* If SME is active we need to reset the pages back to being
|
||||
* an encrypted mapping before freeing them.
|
||||
|
@@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early)
|
||||
* local APIC has default address
|
||||
*/
|
||||
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_info("Default MP configuration #%d\n", mpf->feature1);
|
||||
construct_default_ISA_mptable(mpf->feature1);
|
||||
|
||||
} else if (mpf->physptr) {
|
||||
if (check_physptr(mpf, early)) {
|
||||
early_memunmap(mpf, sizeof(*mpf));
|
||||
return;
|
||||
}
|
||||
if (check_physptr(mpf, early))
|
||||
goto out;
|
||||
} else
|
||||
BUG();
|
||||
|
||||
@@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early)
|
||||
/*
|
||||
* Only use the first configuration found.
|
||||
*/
|
||||
|
||||
out:
|
||||
early_memunmap(mpf, sizeof(*mpf));
|
||||
}
|
||||
|
||||
|
@@ -58,24 +58,24 @@ struct branch {
|
||||
u32 delta;
|
||||
} __attribute__((packed));
|
||||
|
||||
static unsigned paravirt_patch_call(void *insnbuf, const void *target,
|
||||
static unsigned paravirt_patch_call(void *insn_buff, const void *target,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
struct branch *b = insnbuf;
|
||||
unsigned long delta = (unsigned long)target - (addr+5);
|
||||
const int call_len = 5;
|
||||
struct branch *b = insn_buff;
|
||||
unsigned long delta = (unsigned long)target - (addr+call_len);
|
||||
|
||||
if (len < 5) {
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
|
||||
#endif
|
||||
return len; /* call too long for patch site */
|
||||
if (len < call_len) {
|
||||
pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
|
||||
/* Kernel might not be viable if patching fails, bail out: */
|
||||
BUG_ON(1);
|
||||
}
|
||||
|
||||
b->opcode = 0xe8; /* call */
|
||||
b->delta = delta;
|
||||
BUILD_BUG_ON(sizeof(*b) != 5);
|
||||
BUILD_BUG_ON(sizeof(*b) != call_len);
|
||||
|
||||
return 5;
|
||||
return call_len;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
@@ -85,10 +85,10 @@ u64 notrace _paravirt_ident_64(u64 x)
|
||||
return x;
|
||||
}
|
||||
|
||||
static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
|
||||
static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
struct branch *b = insnbuf;
|
||||
struct branch *b = insn_buff;
|
||||
unsigned long delta = (unsigned long)target - (addr+5);
|
||||
|
||||
if (len < 5) {
|
||||
@@ -113,7 +113,7 @@ void __init native_pv_lock_init(void)
|
||||
static_branch_disable(&virt_spin_lock_key);
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_default(u8 type, void *insnbuf,
|
||||
unsigned paravirt_patch_default(u8 type, void *insn_buff,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
/*
|
||||
@@ -125,36 +125,36 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
|
||||
|
||||
if (opfunc == NULL)
|
||||
/* If there's no function, patch it with a ud2a (BUG) */
|
||||
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
|
||||
ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
|
||||
else if (opfunc == _paravirt_nop)
|
||||
ret = 0;
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
/* identity functions just return their single argument */
|
||||
else if (opfunc == _paravirt_ident_64)
|
||||
ret = paravirt_patch_ident_64(insnbuf, len);
|
||||
ret = paravirt_patch_ident_64(insn_buff, len);
|
||||
|
||||
else if (type == PARAVIRT_PATCH(cpu.iret) ||
|
||||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
|
||||
/* If operation requires a jmp, then jmp */
|
||||
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
|
||||
ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
|
||||
#endif
|
||||
else
|
||||
/* Otherwise call the function. */
|
||||
ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
|
||||
ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
|
||||
unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
|
||||
const char *start, const char *end)
|
||||
{
|
||||
unsigned insn_len = end - start;
|
||||
|
||||
if (insn_len > len || start == NULL)
|
||||
insn_len = len;
|
||||
else
|
||||
memcpy(insnbuf, start, insn_len);
|
||||
/* Alternative instruction is too large for the patch site and we cannot continue: */
|
||||
BUG_ON(insn_len > len || start == NULL);
|
||||
|
||||
memcpy(insn_buff, start, insn_len);
|
||||
|
||||
return insn_len;
|
||||
}
|
||||
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
|
||||
.mmu.exit_mmap = paravirt_nop,
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
.mmu.read_cr2 = native_read_cr2,
|
||||
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
|
||||
.mmu.write_cr2 = native_write_cr2,
|
||||
.mmu.read_cr3 = __native_read_cr3,
|
||||
.mmu.write_cr3 = native_write_cr3,
|
||||
|
126
arch/x86/kernel/paravirt_patch.c
Normal file
@@ -0,0 +1,126 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/stringify.h>
|
||||
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#define PSTART(d, m) \
|
||||
patch_data_##d.m
|
||||
|
||||
#define PEND(d, m) \
|
||||
(PSTART(d, m) + sizeof(patch_data_##d.m))
|
||||
|
||||
#define PATCH(d, m, insn_buff, len) \
|
||||
paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m))
|
||||
|
||||
#define PATCH_CASE(ops, m, data, insn_buff, len) \
|
||||
case PARAVIRT_PATCH(ops.m): \
|
||||
return PATCH(data, ops##_##m, insn_buff, len)
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
struct patch_xxl {
|
||||
const unsigned char irq_irq_disable[1];
|
||||
const unsigned char irq_irq_enable[1];
|
||||
const unsigned char irq_save_fl[2];
|
||||
const unsigned char mmu_read_cr2[3];
|
||||
const unsigned char mmu_read_cr3[3];
|
||||
const unsigned char mmu_write_cr3[3];
|
||||
const unsigned char irq_restore_fl[2];
|
||||
# ifdef CONFIG_X86_64
|
||||
const unsigned char cpu_wbinvd[2];
|
||||
const unsigned char cpu_usergs_sysret64[6];
|
||||
const unsigned char cpu_swapgs[3];
|
||||
const unsigned char mov64[3];
|
||||
# else
|
||||
const unsigned char cpu_iret[1];
|
||||
# endif
|
||||
};
|
||||
|
||||
static const struct patch_xxl patch_data_xxl = {
|
||||
.irq_irq_disable = { 0xfa }, // cli
|
||||
.irq_irq_enable = { 0xfb }, // sti
|
||||
.irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
|
||||
.mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
|
||||
.mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
|
||||
# ifdef CONFIG_X86_64
|
||||
.mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
|
||||
.irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
|
||||
.cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
|
||||
.cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8,
|
||||
0x48, 0x0f, 0x07 }, // swapgs; sysretq
|
||||
.cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
|
||||
.mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
|
||||
# else
|
||||
.mmu_write_cr3 = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
|
||||
.irq_restore_fl = { 0x50, 0x9d }, // push %eax; popf
|
||||
.cpu_iret = { 0xcf }, // iret
|
||||
# endif
|
||||
};
|
||||
|
||||
unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return PATCH(xxl, mov64, insn_buff, len);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
# endif /* CONFIG_PARAVIRT_XXL */
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
struct patch_lock {
|
||||
unsigned char queued_spin_unlock[3];
|
||||
unsigned char vcpu_is_preempted[2];
|
||||
};
|
||||
|
||||
static const struct patch_lock patch_data_lock = {
|
||||
.vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax
|
||||
|
||||
# ifdef CONFIG_X86_64
|
||||
.queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
|
||||
# else
|
||||
.queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
|
||||
# endif
|
||||
};
|
||||
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
|
||||
|
||||
unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
|
||||
unsigned int len)
|
||||
{
|
||||
switch (type) {
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
|
||||
PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
|
||||
PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
|
||||
PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
|
||||
|
||||
PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
|
||||
PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
|
||||
PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
|
||||
|
||||
# ifdef CONFIG_X86_64
|
||||
PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
|
||||
PATCH_CASE(cpu, swapgs, xxl, insn_buff, len);
|
||||
PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
|
||||
# else
|
||||
PATCH_CASE(cpu, iret, xxl, insn_buff, len);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
case PARAVIRT_PATCH(lock.queued_spin_unlock):
|
||||
if (pv_is_native_spin_unlock())
|
||||
return PATCH(lock, queued_spin_unlock, insn_buff, len);
|
||||
break;
|
||||
|
||||
case PARAVIRT_PATCH(lock.vcpu_is_preempted):
|
||||
if (pv_is_native_vcpu_is_preempted())
|
||||
return PATCH(lock, vcpu_is_preempted, insn_buff, len);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return paravirt_patch_default(type, insn_buff, addr, len);
|
||||
}
|
@@ -1,67 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <asm/paravirt.h>
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
DEF_NATIVE(irq, irq_disable, "cli");
|
||||
DEF_NATIVE(irq, irq_enable, "sti");
|
||||
DEF_NATIVE(irq, restore_fl, "push %eax; popf");
|
||||
DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
|
||||
DEF_NATIVE(cpu, iret, "iret");
|
||||
DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
|
||||
DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
|
||||
DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
|
||||
|
||||
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
|
||||
{
|
||||
/* arg in %edx:%eax, return in %edx:%eax */
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
|
||||
DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
|
||||
#endif
|
||||
|
||||
extern bool pv_is_native_spin_unlock(void);
|
||||
extern bool pv_is_native_vcpu_is_preempted(void);
|
||||
|
||||
unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
|
||||
{
|
||||
#define PATCH_SITE(ops, x) \
|
||||
case PARAVIRT_PATCH(ops.x): \
|
||||
return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
|
||||
|
||||
switch (type) {
|
||||
#ifdef CONFIG_PARAVIRT_XXL
|
||||
PATCH_SITE(irq, irq_disable);
|
||||
PATCH_SITE(irq, irq_enable);
|
||||
PATCH_SITE(irq, restore_fl);
|
||||
PATCH_SITE(irq, save_fl);
|
||||
PATCH_SITE(cpu, iret);
|
||||
PATCH_SITE(mmu, read_cr2);
|
||||
PATCH_SITE(mmu, read_cr3);
|
||||
PATCH_SITE(mmu, write_cr3);
|
||||
#endif
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
case PARAVIRT_PATCH(lock.queued_spin_unlock):
|
||||
if (pv_is_native_spin_unlock())
|
||||
return paravirt_patch_insns(ibuf, len,
|
||||
start_lock_queued_spin_unlock,
|
||||
end_lock_queued_spin_unlock);
|
||||
break;
|
||||
|
||||
case PARAVIRT_PATCH(lock.vcpu_is_preempted):
|
||||
if (pv_is_native_vcpu_is_preempted())
|
||||
return paravirt_patch_insns(ibuf, len,
|
||||
start_lock_vcpu_is_preempted,
|
||||
end_lock_vcpu_is_preempted);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#undef PATCH_SITE
|
||||
return paravirt_patch_default(type, ibuf, addr, len);
|
||||
}
|
@@ -1,75 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <asm/paravirt.h>
#include <asm/asm-offsets.h>
#include <linux/stringify.h>

#ifdef CONFIG_PARAVIRT_XXL
DEF_NATIVE(irq, irq_disable, "cli");
DEF_NATIVE(irq, irq_enable, "sti");
DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
DEF_NATIVE(cpu, wbinvd, "wbinvd");

DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
DEF_NATIVE(cpu, swapgs, "swapgs");
DEF_NATIVE(, mov64, "mov %rdi, %rax");

unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
{
return paravirt_patch_insns(insnbuf, len,
start__mov64, end__mov64);
}
#endif

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
#endif

extern bool pv_is_native_spin_unlock(void);
extern bool pv_is_native_vcpu_is_preempted(void);

unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
{
#define PATCH_SITE(ops, x) \
case PARAVIRT_PATCH(ops.x): \
return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)

switch (type) {
#ifdef CONFIG_PARAVIRT_XXL
PATCH_SITE(irq, restore_fl);
PATCH_SITE(irq, save_fl);
PATCH_SITE(irq, irq_enable);
PATCH_SITE(irq, irq_disable);
PATCH_SITE(cpu, usergs_sysret64);
PATCH_SITE(cpu, swapgs);
PATCH_SITE(cpu, wbinvd);
PATCH_SITE(mmu, read_cr2);
PATCH_SITE(mmu, read_cr3);
PATCH_SITE(mmu, write_cr3);
#endif
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(lock.queued_spin_unlock):
if (pv_is_native_spin_unlock())
return paravirt_patch_insns(ibuf, len,
start_lock_queued_spin_unlock,
end_lock_queued_spin_unlock);
break;

case PARAVIRT_PATCH(lock.vcpu_is_preempted):
if (pv_is_native_vcpu_is_preempted())
return paravirt_patch_insns(ibuf, len,
start_lock_vcpu_is_preempted,
end_lock_vcpu_is_preempted);
break;
#endif

default:
break;
}
#undef PATCH_SITE
return paravirt_patch_default(type, ibuf, addr, len);
}
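Both of the deleted files above funnel into the same two primitives, which survive in the consolidated paravirt_patch.c: DEF_NATIVE() captures the native instruction bytes between start_<ops>_<name> and end_<ops>_<name> markers, and paravirt_patch_insns() copies those bytes over the call site when they fit. The following is only an illustrative sketch of that copy step; the function name and the assert-based bounds check are mine, not code from this commit.

#include <assert.h>
#include <string.h>

/* Sketch: patch one paravirt call site with the native byte sequence
 * captured by DEF_NATIVE().  Returns the number of bytes written. */
static unsigned int patch_insns_sketch(unsigned char *insn_buff, unsigned int len,
                                       const unsigned char *start, const unsigned char *end)
{
        unsigned int insn_len = (unsigned int)(end - start);

        /* the replacement must exist and fit inside the patch site */
        assert(start != NULL && insn_len <= len);
        memcpy(insn_buff, start, insn_len);
        return insn_len;
}

/* Example: the native irq_enable replacement is the single "sti" opcode. */
static const unsigned char sti_bytes[] = { 0xfb };
/* patch_insns_sketch(site, site_len, sti_bytes, sti_bytes + sizeof(sti_bytes)); */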
@@ -70,7 +70,7 @@ void __init pci_iommu_alloc(void)
}

/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
* See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel
* parameter documentation.
*/
static __init int iommu_setup(char *p)
@@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return regs_get_register(regs, pt_regs_offset[idx]);
}

#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \
~((1ULL << PERF_REG_X86_MAX) - 1))

#ifdef CONFIG_X86_32
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
(1ULL << PERF_REG_X86_R9) | \
@@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)

int perf_reg_validate(u64 mask)
{
if (!mask || (mask & REG_NOSUPPORT))
if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;

return 0;
@@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,

int perf_reg_validate(u64 mask)
{
if (!mask || (mask & REG_NOSUPPORT))
if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;

return 0;
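To see what the added PERF_REG_X86_RESERVED test rejects, here is a stand-alone illustration of the bit arithmetic. The register indices are assumed values for the example only (the real ones live in the uapi perf_regs.h header), so treat the numbers as placeholders.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const unsigned int reg_max = 24;        /* assumed: one past the last GPR index */
        const unsigned int reg_xmm0 = 32;       /* assumed: first XMM register index */

        /* bits between the last GPR and XMM0 must never be requested */
        uint64_t reserved = ((1ULL << reg_xmm0) - 1) & ~((1ULL << reg_max) - 1);
        uint64_t mask = 1ULL << 28;             /* a sample_regs mask touching bit 28 */

        printf("reserved mask = %#llx\n", (unsigned long long)reserved);
        printf("request %#llx -> %s\n", (unsigned long long)mask,
               (mask & reserved) ? "-EINVAL" : "accepted");
        return 0;
}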
@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
unsigned short gs;

if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss;
if (user_mode(regs))
gs = get_user_gs(regs);
} else {
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
else
savesegment(gs, gs);
}

show_ip(regs, KERN_DEFAULT);

printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
regs->si, regs->di, regs->bp, sp);
regs->si, regs->di, regs->bp, regs->sp);
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);

if (mode != SHOW_REGS_ALL)
return;
@@ -143,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
|
||||
|
||||
void release_thread(struct task_struct *dead_task)
|
||||
{
|
||||
if (dead_task->mm) {
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
if (dead_task->mm->context.ldt) {
|
||||
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
|
||||
dead_task->comm,
|
||||
dead_task->mm->context.ldt->entries,
|
||||
dead_task->mm->context.ldt->nr_entries);
|
||||
BUG();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
WARN_ON(dead_task->mm);
|
||||
}
|
||||
|
||||
enum which_selector {
|
||||
|
@@ -25,6 +25,7 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
@@ -154,35 +155,6 @@ static inline bool invalid_selector(u16 value)
|
||||
|
||||
#define FLAG_MASK FLAG_MASK_32
|
||||
|
||||
/*
|
||||
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
|
||||
* when it traps. The previous stack will be directly underneath the saved
|
||||
* registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
|
||||
*
|
||||
* Now, if the stack is empty, '&regs->sp' is out of range. In this
|
||||
* case we try to take the previous stack. To always return a non-null
|
||||
* stack pointer we fall back to regs as stack if no previous stack
|
||||
* exists.
|
||||
*
|
||||
* This is valid only for kernel mode traps.
|
||||
*/
|
||||
unsigned long kernel_stack_pointer(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
|
||||
unsigned long sp = (unsigned long)&regs->sp;
|
||||
u32 *prev_esp;
|
||||
|
||||
if (context == (sp & ~(THREAD_SIZE - 1)))
|
||||
return sp;
|
||||
|
||||
prev_esp = (u32 *)(context);
|
||||
if (*prev_esp)
|
||||
return (unsigned long)*prev_esp;
|
||||
|
||||
return (unsigned long)regs;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_stack_pointer);
|
||||
|
||||
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
|
||||
{
|
||||
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
|
||||
@@ -645,7 +617,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
|
||||
unsigned long val = 0;
|
||||
|
||||
if (n < HBP_NUM) {
|
||||
struct perf_event *bp = thread->ptrace_bps[n];
|
||||
int index = array_index_nospec(n, HBP_NUM);
|
||||
struct perf_event *bp = thread->ptrace_bps[index];
|
||||
|
||||
if (bp)
|
||||
val = bp->hw.info.address;
|
||||
@@ -747,9 +720,6 @@ static int ioperm_get(struct task_struct *target,
|
||||
void ptrace_disable(struct task_struct *child)
|
||||
{
|
||||
user_disable_single_step(child);
|
||||
#ifdef TIF_SYSCALL_EMU
|
||||
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
|
||||
@@ -1361,18 +1331,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
|
||||
#endif
|
||||
}
|
||||
|
||||
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
|
||||
int error_code, int si_code)
|
||||
void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
tsk->thread.trap_nr = X86_TRAP_DB;
|
||||
tsk->thread.error_code = error_code;
|
||||
|
||||
/* Send us the fake SIGTRAP */
|
||||
force_sig_fault(SIGTRAP, si_code,
|
||||
user_mode(regs) ? (void __user *)regs->ip : NULL, tsk);
|
||||
user_mode(regs) ? (void __user *)regs->ip : NULL);
|
||||
}
|
||||
|
||||
void user_single_step_report(struct pt_regs *regs)
|
||||
{
|
||||
send_sigtrap(current, regs, 0, TRAP_BRKPT);
|
||||
send_sigtrap(regs, 0, TRAP_BRKPT);
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@
|
||||
|
||||
*/
|
||||
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/notifier.h>
|
||||
|
@@ -1,9 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* relocate_kernel.S - put the kernel image in place to boot
|
||||
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
@@ -1,9 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* relocate_kernel.S - put the kernel image in place to boot
|
||||
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
@@ -453,15 +453,24 @@ static void __init memblock_x86_reserve_range_setup_data(void)
|
||||
#define CRASH_ALIGN SZ_16M
|
||||
|
||||
/*
|
||||
* Keep the crash kernel below this limit. On 32 bits earlier kernels
|
||||
* would limit the kernel to the low 512 MiB due to mapping restrictions.
|
||||
* Keep the crash kernel below this limit.
|
||||
*
|
||||
* On 32 bits earlier kernels would limit the kernel to the low 512 MiB
|
||||
* due to mapping restrictions.
|
||||
*
|
||||
* On 64bit, kdump kernel need be restricted to be under 64TB, which is
|
||||
* the upper limit of system RAM in 4-level paging mode. Since the kdump
|
||||
* jumping could be from 5-level to 4-level, the jumping will fail if
|
||||
* kernel is put above 64TB, and there's no way to detect the paging mode
|
||||
* of the kernel which will be loaded for dumping during the 1st kernel
|
||||
* bootup.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
# define CRASH_ADDR_LOW_MAX SZ_512M
|
||||
# define CRASH_ADDR_HIGH_MAX SZ_512M
|
||||
#else
|
||||
# define CRASH_ADDR_LOW_MAX SZ_4G
|
||||
# define CRASH_ADDR_HIGH_MAX MAXMEM
|
||||
# define CRASH_ADDR_HIGH_MAX SZ_64T
|
||||
#endif
|
||||
|
||||
static int __init reserve_crashkernel_low(void)
|
||||
@@ -827,8 +836,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
|
||||
|
||||
void __init setup_arch(char **cmdline_p)
|
||||
{
|
||||
/*
|
||||
* Reserve the memory occupied by the kernel between _text and
|
||||
* __end_of_kernel_reserve symbols. Any kernel sections after the
|
||||
* __end_of_kernel_reserve symbol must be explicitly reserved with a
|
||||
* separate memblock_reserve() or they will be discarded.
|
||||
*/
|
||||
memblock_reserve(__pa_symbol(_text),
|
||||
(unsigned long)__bss_stop - (unsigned long)_text);
|
||||
(unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
|
||||
|
||||
/*
|
||||
* Make sure page 0 is always reserved because on systems with
|
||||
|
@@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
|
||||
put_user_ex(&frame->uc, &frame->puc);
|
||||
|
||||
/* Create the ucontext. */
|
||||
if (boot_cpu_has(X86_FEATURE_XSAVE))
|
||||
if (static_cpu_has(X86_FEATURE_XSAVE))
|
||||
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
|
||||
else
|
||||
put_user_ex(0, &frame->uc.uc_flags);
|
||||
@@ -857,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
|
||||
pr_cont("\n");
|
||||
}
|
||||
|
||||
force_sig(SIGSEGV, me);
|
||||
force_sig(SIGSEGV);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
|
@@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
}

cpumask_copy(allbutself, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), allbutself);
__cpumask_clear_cpu(smp_processor_id(), allbutself);

if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
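The swap from cpumask_clear_cpu() to __cpumask_clear_cpu() drops an atomic read-modify-write: allbutself is a private copy that no other CPU can observe at this point, so the plain bit clear is sufficient. A hedged sketch of the surrounding pattern follows; the function name and the allocation flag are assumptions, not taken from this file.

static void send_ipi_all_but_self_sketch(void)
{
        cpumask_var_t allbutself;

        if (!zalloc_cpumask_var(&allbutself, GFP_ATOMIC))
                return;

        cpumask_copy(allbutself, cpu_online_mask);
        /* private copy: the non-atomic clear avoids a locked RMW on this path */
        __cpumask_clear_cpu(smp_processor_id(), allbutself);

        /* ... hand "allbutself" to the APIC's send_IPI_mask() callback ... */

        free_cpumask_var(allbutself);
}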
@@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
|
||||
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
|
||||
|
||||
/* representing HT, core, and die siblings of each logical CPU */
|
||||
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
|
||||
|
||||
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
|
||||
|
||||
/* Per CPU bogomips and other parameters */
|
||||
@@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
|
||||
unsigned int __max_logical_packages __read_mostly;
|
||||
EXPORT_SYMBOL(__max_logical_packages);
|
||||
static unsigned int logical_packages __read_mostly;
|
||||
static unsigned int logical_die __read_mostly;
|
||||
|
||||
/* Maximum number of SMT threads on any online core */
|
||||
int __read_mostly __max_smt_threads = 1;
|
||||
@@ -210,17 +215,11 @@ static void notrace start_secondary(void *unused)
|
||||
* before cpu_init(), SMP booting is too fragile that we want to
|
||||
* limit the things done here to the most necessary things.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_PCID))
|
||||
__write_cr4(__read_cr4() | X86_CR4_PCIDE);
|
||||
cr4_init();
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* switch away from the initial page table */
|
||||
load_cr3(swapper_pg_dir);
|
||||
/*
|
||||
* Initialize the CR4 shadow before doing anything that could
|
||||
* try to read it.
|
||||
*/
|
||||
cr4_init_shadow();
|
||||
__flush_tlb_all();
|
||||
#endif
|
||||
load_current_idt();
|
||||
@@ -300,6 +299,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
|
||||
return -1;
|
||||
}
|
||||
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
|
||||
/**
|
||||
* topology_phys_to_logical_die - Map a physical die id to logical
|
||||
*
|
||||
* Returns logical die id or -1 if not found
|
||||
*/
|
||||
int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
|
||||
{
|
||||
int cpu;
|
||||
int proc_id = cpu_data(cur_cpu).phys_proc_id;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
if (c->initialized && c->cpu_die_id == die_id &&
|
||||
c->phys_proc_id == proc_id)
|
||||
return c->logical_die_id;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
EXPORT_SYMBOL(topology_phys_to_logical_die);
|
||||
|
||||
/**
|
||||
* topology_update_package_map - Update the physical to logical package map
|
||||
@@ -324,6 +343,29 @@ found:
|
||||
cpu_data(cpu).logical_proc_id = new;
|
||||
return 0;
|
||||
}
|
||||
/**
|
||||
* topology_update_die_map - Update the physical to logical die map
|
||||
* @die: The die id as retrieved via CPUID
|
||||
* @cpu: The cpu for which this is updated
|
||||
*/
|
||||
int topology_update_die_map(unsigned int die, unsigned int cpu)
|
||||
{
|
||||
int new;
|
||||
|
||||
/* Already available somewhere? */
|
||||
new = topology_phys_to_logical_die(die, cpu);
|
||||
if (new >= 0)
|
||||
goto found;
|
||||
|
||||
new = logical_die++;
|
||||
if (new != die) {
|
||||
pr_info("CPU %u Converting physical %u to logical die %u\n",
|
||||
cpu, die, new);
|
||||
}
|
||||
found:
|
||||
cpu_data(cpu).logical_die_id = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init smp_store_boot_cpu_info(void)
|
||||
{
|
||||
@@ -333,6 +375,7 @@ void __init smp_store_boot_cpu_info(void)
|
||||
*c = boot_cpu_data;
|
||||
c->cpu_index = id;
|
||||
topology_update_package_map(c->phys_proc_id, id);
|
||||
topology_update_die_map(c->cpu_die_id, id);
|
||||
c->initialized = true;
|
||||
}
|
||||
|
||||
@@ -387,6 +430,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
|
||||
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
|
||||
|
||||
if (c->phys_proc_id == o->phys_proc_id &&
|
||||
c->cpu_die_id == o->cpu_die_id &&
|
||||
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
|
||||
if (c->cpu_core_id == o->cpu_core_id)
|
||||
return topology_sane(c, o, "smt");
|
||||
@@ -398,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
|
||||
}
|
||||
|
||||
} else if (c->phys_proc_id == o->phys_proc_id &&
|
||||
c->cpu_die_id == o->cpu_die_id &&
|
||||
c->cpu_core_id == o->cpu_core_id) {
|
||||
return topology_sane(c, o, "smt");
|
||||
}
|
||||
@@ -460,6 +505,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
|
||||
{
|
||||
if ((c->phys_proc_id == o->phys_proc_id) &&
|
||||
(c->cpu_die_id == o->cpu_die_id))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
|
||||
static inline int x86_sched_itmt_flags(void)
|
||||
{
|
||||
@@ -522,6 +576,7 @@ void set_cpu_sibling_map(int cpu)
|
||||
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
|
||||
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
|
||||
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
|
||||
cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
|
||||
c->booted_cores = 1;
|
||||
return;
|
||||
}
|
||||
@@ -570,6 +625,9 @@ void set_cpu_sibling_map(int cpu)
|
||||
}
|
||||
if (match_pkg(c, o) && !topology_same_node(c, o))
|
||||
x86_has_numa_in_package = true;
|
||||
|
||||
if ((i == cpu) || (has_mp && match_die(c, o)))
|
||||
link_mask(topology_die_cpumask, cpu, i);
|
||||
}
|
||||
|
||||
threads = cpumask_weight(topology_sibling_cpumask(cpu));
|
||||
@@ -1174,6 +1232,7 @@ static __init void disable_smp(void)
|
||||
physid_set_mask_of_physid(0, &phys_cpu_present_map);
|
||||
cpumask_set_cpu(0, topology_sibling_cpumask(0));
|
||||
cpumask_set_cpu(0, topology_core_cpumask(0));
|
||||
cpumask_set_cpu(0, topology_die_cpumask(0));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1269,6 +1328,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
|
||||
for_each_possible_cpu(i) {
|
||||
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
|
||||
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
|
||||
zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
|
||||
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
|
||||
}
|
||||
|
||||
@@ -1308,8 +1368,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
|
||||
pr_info("CPU0: ");
|
||||
print_cpu_info(&cpu_data(0));
|
||||
|
||||
native_pv_lock_init();
|
||||
|
||||
uv_system_init();
|
||||
|
||||
set_mtrr_aps_delayed_init();
|
||||
@@ -1339,6 +1397,7 @@ void __init native_smp_prepare_boot_cpu(void)
|
||||
/* already set me in cpu_online_mask in boot_cpu_init() */
|
||||
cpumask_set_cpu(me, cpu_callout_mask);
|
||||
cpu_set_state_online(me);
|
||||
native_pv_lock_init();
|
||||
}
|
||||
|
||||
void __init calculate_max_logical_packages(void)
|
||||
@@ -1489,6 +1548,8 @@ static void remove_siblinginfo(int cpu)
|
||||
cpu_data(sibling).booted_cores--;
|
||||
}
|
||||
|
||||
for_each_cpu(sibling, topology_die_cpumask(cpu))
|
||||
cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
|
||||
for_each_cpu(sibling, topology_sibling_cpumask(cpu))
|
||||
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
|
||||
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
|
||||
@@ -1496,6 +1557,7 @@ static void remove_siblinginfo(int cpu)
|
||||
cpumask_clear(cpu_llc_shared_mask(cpu));
|
||||
cpumask_clear(topology_sibling_cpumask(cpu));
|
||||
cpumask_clear(topology_core_cpumask(cpu));
|
||||
cpumask_clear(topology_die_cpumask(cpu));
|
||||
c->cpu_core_id = 0;
|
||||
c->booted_cores = 0;
|
||||
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
|
||||
|
@@ -129,11 +129,9 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
|
||||
break;
|
||||
if ((unsigned long)fp < regs->sp)
|
||||
break;
|
||||
if (frame.ret_addr) {
|
||||
if (!consume_entry(cookie, frame.ret_addr, false))
|
||||
return;
|
||||
}
|
||||
if (fp == frame.next_fp)
|
||||
if (!frame.ret_addr)
|
||||
break;
|
||||
if (!consume_entry(cookie, frame.ret_addr, false))
|
||||
break;
|
||||
fp = frame.next_fp;
|
||||
}
|
||||
|
@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
|
||||
#ifdef CONFIG_FRAME_POINTER
|
||||
return *(unsigned long *)(regs->bp + sizeof(long));
|
||||
#else
|
||||
unsigned long *sp =
|
||||
(unsigned long *)kernel_stack_pointer(regs);
|
||||
unsigned long *sp = (unsigned long *)regs->sp;
|
||||
/*
|
||||
* Return address is either directly at stack pointer
|
||||
* or above a saved flags. Eflags has bits 22-31 zero,
|
||||
@@ -82,8 +81,11 @@ static void __init setup_default_timer_irq(void)
|
||||
/* Default timer init function */
|
||||
void __init hpet_time_init(void)
|
||||
{
|
||||
if (!hpet_enable())
|
||||
setup_pit_timer();
|
||||
if (!hpet_enable()) {
|
||||
if (!pit_timer_init())
|
||||
return;
|
||||
}
|
||||
|
||||
setup_default_timer_irq();
|
||||
}
|
||||
|
||||
|
@@ -5,6 +5,7 @@
#include <linux/user.h>
#include <linux/regset.h>
#include <linux/syscalls.h>
#include <linux/nospec.h>

#include <linux/uaccess.h>
#include <asm/desc.h>
@@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *u_info)
{
struct user_desc info;
int index;

if (idx == -1 && get_user(idx, &u_info->entry_number))
return -EFAULT;
@@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx,
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;

fill_user_desc(&info, idx,
&p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]);
index = idx - GDT_ENTRY_TLS_MIN;
index = array_index_nospec(index,
GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1);

fill_user_desc(&info, idx, &p->thread.tls_array[index]);

if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
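The do_get_thread_area() change above is the standard Spectre-v1 hardening pattern: bounds-check the user-supplied index, then clamp it with array_index_nospec() before it is used to address the array, so a mispredicted bounds check cannot speculatively read out of range. A minimal sketch of the same pattern with placeholder names, not the TLS code itself:

#include <linux/errno.h>
#include <linux/nospec.h>

static int read_slot_sketch(const int *table, unsigned int nr_slots,
                            unsigned int idx, int *out)
{
        if (idx >= nr_slots)
                return -EINVAL;

        /* clamp idx so it stays in bounds even under speculation */
        idx = array_index_nospec(idx, nr_slots);
        *out = table[idx];
        return 0;
}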
@@ -254,9 +254,9 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
show_signal(tsk, signr, "trap ", str, regs, error_code);

if (!sicode)
force_sig(signr, tsk);
force_sig(signr);
else
force_sig_fault(signr, sicode, addr, tsk);
force_sig_fault(signr, sicode, addr);
}
NOKPROBE_SYMBOL(do_trap);
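The signal-helper rewrites in this and the following hunks all share one shape: force_sig(), force_sig_fault() and friends now act on current implicitly, so the trailing task argument goes away. Illustrative call shape only; the surrounding function is hypothetical.

/* old: force_sig_fault(SIGFPE, FPE_INTDIV, addr, current); */
/* new: the helper always targets "current" */
static void report_divide_error_sketch(void __user *addr)
{
        force_sig_fault(SIGFPE, FPE_INTDIV, addr);
}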
|
||||
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Runs on IST stack */
|
||||
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
|
||||
{
|
||||
static const char str[] = "double fault";
|
||||
struct task_struct *tsk = current;
|
||||
#ifdef CONFIG_VMAP_STACK
|
||||
unsigned long cr2;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_ESPFIX64
|
||||
extern unsigned char native_irq_return_iret[];
|
||||
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
* stack even if the actual trigger for the double fault was
|
||||
* something else.
|
||||
*/
|
||||
cr2 = read_cr2();
|
||||
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
|
||||
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
|
||||
#endif
|
||||
@@ -566,7 +562,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
|
||||
|
||||
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
|
||||
|
||||
force_sig(SIGSEGV, tsk);
|
||||
force_sig(SIGSEGV);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_general_protection);
|
||||
|
||||
@@ -805,7 +801,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
}
|
||||
si_code = get_si_code(tsk->thread.debugreg6);
|
||||
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
|
||||
send_sigtrap(tsk, regs, error_code, si_code);
|
||||
send_sigtrap(regs, error_code, si_code);
|
||||
cond_local_irq_disable(regs);
|
||||
debug_stack_usage_dec();
|
||||
|
||||
@@ -856,7 +852,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
|
||||
return;
|
||||
|
||||
force_sig_fault(SIGFPE, si_code,
|
||||
(void __user *)uprobe_get_trap_addr(regs), task);
|
||||
(void __user *)uprobe_get_trap_addr(regs));
|
||||
}
|
||||
|
||||
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
|
||||
|
@@ -59,7 +59,7 @@ struct cyc2ns {
|
||||
|
||||
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
|
||||
|
||||
void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
|
||||
__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
|
||||
{
|
||||
int seq, idx;
|
||||
|
||||
@@ -76,7 +76,7 @@ void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
|
||||
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
|
||||
}
|
||||
|
||||
void __always_inline cyc2ns_read_end(void)
|
||||
__always_inline void cyc2ns_read_end(void)
|
||||
{
|
||||
preempt_enable_notrace();
|
||||
}
|
||||
@@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void)
|
||||
|
||||
crystal_khz = ecx_hz / 1000;
|
||||
|
||||
if (crystal_khz == 0) {
|
||||
switch (boot_cpu_data.x86_model) {
|
||||
case INTEL_FAM6_SKYLAKE_MOBILE:
|
||||
case INTEL_FAM6_SKYLAKE_DESKTOP:
|
||||
case INTEL_FAM6_KABYLAKE_MOBILE:
|
||||
case INTEL_FAM6_KABYLAKE_DESKTOP:
|
||||
crystal_khz = 24000; /* 24.0 MHz */
|
||||
break;
|
||||
case INTEL_FAM6_ATOM_GOLDMONT_X:
|
||||
crystal_khz = 25000; /* 25.0 MHz */
|
||||
break;
|
||||
case INTEL_FAM6_ATOM_GOLDMONT:
|
||||
crystal_khz = 19200; /* 19.2 MHz */
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Denverton SoCs don't report crystal clock, and also don't support
|
||||
* CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
|
||||
* clock.
|
||||
*/
|
||||
if (crystal_khz == 0 &&
|
||||
boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
|
||||
crystal_khz = 25000;
|
||||
|
||||
/*
|
||||
* TSC frequency reported directly by CPUID is a "hardware reported"
|
||||
* frequency and is the most accurate one so far we have. This
|
||||
* is considered a known frequency.
|
||||
*/
|
||||
if (crystal_khz != 0)
|
||||
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
|
||||
|
||||
/*
|
||||
* Some Intel SoCs like Skylake and Kabylake don't report the crystal
|
||||
* clock, but we can easily calculate it to a high degree of accuracy
|
||||
* by considering the crystal ratio and the CPU speed.
|
||||
*/
|
||||
if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
|
||||
unsigned int eax_base_mhz, ebx, ecx, edx;
|
||||
|
||||
cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
|
||||
crystal_khz = eax_base_mhz * 1000 *
|
||||
eax_denominator / ebx_numerator;
|
||||
}
|
||||
|
||||
if (crystal_khz == 0)
|
||||
return 0;
|
||||
/*
|
||||
* TSC frequency determined by CPUID is a "hardware reported"
|
||||
* frequency and is the most accurate one so far we have. This
|
||||
* is considered a known frequency.
|
||||
*/
|
||||
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
|
||||
|
||||
/*
|
||||
* For Atom SoCs TSC is the only reliable clocksource.
|
||||
@@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void)
|
||||
if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
|
||||
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
/*
|
||||
* The local APIC appears to be fed by the core crystal clock
|
||||
* (which sounds entirely sensible). We can set the global
|
||||
* lapic_timer_period here to avoid having to calibrate the APIC
|
||||
* timer later.
|
||||
*/
|
||||
lapic_timer_period = crystal_khz * 1000 / HZ;
|
||||
#endif
|
||||
|
||||
return crystal_khz * ebx_numerator / eax_denominator;
|
||||
}
|
||||
|
||||
|
@@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
|
||||
/*
|
||||
* MSR-based CPU/TSC frequency discovery for certain CPUs.
|
||||
*
|
||||
* Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy
|
||||
* Set global "lapic_timer_period" to bus_clock_cycles/jiffy
|
||||
* Return processor base frequency in KHz, or 0 on failure.
|
||||
*/
|
||||
unsigned long cpu_khz_from_msr(void)
|
||||
@@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void)
|
||||
res = freq * ratio;
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
lapic_timer_frequency = (freq * 1000) / HZ;
|
||||
lapic_timer_period = (freq * 1000) / HZ;
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@@ -277,7 +277,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
|
||||
tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
|
||||
tsk->thread.trap_nr = X86_TRAP_PF;
|
||||
|
||||
force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk);
|
||||
force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
|
||||
|
||||
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
|
||||
return;
|
||||
|
@@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
|
||||
}
|
||||
}
|
||||
|
||||
static size_t regs_size(struct pt_regs *regs)
|
||||
{
|
||||
/* x86_32 regs from kernel mode are two words shorter: */
|
||||
if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
|
||||
return sizeof(*regs) - 2*sizeof(long);
|
||||
|
||||
return sizeof(*regs);
|
||||
}
|
||||
|
||||
static bool in_entry_code(unsigned long ip)
|
||||
{
|
||||
char *addr = (char *)ip;
|
||||
@@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
|
||||
#else
|
||||
#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
|
||||
#endif
|
||||
|
||||
static bool update_stack_state(struct unwind_state *state,
|
||||
unsigned long *next_bp)
|
||||
{
|
||||
@@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
|
||||
size_t len;
|
||||
|
||||
if (state->regs)
|
||||
prev_frame_end = (void *)state->regs + regs_size(state->regs);
|
||||
prev_frame_end = (void *)state->regs + sizeof(*state->regs);
|
||||
else
|
||||
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
|
||||
|
||||
@@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
|
||||
regs = decode_frame_pointer(next_bp);
|
||||
if (regs) {
|
||||
frame = (unsigned long *)regs;
|
||||
len = KERNEL_REGS_SIZE;
|
||||
len = sizeof(*regs);
|
||||
state->got_irq = true;
|
||||
} else {
|
||||
frame = next_bp;
|
||||
@@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
|
||||
frame < prev_frame_end)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* On 32-bit with user mode regs, make sure the last two regs are safe
|
||||
* to access:
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
|
||||
!on_stack(info, frame, len + 2*sizeof(long)))
|
||||
return false;
|
||||
|
||||
/* Move state to the next frame: */
|
||||
if (regs) {
|
||||
state->regs = regs;
|
||||
@@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
* Pretend that the frame is complete and that BP points to it, but save
|
||||
* the real BP so that we can use it when looking for the next frame.
|
||||
*/
|
||||
if (regs && regs->ip == 0 &&
|
||||
(unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
|
||||
if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
|
||||
state->next_bp = bp;
|
||||
bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
|
||||
bp = ((unsigned long *)regs->sp) - 1;
|
||||
}
|
||||
|
||||
/* Initialize stack info and make sure the frame data is accessible: */
|
||||
|
@@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip);
|
||||
* But they are copies of the ftrace entries that are static and
|
||||
* defined in ftrace_*.S, which do have orc entries.
|
||||
*
|
||||
* If the undwinder comes across a ftrace trampoline, then find the
|
||||
* If the unwinder comes across a ftrace trampoline, then find the
|
||||
* ftrace function that was used to create it, and use that ftrace
|
||||
* function's orc entrie, as the placement of the return code in
|
||||
* function's orc entry, as the placement of the return code in
|
||||
* the stack will be identical.
|
||||
*/
|
||||
static struct orc_entry *orc_ftrace_find(unsigned long ip)
|
||||
@@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = {
|
||||
.type = ORC_TYPE_CALL
|
||||
};
|
||||
|
||||
/* Fake frame pointer entry -- used as a fallback for generated code */
|
||||
static struct orc_entry orc_fp_entry = {
|
||||
.type = ORC_TYPE_CALL,
|
||||
.sp_reg = ORC_REG_BP,
|
||||
.sp_offset = 16,
|
||||
.bp_reg = ORC_REG_PREV_SP,
|
||||
.bp_offset = -16,
|
||||
.end = 0,
|
||||
};
|
||||
|
||||
static struct orc_entry *orc_find(unsigned long ip)
|
||||
{
|
||||
static struct orc_entry *orc;
|
||||
@@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
* calls and calls to noreturn functions.
|
||||
*/
|
||||
orc = orc_find(state->signal ? state->ip : state->ip - 1);
|
||||
if (!orc)
|
||||
goto err;
|
||||
if (!orc) {
|
||||
/*
|
||||
* As a fallback, try to assume this code uses a frame pointer.
|
||||
* This is useful for generated code, like BPF, which ORC
|
||||
* doesn't know about. This is just a guess, so the rest of
|
||||
* the unwind is no longer considered reliable.
|
||||
*/
|
||||
orc = &orc_fp_entry;
|
||||
state->error = true;
|
||||
}
|
||||
|
||||
/* End-of-stack check for kernel threads: */
|
||||
if (orc->sp_reg == ORC_REG_UNDEFINED) {
|
||||
@@ -580,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
goto done;
|
||||
|
||||
state->ip = regs->ip;
|
||||
state->sp = kernel_stack_pointer(regs);
|
||||
state->sp = regs->sp;
|
||||
state->bp = regs->bp;
|
||||
state->regs = regs;
|
||||
state->full_regs = true;
|
||||
|
@@ -1074,7 +1074,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
|
||||
pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
|
||||
current->pid, regs->sp, regs->ip);
|
||||
|
||||
force_sig(SIGSEGV, current);
|
||||
force_sig(SIGSEGV);
|
||||
}
|
||||
|
||||
return -1;
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
*
|
||||
* verify_cpu.S - Code for cpu long mode and SSE verification. This
|
||||
@@ -9,9 +10,6 @@
|
||||
* Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
|
||||
* Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
|
||||
*
|
||||
* This source code is licensed under the GNU General Public License,
|
||||
* Version 2. See the file COPYING for more details.
|
||||
*
|
||||
* This is a common code for verification whether CPU supports
|
||||
* long mode and SSE or not. It is not called directly instead this
|
||||
* file is included at various places and compiled in that context.
|
||||
|
@@ -583,7 +583,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
|
||||
return 1; /* we let this handle by the calling routine */
|
||||
current->thread.trap_nr = trapno;
|
||||
current->thread.error_code = error_code;
|
||||
force_sig(SIGTRAP, current);
|
||||
force_sig(SIGTRAP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -141,10 +141,10 @@ SECTIONS
|
||||
*(.text.__x86.indirect_thunk)
|
||||
__indirect_thunk_end = .;
|
||||
#endif
|
||||
} :text = 0x9090
|
||||
|
||||
/* End of text section */
|
||||
_etext = .;
|
||||
/* End of text section */
|
||||
_etext = .;
|
||||
} :text = 0x9090
|
||||
|
||||
NOTES :text :note
|
||||
|
||||
@@ -368,6 +368,14 @@ SECTIONS
|
||||
__bss_stop = .;
|
||||
}
|
||||
|
||||
/*
|
||||
* The memory occupied from _text to here, __end_of_kernel_reserve, is
|
||||
* automatically reserved in setup_arch(). Anything after here must be
|
||||
* explicitly reserved using memblock_reserve() or it will be discarded
|
||||
* and treated as available memory.
|
||||
*/
|
||||
__end_of_kernel_reserve = .;
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
|
||||
__brk_base = .;
|
||||
@@ -379,10 +387,34 @@ SECTIONS
|
||||
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
|
||||
_end = .;
|
||||
|
||||
#ifdef CONFIG_AMD_MEM_ENCRYPT
|
||||
/*
|
||||
* Early scratch/workarea section: Lives outside of the kernel proper
|
||||
* (_text - _end).
|
||||
*
|
||||
* Resides after _end because even though the .brk section is after
|
||||
* __end_of_kernel_reserve, the .brk section is later reserved as a
|
||||
* part of the kernel. Since it is located after __end_of_kernel_reserve
|
||||
* it will be discarded and become part of the available memory. As
|
||||
* such, it can only be used by very early boot code and must not be
|
||||
* needed afterwards.
|
||||
*
|
||||
* Currently used by SME for performing in-place encryption of the
|
||||
* kernel during boot. Resides on a 2MB boundary to simplify the
|
||||
* pagetable setup used for SME in-place encryption.
|
||||
*/
|
||||
. = ALIGN(HPAGE_SIZE);
|
||||
.init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
|
||||
__init_scratch_begin = .;
|
||||
*(.init.scratch)
|
||||
. = ALIGN(HPAGE_SIZE);
|
||||
__init_scratch_end = .;
|
||||
}
|
||||
#endif
|
||||
|
||||
STABS_DEBUG
|
||||
DWARF_DEBUG
|
||||
|
||||
/* Sections to be discarded */
|
||||
DISCARDS
|
||||
/DISCARD/ : {
|
||||
*(.eh_frame)
|
||||
|
@@ -29,8 +29,8 @@ void x86_init_noop(void) { }
|
||||
void __init x86_init_uint_noop(unsigned int unused) { }
|
||||
static int __init iommu_init_noop(void) { return 0; }
|
||||
static void iommu_shutdown_noop(void) { }
|
||||
static bool __init bool_x86_init_noop(void) { return false; }
|
||||
static void x86_op_int_noop(int cpu) { }
|
||||
bool __init bool_x86_init_noop(void) { return false; }
|
||||
void x86_op_int_noop(int cpu) { }
|
||||
|
||||
/*
|
||||
* The platform setup functions are preset with the default functions
|
||||
|