Merge branch 'linus' into x86/urgent, to be able to merge a dependent fix

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2015-09-05 09:00:30 +02:00
3401 changed files with 144883 additions and 71332 deletions

View File

@@ -313,7 +313,6 @@ struct apic {
/* wakeup_secondary_cpu */
int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
bool wait_for_init_deassert;
void (*inquire_remote_apic)(int apicid);
/* apic ops */
@@ -378,7 +377,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
* APIC functionality to boot other CPUs - only used on SMP:
*/
#ifdef CONFIG_SMP
extern atomic_t init_deasserted;
extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
#endif

View File

@@ -21,7 +21,7 @@
* ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
* compiler switches.
*/
static inline unsigned int __arch_hweight32(unsigned int w)
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res = 0;
@@ -42,20 +42,23 @@ static inline unsigned int __arch_hweight8(unsigned int w)
return __arch_hweight32(w & 0xff);
}
#ifdef CONFIG_X86_32
/*
 * 32-bit variant: the population count of a 64-bit value is the sum of
 * the population counts of its low and high 32-bit halves.
 */
static inline unsigned long __arch_hweight64(__u64 w)
{
	u32 lower_half = (u32)w;
	u32 upper_half = (u32)(w >> 32);

	return __arch_hweight32(lower_half) + __arch_hweight32(upper_half);
}
#else
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res = 0;
#ifdef CONFIG_X86_32
return __arch_hweight32((u32)w) +
__arch_hweight32((u32)(w >> 32));
#else
asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
: "="REG_OUT (res)
: REG_IN (w));
#endif /* CONFIG_X86_32 */
return res;
}
#endif /* CONFIG_X86_32 */
#endif

View File

@@ -91,15 +91,4 @@ do { \
#define smp_mb__before_atomic() barrier()
#define smp_mb__after_atomic() barrier()
/*
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
* code region.
*/
static __always_inline void rdtsc_barrier(void)
{
alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
"lfence", X86_FEATURE_LFENCE_RDTSC);
}
#endif /* _ASM_X86_BARRIER_H */

View File

@@ -1,10 +0,0 @@
#ifndef _ASM_X86_CONTEXT_TRACKING_H
#define _ASM_X86_CONTEXT_TRACKING_H
#ifdef CONFIG_CONTEXT_TRACKING
# define SCHEDULE_USER call schedule_user
#else
# define SCHEDULE_USER call schedule
#endif
#endif

View File

@@ -119,6 +119,7 @@
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
@@ -176,6 +177,7 @@
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
/*
* Auxiliary flags: Linux defined - For features scattered in various

View File

@@ -4,5 +4,6 @@
#include <asm-generic/delay.h>
void use_tsc_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */

View File

@@ -78,7 +78,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
#ifdef CONFIG_X86_64
extern unsigned int vdso64_enabled;
#endif
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
extern unsigned int vdso32_enabled;
#endif
@@ -187,8 +187,8 @@ static inline void elf_common_init(struct thread_struct *t,
#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
elf_common_init(&current->thread, regs, __USER_DS)
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
#define compat_start_thread start_thread_ia32
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp);
#define compat_start_thread compat_start_thread
void set_personality_ia32(bool);
#define COMPAT_SET_PERSONALITY(ex) \
@@ -344,14 +344,9 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
*/
static inline int mmap_is_ia32(void)
{
#ifdef CONFIG_X86_32
return 1;
#endif
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_ADDR32))
return 1;
#endif
return 0;
return config_enabled(CONFIG_X86_32) ||
(config_enabled(CONFIG_COMPAT) &&
test_thread_flag(TIF_ADDR32));
}
/* Do not change the values. See get_align_mask() */

View File

@@ -182,10 +182,10 @@ extern char irq_entries_start[];
#define trace_irq_entries_start irq_entries_start
#endif
#define VECTOR_UNDEFINED (-1)
#define VECTOR_RETRIGGERED (-2)
#define VECTOR_UNUSED NULL
#define VECTOR_RETRIGGERED ((void *)~0UL)
typedef int vector_irq_t[NR_VECTORS];
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
#endif /* !ASSEMBLY_ */

View File

@@ -22,15 +22,6 @@ struct ucontext_ia32 {
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
struct ucontext_x32 {
unsigned int uc_flags;
unsigned int uc_link;
compat_stack_t uc_stack;
unsigned int uc__pad0; /* needed for alignment */
struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
/* This matches struct stat64 in glibc2.2, hence the absolutely
* insane amounts of padding around dev_t's.
*/

View File

@@ -180,6 +180,8 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
*/
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
unsigned long prot_val);

View File

@@ -52,20 +52,20 @@
/* Quark available units */
#define QRK_MBI_UNIT_HBA 0x00
#define QRK_MBI_UNIT_HB 0x03
#define QRK_MBI_UNIT_HB 0x03
#define QRK_MBI_UNIT_RMU 0x04
#define QRK_MBI_UNIT_MM 0x05
#define QRK_MBI_UNIT_MM 0x05
#define QRK_MBI_UNIT_MMESRAM 0x05
#define QRK_MBI_UNIT_SOC 0x31
/* Quark read/write opcodes */
#define QRK_MBI_HBA_READ 0x10
#define QRK_MBI_HBA_WRITE 0x11
#define QRK_MBI_HB_READ 0x10
#define QRK_MBI_HB_READ 0x10
#define QRK_MBI_HB_WRITE 0x11
#define QRK_MBI_RMU_READ 0x10
#define QRK_MBI_RMU_WRITE 0x11
#define QRK_MBI_MM_READ 0x10
#define QRK_MBI_MM_READ 0x10
#define QRK_MBI_MM_WRITE 0x11
#define QRK_MBI_MMESRAM_READ 0x12
#define QRK_MBI_MMESRAM_WRITE 0x13

View File

@@ -36,7 +36,9 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
extern bool handle_irq(unsigned irq, struct pt_regs *regs);
struct irq_desc;
extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern __visible unsigned int do_IRQ(struct pt_regs *regs);

View File

@@ -117,16 +117,6 @@
#define FPU_IRQ 13
#define FIRST_VM86_IRQ 3
#define LAST_VM86_IRQ 15
#ifndef __ASSEMBLY__
static inline int invalid_vm86_irq(int irq)
{
return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
}
#endif
/*
* Size the maximum number of interrupts.
*

View File

@@ -1,6 +1,9 @@
#ifndef _ASM_X86_KASAN_H
#define _ASM_X86_KASAN_H
#include <linux/const.h>
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
/*
* Compiler uses shadow offset assuming that addresses start
* from 0. Kernel addresses don't start from 0, so shadow

View File

@@ -252,6 +252,11 @@ struct kvm_pio_request {
int size;
};
/*
 * Bundles a reserved-bit mask table with its bad_mt_xwr value so that
 * struct kvm_mmu can hold separate instances of the pair
 * (shadow_zero_check and guest_rsvd_check).
 * NOTE(review): the meaning of the [2][4] indices is not visible from
 * this header - confirm against the users before relying on it.
 */
struct rsvd_bits_validate {
u64 rsvd_bits_mask[2][4];
u64 bad_mt_xwr;
};
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -289,8 +294,15 @@ struct kvm_mmu {
u64 *pae_root;
u64 *lm_root;
u64 rsvd_bits_mask[2][4];
u64 bad_mt_xwr;
/*
* check zero bits on shadow page table entries, these
* bits include not only hardware reserved bits but also
* the bits spte never used.
*/
struct rsvd_bits_validate shadow_zero_check;
struct rsvd_bits_validate guest_rsvd_check;
/*
* Bitmap: bit set = last pte in walk
@@ -358,6 +370,11 @@ struct kvm_mtrr {
struct list_head head;
};
/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
u64 hv_vapic;
};
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -514,8 +531,7 @@ struct kvm_vcpu_arch {
/* used for guest single stepping over the given code position */
unsigned long singlestep_rip;
/* fields used by HYPER-V emulation */
u64 hv_vapic;
struct kvm_vcpu_hv hyperv;
cpumask_var_t wbinvd_dirty_mask;
@@ -586,6 +602,17 @@ struct kvm_apic_map {
struct kvm_lapic *logical_map[16][16];
};
/* Hyper-V emulation context */
struct kvm_hv {
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
};
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -645,16 +672,14 @@ struct kvm_arch {
/* reads protected by irq_srcu, writes by irq_lock */
struct hlist_head mask_notifier_list;
/* fields used by HYPER-V emulation */
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
struct kvm_hv hyperv;
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
bool boot_vcpu_runs_old_kvmclock;
u32 bsp_vcpu_id;
u64 disabled_quirks;
};
@@ -1203,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem);
int x86_set_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
#endif /* _ASM_X86_KVM_HOST_H */

View File

@@ -2,7 +2,6 @@
#define _ASM_X86_MATH_EMU_H
#include <asm/ptrace.h>
#include <asm/vm86.h>
/* This structure matches the layout of the data saved to the stack
following a device-not-present interrupt, part of it saved
@@ -10,9 +9,6 @@
*/
struct math_emu_info {
long ___orig_eip;
union {
struct pt_regs *regs;
struct kernel_vm86_regs *vm86;
};
struct pt_regs *regs;
};
#endif /* _ASM_X86_MATH_EMU_H */

View File

@@ -151,10 +151,12 @@ extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
void mcheck_cpu_clear(struct cpuinfo_x86 *c);
void mcheck_vendor_init_severity(void);
#else
static inline int mcheck_init(void) { return 0; }
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
static inline void mcheck_vendor_init_severity(void) {}
#endif
@@ -181,20 +183,18 @@ DECLARE_PER_CPU(struct device *, mce_device);
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void mce_intel_feature_clear(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(void);
void cmci_recheck(void);
void lmce_clear(void);
void lmce_enable(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
static inline void lmce_clear(void) {}
static inline void lmce_enable(void) {}
#endif
#ifdef CONFIG_X86_MCE_AMD

View File

@@ -9,7 +9,9 @@
* we put the segment information here.
*/
typedef struct {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
#endif
#ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */

View File

@@ -33,6 +33,7 @@ static inline void load_mm_cr4(struct mm_struct *mm)
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* ldt_structs can be allocated, used, and freed, but they are never
* modified while live.
@@ -48,8 +49,23 @@ struct ldt_struct {
int size;
};
/*
* Used for LDT copy/destruction.
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
/*
 * Stub used when CONFIG_MODIFY_LDT_SYSCALL is disabled: there is no LDT
 * to copy into the new context, so this always reports success (0).
 */
static inline int init_new_context(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	(void)tsk;
	(void)mm;

	return 0;
}
/*
 * Stub used when CONFIG_MODIFY_LDT_SYSCALL is disabled: there is no LDT
 * to destroy, so this does nothing.
 */
static inline void destroy_context(struct mm_struct *mm)
{
	(void)mm;
}
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
/* lockless_dereference synchronizes with smp_store_release */
@@ -73,17 +89,13 @@ static inline void load_mm_ldt(struct mm_struct *mm)
set_ldt(ldt->entries, ldt->size);
else
clear_LDT();
#else
clear_LDT();
#endif
DEBUG_LOCKS_WARN_ON(preemptible());
}
/*
* Used for LDT copy/destruction.
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
@@ -114,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Load per-mm CR4 state */
load_mm_cr4(next);
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* Load the LDT, if the LDT is different.
*
@@ -128,6 +141,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_mm_ldt(next);
#endif
}
#ifdef CONFIG_SMP
else {

View File

@@ -7,6 +7,7 @@
struct ms_hyperv_info {
u32 features;
u32 misc_features;
u32 hints;
};
@@ -20,4 +21,8 @@ void hyperv_vector_handler(struct pt_regs *regs);
void hv_setup_vmbus_irq(void (*handler)(void));
void hv_remove_vmbus_irq(void);
void hv_setup_kexec_handler(void (*handler)(void));
void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
#endif

View File

@@ -73,6 +73,12 @@
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060
#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
#define LBR_INFO_CYCLES 0xffff
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
@@ -80,13 +86,21 @@
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)
#define RTIT_CTL_OS BIT(2)
#define RTIT_CTL_USR BIT(3)
#define RTIT_CTL_CR3EN BIT(7)
#define RTIT_CTL_TOPA BIT(8)
#define RTIT_CTL_MTC_EN BIT(9)
#define RTIT_CTL_TSC_EN BIT(10)
#define RTIT_CTL_DISRETC BIT(11)
#define RTIT_CTL_BRANCH_EN BIT(13)
#define RTIT_CTL_MTC_RANGE_OFFSET 14
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
#define RTIT_CTL_CYC_THRESH_OFFSET 19
#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
#define RTIT_CTL_PSB_FREQ_OFFSET 24
#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
#define RTIT_STATUS_CONTEXTEN BIT(1)
#define RTIT_STATUS_TRIGGEREN BIT(2)

View File

@@ -47,14 +47,13 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
* it means rax *or* rdx.
*/
#ifdef CONFIG_X86_64
#define DECLARE_ARGS(val, low, high) unsigned low, high
#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
/* Using 64-bit values saves one instruction clearing the high half of low */
#define DECLARE_ARGS(val, low, high) unsigned long low, high
#define EAX_EDX_VAL(val, low, high) ((low) | (high) << 32)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
#else
#define DECLARE_ARGS(val, low, high) unsigned long long val
#define EAX_EDX_VAL(val, low, high) (val)
#define EAX_EDX_ARGS(val, low, high) "A" (val)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
@@ -106,12 +105,19 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
return err;
}
extern unsigned long long native_read_tsc(void);
extern int rdmsr_safe_regs(u32 regs[8]);
extern int wrmsr_safe_regs(u32 regs[8]);
static __always_inline unsigned long long __native_read_tsc(void)
/**
* rdtsc() - returns the current TSC without ordering constraints
*
* rdtsc() returns the result of RDTSC as a 64-bit integer. The
* only ordering constraint it supplies is the ordering implied by
* "asm volatile": it will put the RDTSC in the place you expect. The
* CPU can and will speculatively execute that RDTSC, though, so the
* results can be non-monotonic if compared on different CPUs.
*/
static __always_inline unsigned long long rdtsc(void)
{
DECLARE_ARGS(val, low, high);
@@ -120,6 +126,35 @@ static __always_inline unsigned long long __native_read_tsc(void)
return EAX_EDX_VAL(val, low, high);
}
/**
 * rdtsc_ordered() - read the current TSC in program order
 *
 * Returns the result of RDTSC as a 64-bit integer.  The read is ordered
 * like a load from a global in-memory counter, so as long as the TSC is
 * synced it should be impossible to observe non-monotonic
 * rdtsc_ordered() results across multiple CPUs.
 */
static __always_inline unsigned long long rdtsc_ordered(void)
{
	unsigned long long tsc;

	/*
	 * RDTSC itself is not ordered relative to memory accesses.  The
	 * Intel SDM and the AMD APM are both vague on this point, but
	 * empirically the instruction can be speculatively executed
	 * before prior loads.  Issued immediately after an appropriate
	 * barrier, it appears to be ordered as a normal load: it gives
	 * the same ordering guarantees as reading a global memory
	 * location that some other imaginary CPU keeps updating with a
	 * time stamp.
	 */
	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
			  "lfence", X86_FEATURE_LFENCE_RDTSC);
	tsc = rdtsc();

	return tsc;
}
/* Deprecated, keep it for a cycle for easier merging: */
#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0)
static inline unsigned long long native_read_pmc(int counter)
{
DECLARE_ARGS(val, low, high);
@@ -153,8 +188,10 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
#define rdmsrl(msr, val) \
((val) = native_read_msr((msr)))
#define wrmsrl(msr, val) \
native_write_msr((msr), (u32)((u64)(val)), (u32)((u64)(val) >> 32))
/*
 * Write the 64-bit value @val to MSR @msr, passing it to
 * native_write_msr() as separate low and high 32-bit halves.
 */
static inline void wrmsrl(unsigned msr, u64 val)
{
	u32 low = (u32)val;
	u32 high = (u32)(val >> 32);

	native_write_msr(msr, low, high);
}
/* wrmsr with exception handling */
static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
@@ -180,12 +217,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
return err;
}
#define rdtscl(low) \
((low) = (u32)__native_read_tsc())
#define rdtscll(val) \
((val) = __native_read_tsc())
#define rdpmc(counter, low, high) \
do { \
u64 _l = native_read_pmc((counter)); \
@@ -195,15 +226,6 @@ do { \
#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
#define rdtscp(low, high, aux) \
do { \
unsigned long long _val = native_read_tscp(&(aux)); \
(low) = (u32)_val; \
(high) = (u32)(_val >> 32); \
} while (0)
#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux))
#endif /* !CONFIG_PARAVIRT */
/*

View File

@@ -14,6 +14,9 @@
#define CPUID5_ECX_INTERRUPT_BREAK 0x2
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_LOOPS ((u32)-1)
#define MWAITX_DISABLE_CSTATES 0xf
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
@@ -23,6 +26,14 @@ static inline void __monitor(const void *eax, unsigned long ecx,
:: "a" (eax), "c" (ecx), "d"(edx));
}
/*
 * Issue MONITORX, emitted as raw opcode bytes (0f 01 fa).  The three
 * arguments are placed in EAX, ECX and EDX through the "a", "c" and "d"
 * input constraints.
 * NOTE(review): presumably encoded as bytes because the mnemonic may
 * not be known to every supported assembler - confirm.
 */
static inline void __monitorx(const void *eax, unsigned long ecx,
unsigned long edx)
{
/* "monitorx %eax, %ecx, %edx;" */
asm volatile(".byte 0x0f, 0x01, 0xfa;"
:: "a" (eax), "c" (ecx), "d"(edx));
}
static inline void __mwait(unsigned long eax, unsigned long ecx)
{
/* "mwait %eax, %ecx;" */
@@ -30,6 +41,40 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
/*
* MWAITX allows for a timer expiration to get the core out of a wait state in
* addition to the default MWAIT exit condition of a store appearing at a
* monitored virtual address.
*
* Registers:
*
* MWAITX ECX[1]: enable timer if set
* MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0
* frequency is the same as the TSC frequency.
*
* Below is a comparison between MWAIT and MWAITX on AMD processors:
*
* MWAIT MWAITX
* opcode 0f 01 c9 | 0f 01 fb
* ECX[0] value of RFLAGS.IF seen by instruction
* ECX[1] unused/#GP if set | enable timer if set
* ECX[31:2] unused/#GP if set
* EAX unused (reserved for hint)
* EBX[31:0] unused | max wait time (P0 clocks)
*
* MONITOR MONITORX
* opcode 0f 01 c8 | 0f 01 fa
* EAX (logical) address to monitor
* ECX #GP if not zero
*/
static inline void __mwaitx(unsigned long eax, unsigned long ebx,
unsigned long ecx)
{
/*
 * Emitted as raw opcode bytes (0f 01 fb).  eax, ebx and ecx are
 * loaded into EAX, EBX and ECX through the "a", "b" and "c" input
 * constraints.
 */
/* "mwaitx %eax, %ebx, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xfb;"
:: "a" (eax), "b" (ebx), "c" (ecx));
}
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
trace_hardirqs_on();

View File

@@ -153,7 +153,11 @@ do { \
val = paravirt_read_msr(msr, &_err); \
} while (0)
#define wrmsrl(msr, val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
/*
 * Write the 64-bit value @val to MSR @msr by handing its low and high
 * 32-bit halves to wrmsr().
 */
static inline void wrmsrl(unsigned msr, u64 val)
{
	u32 low = (u32)val;
	u32 high = (u32)(val >> 32);

	wrmsr(msr, low, high);
}
#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b)
/* rdmsr with exception handling */
@@ -174,19 +178,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
return err;
}
static inline u64 paravirt_read_tsc(void)
{
return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}
#define rdtscl(low) \
do { \
u64 _l = paravirt_read_tsc(); \
low = (int)_l; \
} while (0)
#define rdtscll(val) (val = paravirt_read_tsc())
static inline unsigned long long paravirt_sched_clock(void)
{
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
@@ -215,27 +206,6 @@ do { \
#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
{
return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
}
#define rdtscp(low, high, aux) \
do { \
int __aux; \
unsigned long __val = paravirt_rdtscp(&__aux); \
(low) = (u32)__val; \
(high) = (u32)(__val >> 32); \
(aux) = __aux; \
} while (0)
#define rdtscpll(val, aux) \
do { \
unsigned long __aux; \
val = paravirt_rdtscp(&__aux); \
(aux) = __aux; \
} while (0)
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);

View File

@@ -156,9 +156,7 @@ struct pv_cpu_ops {
u64 (*read_msr)(unsigned int msr, int *err);
int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
u64 (*read_tsc)(void);
u64 (*read_pmc)(int counter);
unsigned long long (*read_tscp)(unsigned int *aux);
#ifdef CONFIG_X86_32
/*

View File

@@ -93,8 +93,6 @@ extern raw_spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
extern bool mp_should_keep_irq(struct device *dev);
struct pci_raw_ops {
int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);

View File

@@ -159,6 +159,13 @@ struct x86_pmu_capability {
*/
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(58)
/*
* IBS cpuid feature detection
*/

View File

@@ -18,6 +18,8 @@
/* ValleyView Power Control Unit PCI Device ID */
#define PCI_DEVICE_ID_VLV_PMC 0x0F1C
/* CherryTrail Power Control Unit PCI Device ID */
#define PCI_DEVICE_ID_CHT_PMC 0x229C
/* PMC Memory mapped IO registers */
#define PMC_BASE_ADDR_OFFSET 0x44
@@ -29,6 +31,10 @@
#define PMC_FUNC_DIS 0x34
#define PMC_FUNC_DIS_2 0x38
/* CHT specific bits in FUNC_DIS2 register */
#define BIT_FD_GMM BIT(3)
#define BIT_FD_ISH BIT(4)
/* S0ix wake event control */
#define PMC_S0IX_WAKE_EN 0x3C
@@ -75,6 +81,21 @@
#define PMC_PSS_BIT_USB BIT(16)
#define PMC_PSS_BIT_USB_SUS BIT(17)
/* CHT specific bits in PSS register */
#define PMC_PSS_BIT_CHT_UFS BIT(7)
#define PMC_PSS_BIT_CHT_UXD BIT(11)
#define PMC_PSS_BIT_CHT_UXD_FD BIT(12)
#define PMC_PSS_BIT_CHT_UX_ENG BIT(15)
#define PMC_PSS_BIT_CHT_USB_SUS BIT(16)
#define PMC_PSS_BIT_CHT_GMM BIT(17)
#define PMC_PSS_BIT_CHT_ISH BIT(18)
#define PMC_PSS_BIT_CHT_DFX_MASTER BIT(26)
#define PMC_PSS_BIT_CHT_DFX_CLUSTER1 BIT(27)
#define PMC_PSS_BIT_CHT_DFX_CLUSTER2 BIT(28)
#define PMC_PSS_BIT_CHT_DFX_CLUSTER3 BIT(29)
#define PMC_PSS_BIT_CHT_DFX_CLUSTER4 BIT(30)
#define PMC_PSS_BIT_CHT_DFX_CLUSTER5 BIT(31)
/* These registers reflect D3 status of functions */
#define PMC_D3_STS_0 0xA0
@@ -117,6 +138,10 @@
#define BIT_USH_SS_PHY BIT(2)
#define BIT_DFX BIT(3)
/* CHT specific bits in PMC_D3_STS_1 register */
#define BIT_STS_GMM BIT(1)
#define BIT_STS_ISH BIT(2)
/* PMC I/O Registers */
#define ACPI_BASE_ADDR_OFFSET 0x40
#define ACPI_BASE_ADDR_MASK 0xFFFFFE00
@@ -126,4 +151,8 @@
#define SLEEP_TYPE_MASK 0xFFFFECFF
#define SLEEP_TYPE_S5 0x1C00
#define SLEEP_ENABLE 0x2000
extern int pmc_atom_read(int offset, u32 *value);
extern int pmc_atom_write(int offset, u32 value);
#endif /* PMC_ATOM_H */

View File

@@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(void)
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(!raw_cpu_read_4(__preempt_count));
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPT

View File

@@ -6,8 +6,8 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
struct vm86;
#include <asm/vm86.h>
#include <asm/math_emu.h>
#include <asm/segment.h>
#include <asm/types.h>
@@ -400,15 +400,9 @@ struct thread_struct {
unsigned long cr2;
unsigned long trap_nr;
unsigned long error_code;
#ifdef CONFIG_X86_32
#ifdef CONFIG_VM86
/* Virtual 86 mode info */
struct vm86_struct __user *vm86_info;
unsigned long screen_bitmap;
unsigned long v86flags;
unsigned long v86mask;
unsigned long saved_sp0;
unsigned int saved_fs;
unsigned int saved_gs;
struct vm86 *vm86;
#endif
/* IO permissions: */
unsigned long *io_bitmap_ptr;
@@ -651,14 +645,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
extern void set_task_blockstep(struct task_struct *task, bool on);
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
*/
extern unsigned int machine_id;
extern unsigned int machine_submodel_id;
extern unsigned int BIOS_revision;
/* Boot loader type from the setup header: */
extern int bootloader_type;
extern int bootloader_version;
@@ -720,7 +706,6 @@ static inline void spin_lock_prefetch(const void *x)
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
}

View File

@@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch,
unsigned long phase1_result);
extern long syscall_trace_enter(struct pt_regs *);
extern void syscall_trace_leave(struct pt_regs *);
static inline unsigned long regs_return_value(struct pt_regs *regs)
{

View File

@@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
static __always_inline
u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
{
u64 delta = __native_read_tsc() - src->tsc_timestamp;
u64 delta = rdtsc_ordered() - src->tsc_timestamp;
return pvclock_scale_delta(delta, src->tsc_to_system_mul,
src->tsc_shift);
}
@@ -76,13 +76,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
u8 ret_flags;
version = src->version;
/* Note: emulated platforms which do not advertise SSE2 support
* result in kvmclock not using the necessary RDTSC barriers.
* Without barriers, it is possible that RDTSC instruction reads from
* the time stamp counter outside rdtsc_barrier protected section
* below, resulting in violation of monotonicity.
*/
rdtsc_barrier();
offset = pvclock_get_nsec_offset(src);
ret = src->system_time + offset;
ret_flags = src->flags;

View File

@@ -4,6 +4,7 @@
#include <asm/sigcontext.h>
#include <asm/siginfo.h>
#include <asm/ucontext.h>
#include <linux/compat.h>
#ifdef CONFIG_X86_32
#define sigframe_ia32 sigframe
@@ -69,6 +70,15 @@ struct rt_sigframe {
#ifdef CONFIG_X86_X32_ABI
/*
 * User context for the x32 rt signal frame; embedded as the 'uc' member
 * of struct rt_sigframe_x32.
 */
struct ucontext_x32 {
unsigned int uc_flags;
unsigned int uc_link;
compat_stack_t uc_stack;
unsigned int uc__pad0; /* needed for alignment */
struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
struct rt_sigframe_x32 {
u64 pretcode;
struct ucontext_x32 uc;

View File

@@ -30,7 +30,7 @@ typedef sigset_t compat_sigset_t;
#endif /* __ASSEMBLY__ */
#include <uapi/asm/signal.h>
#ifndef __ASSEMBLY__
extern void do_notify_resume(struct pt_regs *, void *, __u32);
extern void do_signal(struct pt_regs *regs);
#define __ARCH_HAS_SA_RESTORER

View File

@@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void)
* on during the bootup the random pool has true entropy too.
*/
get_random_bytes(&canary, sizeof(canary));
tsc = __native_read_tsc();
tsc = rdtsc();
canary += tsc + (tsc << 32UL);
current->stack_canary = canary;

View File

@@ -37,6 +37,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *);
asmlinkage unsigned long sys_sigreturn(void);
/* kernel/vm86_32.c */
struct vm86_struct;
asmlinkage long sys_vm86old(struct vm86_struct __user *);
asmlinkage long sys_vm86(unsigned long, unsigned long);

View File

@@ -27,14 +27,17 @@
* Without this offset, that can result in a page fault. (We are
* careful that, in this case, the value we read doesn't matter.)
*
* In vm86 mode, the hardware frame is much longer still, but we neither
* access the extra members from NMI context, nor do we write such a
* frame at sp0 at all.
* In vm86 mode, the hardware frame is much longer still, so add 16
* bytes to make room for the real-mode segments.
*
* x86_64 has a fixed-length stack frame.
*/
#ifdef CONFIG_X86_32
# define TOP_OF_KERNEL_STACK_PADDING 8
# ifdef CONFIG_VM86
# define TOP_OF_KERNEL_STACK_PADDING 16
# else
# define TOP_OF_KERNEL_STACK_PADDING 8
# endif
#else
# define TOP_OF_KERNEL_STACK_PADDING 0
#endif
@@ -140,27 +143,11 @@ struct thread_info {
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
_TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
(0x0000FFFF & \
~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
_TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
_TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)

View File

@@ -112,8 +112,8 @@ asmlinkage void smp_threshold_interrupt(void);
asmlinkage void smp_deferred_error_interrupt(void);
#endif
extern enum ctx_state ist_enter(struct pt_regs *regs);
extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
extern void ist_enter(struct pt_regs *regs);
extern void ist_exit(struct pt_regs *regs);
extern void ist_begin_non_atomic(struct pt_regs *regs);
extern void ist_end_non_atomic(void);

View File

@@ -21,28 +21,12 @@ extern void disable_TSC(void);
static inline cycles_t get_cycles(void)
{
unsigned long long ret = 0;
#ifndef CONFIG_X86_TSC
if (!cpu_has_tsc)
return 0;
#endif
rdtscll(ret);
return ret;
}
/*
 * Always-inlined counterpart of get_cycles() that reads the counter with
 * __native_read_tsc().  Returns 0 when built without CONFIG_X86_TSC and
 * cpu_has_tsc is false.
 */
static __always_inline cycles_t vget_cycles(void)
{
	/*
	 * We only do VDSOs on TSC capable CPUs, so this shouldn't
	 * access boot_cpu_data (which is not VDSO-safe):
	 */
#ifndef CONFIG_X86_TSC
	if (!cpu_has_tsc)
		return 0;
#endif
	/* Single exit point: no statement may follow this return. */
	return (cycles_t)__native_read_tsc();
}
extern void tsc_init(void);
@@ -51,6 +35,7 @@ extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern int check_tsc_disabled(void);
extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
extern int tsc_clocksource_reliable;

View File

@@ -1,7 +1,6 @@
#ifndef _ASM_X86_VM86_H
#define _ASM_X86_VM86_H
#include <asm/ptrace.h>
#include <uapi/asm/vm86.h>
@@ -28,43 +27,49 @@ struct kernel_vm86_regs {
unsigned short gs, __gsh;
};
struct kernel_vm86_struct {
struct kernel_vm86_regs regs;
/*
* the below part remains on the kernel stack while we are in VM86 mode.
* 'tss.esp0' then contains the address of VM86_TSS_ESP0 below, and when we
* get forced back from VM86, the CPU and "SAVE_ALL" will restore the above
* 'struct kernel_vm86_regs' with the then actual values.
* Therefore, pt_regs in fact points to a complete 'kernel_vm86_struct'
* in kernelspace, hence we need not reget the data from userspace.
*/
#define VM86_TSS_ESP0 flags
struct vm86 {
struct vm86plus_struct __user *user_vm86;
struct pt_regs regs32;
unsigned long veflags;
unsigned long veflags_mask;
unsigned long saved_sp0;
unsigned long flags;
unsigned long screen_bitmap;
unsigned long cpu_type;
struct revectored_struct int_revectored;
struct revectored_struct int21_revectored;
struct vm86plus_info_struct vm86plus;
struct pt_regs *regs32; /* here we save the pointer to the old regs */
/*
* The below is not part of the structure, but the stack layout continues
* this way. In front of 'return-eip' may be some data, depending on
* compilation, so we don't rely on this and save the pointer to 'oldregs'
* in 'regs32' above.
* However, with GCC-2.7.2 and the current CFLAGS you see exactly this:
long return-eip; from call to vm86()
struct pt_regs oldregs; user space registers as saved by syscall
*/
};
#ifdef CONFIG_VM86
void handle_vm86_fault(struct kernel_vm86_regs *, long);
int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
/*
 * Same signature as the inline stub in the #else branch below; a second,
 * pointer-returning one-argument prototype would be an incompatible
 * redeclaration.
 */
void save_v86_state(struct kernel_vm86_regs *, int);
struct task_struct;
/*
 * Free the vm86 state attached to thread_struct @t, if there is any, and
 * clear the pointer so a repeated call finds nothing left to free.
 * @t is evaluated exactly once.
 */
#define free_vm86(t) do {				\
	struct thread_struct *__t = (t);		\
	if (__t->vm86) {				\
		kfree(__t->vm86);			\
		__t->vm86 = NULL;			\
	}						\
} while (0)
/*
 * VM86 programs may request interrupts for real mode hardware drivers.
 * Only IRQ numbers in [FIRST_VM86_IRQ, LAST_VM86_IRQ] are accepted.
 */
#define FIRST_VM86_IRQ		3
#define LAST_VM86_IRQ		15

/* Returns 1 if @irq lies outside the inclusive range above, else 0. */
static inline int invalid_vm86_irq(int irq)
{
	return !(irq >= FIRST_VM86_IRQ && irq <= LAST_VM86_IRQ);
}
void release_vm86_irqs(struct task_struct *);
#else
@@ -77,6 +82,10 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
return 0;
}
/* No-op stand-ins for the #else (CONFIG_VM86 not set) branch. */
static inline void save_v86_state(struct kernel_vm86_regs *a, int b)
{
}

#define free_vm86(t) do { } while (0)
#endif /* CONFIG_VM86 */
#endif /* _ASM_X86_VM86_H */

View File

@@ -47,6 +47,7 @@
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_USE_IO_BITMAPS 0x02000000
#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000
#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
@@ -367,29 +368,29 @@ enum vmcs_field {
#define TYPE_PHYSICAL_APIC_EVENT (10 << 12)
#define TYPE_PHYSICAL_APIC_INST (15 << 12)
/* segment AR */
/* Unprefixed spelling; same bit (13) as VMX_SEGMENT_AR_L_MASK below. */
#define SEGMENT_AR_L_MASK (1 << 13)
/* segment AR in VMCS -- these are different from what LAR reports */
#define VMX_SEGMENT_AR_L_MASK (1 << 13)
/*
 * Segment type: bits within the low nibble selected by AR_TYPE_MASK,
 * followed by whole type values.  The 64-bit and 32-bit busy TSS types
 * share the value 11.  Unprefixed names first, VMX_-prefixed twins after.
 */
#define AR_TYPE_ACCESSES_MASK 1
#define AR_TYPE_READABLE_MASK (1 << 1)
#define AR_TYPE_WRITEABLE_MASK (1 << 2)
#define AR_TYPE_CODE_MASK (1 << 3)
#define AR_TYPE_MASK 0x0f
#define AR_TYPE_BUSY_64_TSS 11
#define AR_TYPE_BUSY_32_TSS 11
#define AR_TYPE_BUSY_16_TSS 3
#define AR_TYPE_LDT 2
/* VMX_-prefixed names carrying exactly the values listed above. */
#define VMX_AR_TYPE_ACCESSES_MASK 1
#define VMX_AR_TYPE_READABLE_MASK (1 << 1)
#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2)
#define VMX_AR_TYPE_CODE_MASK (1 << 3)
#define VMX_AR_TYPE_MASK 0x0f
#define VMX_AR_TYPE_BUSY_64_TSS 11
#define VMX_AR_TYPE_BUSY_32_TSS 11
#define VMX_AR_TYPE_BUSY_16_TSS 3
#define VMX_AR_TYPE_LDT 2
/*
 * Single-bit attribute masks (bits 4, 7, 13, 14, 15 and 16) and the
 * two-bit DPL field extracted from bit 5 upward.  Unprefixed names.
 */
#define AR_UNUSABLE_MASK (1 << 16)
#define AR_S_MASK (1 << 4)
#define AR_P_MASK (1 << 7)
#define AR_L_MASK (1 << 13)
#define AR_DB_MASK (1 << 14)
#define AR_G_MASK (1 << 15)
#define AR_DPL_SHIFT 5
#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
/* VMX_-prefixed names carrying exactly the values listed above. */
#define VMX_AR_UNUSABLE_MASK (1 << 16)
#define VMX_AR_S_MASK (1 << 4)
#define VMX_AR_P_MASK (1 << 7)
#define VMX_AR_L_MASK (1 << 13)
#define VMX_AR_DB_MASK (1 << 14)
#define VMX_AR_G_MASK (1 << 15)
#define VMX_AR_DPL_SHIFT 5
#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3)
/* Covers bits 8-11 and 17-31; identical value under both names. */
#define AR_RESERVD_MASK 0xfffe0f00
#define VMX_AR_RESERVD_MASK 0xfffe0f00
#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1)