x86/delay: Introduce TPAUSE delay
TPAUSE instructs the processor to enter an implementation-dependent optimized state. Execution of the instruction completes when the time-stamp counter reaches or exceeds the 64-bit value passed implicitly in EDX:EAX. Execution also completes when the operating system time-limit expires, or on an external interrupt or an exception such as a debug exception or a machine check exception.

TPAUSE offers a choice of two lower power states:
 1. Light-weight power/performance optimized state C0.1
 2. Improved power/performance optimized state C0.2

This way, it can save power with low wake-up latency in comparison to a spin-loop based delay. The selection between the two states is governed by the input register.

TPAUSE is available on processors with X86_FEATURE_WAITPKG.

Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1587757076-30337-4-git-send-email-kyung.min.park@intel.com
This commit is contained in:

committed by
Thomas Gleixner

parent
46f90c7aad
commit
cec5f268cd
@@ -96,6 +96,27 @@ static void delay_tsc(u64 cycles)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/*
|
||||
* On Intel the TPAUSE instruction waits until any of:
|
||||
* 1) the TSC counter exceeds the value provided in EDX:EAX
|
||||
* 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
|
||||
* 3) an external interrupt occurs
|
||||
*/
|
||||
static void delay_halt_tpause(u64 start, u64 cycles)
|
||||
{
|
||||
u64 until = start + cycles;
|
||||
u32 eax, edx;
|
||||
|
||||
eax = lower_32_bits(until);
|
||||
edx = upper_32_bits(until);
|
||||
|
||||
/*
|
||||
* Hard code the deeper (C0.2) sleep state because exit latency is
|
||||
* small compared to the "microseconds" that usleep() will delay.
|
||||
*/
|
||||
__tpause(TPAUSE_C02_STATE, edx, eax);
|
||||
}
|
||||
|
||||
/*
|
||||
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
|
||||
* counts with TSC frequency. The input value is the number of TSC cycles
|
||||
@@ -156,6 +177,12 @@ void __init use_tsc_delay(void)
|
||||
delay_fn = delay_tsc;
|
||||
}
|
||||
|
||||
/*
 * Switch the delay implementation over to TPAUSE: install
 * delay_halt_tpause() as delay_halt_fn and delay_halt() as delay_fn.
 *
 * NOTE(review): marked __init, so presumably called once during boot on
 * CPUs that advertise X86_FEATURE_WAITPKG - the caller is not visible
 * here, confirm.
 */
void __init use_tpause_delay(void)
|
||||
{
|
||||
delay_halt_fn = delay_halt_tpause;
|
||||
delay_fn = delay_halt;
|
||||
}
|
||||
|
||||
void use_mwaitx_delay(void)
|
||||
{
|
||||
delay_halt_fn = delay_halt_mwaitx;
|
||||
|
Reference in New Issue
Block a user