delay.c

// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <[email protected]>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPUs
 *	we have to worry about.
 */

#include <linux/export.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>

#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#include <asm/mwait.h>

#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif

static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

/* simple loop based delay: */
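/*
 * The "+a" constraint below keeps the loop counter in the accumulator
 * register, and the .align 16 directives place the branch targets on
 * 16-byte boundaries, so the timing of the dec/jnz loop does not depend
 * on how the function itself happens to be aligned (see the header
 * comment above on why this must not be inlined).
 */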
static void delay_loop(u64 __loops)
{
	unsigned long loops = (unsigned long)__loops;

	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		"3:	dec %0		\n"

		: "+a" (loops)
		:
	);
}
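
/*
 * Note: when delay_loop() is the active backend, the value that udelay()
 * feeds through __const_udelay() below is a loop count derived from
 * loops_per_jiffy, calibrated at boot. The TSC and halt based backends
 * below treat the very same argument as a number of TSC cycles instead.
 */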

/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSCs are per-CPU we need to account for
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}

/*
 * On Intel the TPAUSE instruction waits until any of:
 * 1) the TSC counter exceeds the value provided in EDX:EAX
 * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
 * 3) an external interrupt occurs
 */
static void delay_halt_tpause(u64 start, u64 cycles)
{
	u64 until = start + cycles;
	u32 eax, edx;

	eax = lower_32_bits(until);
	edx = upper_32_bits(until);

	/*
	 * Hard code the deeper (C0.2) sleep state because exit latency is
	 * small compared to the "microseconds" that usleep() will delay.
	 */
	__tpause(TPAUSE_C02_STATE, edx, eax);
}
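
/*
 * Illustrative example of the EDX:EAX split above: for a deadline of
 * start + cycles = 0x0000000200001000, TPAUSE is handed edx = 0x00000002
 * and eax = 0x00001000, and it returns once the TSC reaches that absolute
 * value (or one of the other wakeup conditions listed above fires).
 */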

/*
 * On some AMD platforms, MWAITX has a configurable 32-bit timer that
 * counts with TSC frequency. The input value is the number of TSC cycles
 * to wait. MWAITX will also exit when the timer expires.
 */
static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
	u64 delay;

	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);

	/*
	 * Use cpu_tss_rw as a cacheline-aligned, seldom-accessed per-cpu
	 * variable as the monitor target.
	 */
	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);

	/*
	 * AMD, like Intel, supports the EAX hint; EAX=0xf means do not
	 * enter any deep C-state. We use it here in delay() to minimize
	 * wakeup latency.
	 */
	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
}
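
/*
 * Because the MWAITX timer is only 32 bits wide, requests longer than
 * MWAITX_MAX_WAIT_CYCLES are capped by the min_t() above; the retry loop
 * in delay_halt() below measures the actually elapsed TSC cycles and
 * calls back in until the full delay has been served.
 */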

/*
 * Call a vendor specific function to delay for a given amount of time. Because
 * these functions may return earlier than requested, check for actual elapsed
 * time and call again until done.
 */
static void delay_halt(u64 __cycles)
{
	u64 start, end, cycles = __cycles;

	/*
	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
	 * is a store on the memory monitored by MONITORX.
	 */
	if (!cycles)
		return;

	start = rdtsc_ordered();

	for (;;) {
		delay_halt_fn(start, cycles);
		end = rdtsc_ordered();

		if (cycles <= end - start)
			break;

		cycles -= end - start;
		start = end;
	}
}
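
/*
 * Boot-time selection of the delay backend. delay_loop() is the default
 * until one of the setup functions below is called by the CPU/TSC setup
 * code elsewhere in arch/x86: use_tsc_delay() switches to the TSC
 * busy-wait, while use_tpause_delay()/use_mwaitx_delay() install the
 * halt-based path with the matching vendor primitive.
 */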
void __init use_tsc_delay(void)
{
	if (delay_fn == delay_loop)
		delay_fn = delay_tsc;
}

void __init use_tpause_delay(void)
{
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}

void use_mwaitx_delay(void)
{
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}

int read_current_timer(unsigned long *timer_val)
{
	if (delay_fn == delay_tsc) {
		*timer_val = rdtsc();
		return 0;
	}
	return -1;
}

void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);

noinline void __const_udelay(unsigned long xloops)
{
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	xloops *= 4;
	asm("mull %%edx"
		:"=d" (xloops), "=&a" (d0)
		:"1" (xloops), "0" (lpj * (HZ / 4)));

	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
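
/*
 * The scaling above is fixed-point arithmetic: __udelay() passes
 * xloops = usecs * 0x10c7, where 0x10c7 is roughly 2^32 / 10^6, and the
 * mull yields the high 32 bits of xloops * lpj * HZ, i.e. about
 * usecs * lpj * HZ / 10^6 loops (or TSC cycles). As an illustration with
 * made-up numbers, udelay(100) on a CPU with lpj = 4,000,000 and HZ = 1000
 * requests about 100 * 4,000,000 * 1000 / 1,000,000 = 400,000 loops.
 */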

void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);