rseq_test.c

// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/sysinfo.h>
#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/rseq.h>
#include <linux/unistd.h>

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
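
/* Pull in the rseq selftest library for rseq registration and CPU ID accessors. */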
#include "../rseq/rseq.c"

/*
 * Any bug related to task migration is likely to be timing-dependent; perform
 * a large number of migrations to reduce the odds of a false negative.
 */
#define NR_TASK_MIGRATIONS 100000

static pthread_t migration_thread;
static cpu_set_t possible_mask;
static int min_cpu, max_cpu;
static bool done;
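
/*
 * Sequence counter bumped by the migration worker around each affinity
 * change: an odd value means a migration may be in flight, and a changed
 * value means a migration completed since the counter was last read.
 */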
static atomic_t seq_cnt;
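
/*
 * The guest does nothing except ping-pong with the host via GUEST_SYNC,
 * i.e. every KVM_RUN exits back to userspace almost immediately so the
 * main loop can re-check rseq vs. getcpu() after each run.
 */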
static void guest_code(void)
{
	for (;;)
		GUEST_SYNC(0);
}

/*
 * We have to perform a direct system call for getcpu() because it's
 * not available in glibc until 2.29.
 */
static void sys_getcpu(unsigned *cpu)
{
	int r;

	r = syscall(__NR_getcpu, cpu, NULL, NULL);
	TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)", errno, strerror(errno));
}

static int next_cpu(int cpu)
{
	/*
	 * Advance to the next CPU, skipping those that weren't in the original
	 * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
	 * data storage is considered opaque.  Note, if this task is pinned
	 * to a small set of discontiguous CPUs, e.g. 2 and 1023, this loop
	 * will burn a lot of cycles and the test will take longer than normal
	 * to complete.
	 */
	do {
		cpu++;
		if (cpu > max_cpu) {
			cpu = min_cpu;
			TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
				    "Min CPU = %d must always be usable", cpu);
			break;
		}
	} while (!CPU_ISSET(cpu, &possible_mask));

	return cpu;
}
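
/*
 * Worker thread that bounces the target task (identified by the TID passed
 * in via __rseq_tid) from CPU to CPU by repeatedly pinning it to a
 * single-CPU affinity mask, bumping seq_cnt around each change so the
 * reader can detect in-flight and completed migrations.
 */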
static void *migration_worker(void *__rseq_tid)
{
	pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
	cpu_set_t allowed_mask;
	int r, i, cpu;

	CPU_ZERO(&allowed_mask);

	for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
		CPU_SET(cpu, &allowed_mask);

		/*
		 * Bump the sequence count twice to allow the reader to detect
		 * that a migration may have occurred in between rseq and sched
		 * CPU ID reads.  An odd sequence count indicates a migration
		 * is in-progress, while a completely different count indicates
		 * a migration occurred since the count was last read.
		 */
		atomic_inc(&seq_cnt);

		/*
		 * Ensure the odd count is visible while getcpu() isn't
		 * stable, i.e. while changing affinity is in-progress.
		 */
		smp_wmb();
		r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
			    errno, strerror(errno));
		smp_wmb();
		atomic_inc(&seq_cnt);

		CPU_CLR(cpu, &allowed_mask);
		/*
		 * Wait 1-10us before proceeding to the next iteration and more
		 * specifically, before bumping seq_cnt again.  A delay is
		 * needed on three fronts:
		 *
		 *  1. To allow sched_setaffinity() to prompt migration before
		 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
		 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
		 *     NOTIFY_RESUME) is handled in KVM context.
		 *
		 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
		 *     the guest, the guest will trigger an IO/MMIO exit all
		 *     the way to userspace and the TIF flags will be handled
		 *     by the generic "exit to userspace" logic, not by KVM.
		 *     The exit to userspace is necessary to give the test a
		 *     chance to check the rseq CPU ID (see #2).
		 *
		 *     Alternatively, guest_code() could include an instruction
		 *     to trigger an exit that is handled by KVM, but any such
		 *     exit requires architecture specific code.
		 *
		 *  2. To let ioctl(KVM_RUN) make its way back to the test
		 *     before the next round of migration.  The test's check on
		 *     the rseq CPU ID must wait for migration to complete in
		 *     order to avoid false positives, thus any kernel rseq bug
		 *     will be missed if the next migration starts before the
		 *     check completes.
		 *
		 *  3. To ensure the read-side makes efficient forward
		 *     progress, e.g. if getcpu() involves a syscall.  Stalling
		 *     the read-side means the test will spend more time
		 *     waiting for getcpu() to stabilize and less time trying
		 *     to hit the timing-dependent bug.
		 *
		 * Because any bug in this area is likely to be timing-dependent,
		 * run with a range of delays at 1us intervals from 1us to 10us
		 * as a best effort to avoid tuning the test to the point where
		 * it can hit _only_ the original bug and not detect future
		 * regressions.
		 *
		 * The original bug can be reproduced with a delay of up to
		 * ~500us on x86-64, but starts to require more iterations to
		 * reproduce as the delay creeps above ~10us, and the average
		 * runtime of each iteration obviously increases as well.  Cap
		 * the delay at 10us to keep test runtime reasonable while
		 * minimizing potential coverage loss.
		 *
		 * The lower bound for reproducing the bug is likely below 1us,
		 * e.g. failures occur on x86-64 with nanosleep(0), but at that
		 * point the overhead of the syscall likely dominates the delay.
		 * Use usleep() for simplicity and to avoid unnecessary kernel
		 * dependencies.
		 */
		usleep((i % 10) + 1);
	}
	done = true;
	return NULL;
}
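
/*
 * Compute the min/max usable CPU from possible_mask and require at least
 * two usable CPUs, without which task migration is impossible.
 */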
static void calc_min_max_cpu(void)
{
	int i, cnt, nproc;

	TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);

	/*
	 * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
	 * this task is affined to in order to reduce the time spent querying
	 * unusable CPUs, e.g. if this task is pinned to a small percentage of
	 * total CPUs.
	 */
	nproc = get_nprocs_conf();
	min_cpu = -1;
	max_cpu = -1;
	cnt = 0;

	for (i = 0; i < nproc; i++) {
		if (!CPU_ISSET(i, &possible_mask))
			continue;
		if (min_cpu == -1)
			min_cpu = i;
		max_cpu = i;
		cnt++;
	}

	__TEST_REQUIRE(cnt >= 2,
		       "Only one usable CPU, task migration not possible");
}

int main(int argc, char *argv[])
{
	int r, i, snapshot;
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	u32 cpu, rseq_cpu;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
		    strerror(errno));

	calc_min_max_cpu();

	r = rseq_register_current_thread();
	TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
		    errno, strerror(errno));

	/*
	 * Create and run a dummy VM that immediately exits to userspace via
	 * GUEST_SYNC, while concurrently migrating the process by setting its
	 * CPU affinity.
	 */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
	ucall_init(vm, NULL);
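
	/*
	 * Pass this thread's TID to the migration worker so that
	 * sched_setaffinity() targets the task doing KVM_RUN rather than the
	 * worker thread itself.
	 */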
	pthread_create(&migration_thread, NULL, migration_worker,
		       (void *)(unsigned long)syscall(SYS_gettid));

	for (i = 0; !done; i++) {
		vcpu_run(vcpu);
		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
			    "Guest failed?");

		/*
		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
		 * doesn't occur between getcpu() and reading the rseq cpu_id
		 * by rereading both if the sequence count changes, or if the
		 * count is odd (migration in-progress).
		 */
		do {
			/*
			 * Drop bit 0 to force a mismatch if the count is odd,
			 * i.e. if a migration is in-progress.
			 */
			snapshot = atomic_read(&seq_cnt) & ~1;

			/*
			 * Ensure calling getcpu() and reading rseq.cpu_id
			 * complete in a single "no migration" window, i.e. are
			 * not reordered across the seq_cnt reads.
			 */
			smp_rmb();
			sys_getcpu(&cpu);
			rseq_cpu = rseq_current_cpu_raw();
			smp_rmb();
		} while (snapshot != atomic_read(&seq_cnt));

		TEST_ASSERT(rseq_cpu == cpu,
			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
	}

	/*
	 * Sanity check that the test was able to enter the guest a reasonable
	 * number of times, e.g. didn't get stalled too often/long waiting for
	 * getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a fairly
	 * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
	 * migrations given the 1us+ delay in the migration task.
	 */
	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);

	pthread_join(migration_thread, NULL);

	kvm_vm_free(vm);

	rseq_unregister_current_thread();

	return 0;
}