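/*
 * trace.c - nVHE hypervisor side of the pKVM tracing ring-buffer.
 *
 * Each CPU owns a struct hyp_rb_per_cpu ring of buffer pages shared with
 * the host. The hypervisor writes events into the ring
 * (rb_reserve_trace_entry()); the host consumes them by asking the
 * hypervisor to swap its reader page into the ring
 * (__pkvm_rb_swap_reader_page()) and by refreshing the statistics footer
 * of the reader page (__pkvm_rb_update_footers()).
 */
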
#include <nvhe/clock.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trace.h>

#include <asm/kvm_mmu.h>
#include <asm/local.h>

#include <linux/ring_buffer.h>
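
/*
 * Page-list flags, stored in the two low bits of a buffer page's
 * list.next pointer (list_head pointers are at least 4-byte aligned, so
 * those bits are free): HEAD marks the pointed-to page as the current
 * head, UPDATE marks a head move in progress.
 */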
#define HYP_RB_PAGE_HEAD	1UL
#define HYP_RB_PAGE_UPDATE	2UL
#define HYP_RB_FLAG_MASK	3UL

static struct hyp_buffer_pages_backing hyp_buffer_pages_backing;

DEFINE_PER_CPU(struct hyp_rb_per_cpu, trace_rb);
DEFINE_HYP_SPINLOCK(trace_rb_lock);
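
/*
 * Atomically replace the flag bits of bpage->list.next. Returns false if
 * the link changed under us (e.g. the reader swapped its page in
 * concurrently), in which case the caller must re-evaluate and retry.
 */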
static bool rb_set_flag(struct hyp_buffer_page *bpage, int new_flag)
{
        unsigned long ret, val = (unsigned long)bpage->list.next;

        ret = cmpxchg((unsigned long *)&bpage->list.next,
                      val, (val & ~HYP_RB_FLAG_MASK) | new_flag);

        return ret == val;
}

static void rb_set_footer_status(struct hyp_buffer_page *bpage,
                                 unsigned long status,
                                 bool reader)
{
        struct buffer_data_page *page = bpage->page;
        struct rb_ext_page_footer *footer;

        footer = rb_ext_page_get_footer(page);

        if (reader)
                atomic_set(&footer->reader_status, status);
        else
                atomic_set(&footer->writer_status, status);
}

static void rb_footer_writer_status(struct hyp_buffer_page *bpage,
                                    unsigned long status)
{
        rb_set_footer_status(bpage, status, false);
}

static void rb_footer_reader_status(struct hyp_buffer_page *bpage,
                                    unsigned long status)
{
        rb_set_footer_status(bpage, status, true);
}

static struct hyp_buffer_page *rb_hyp_buffer_page(struct list_head *list)
{
        unsigned long ptr = (unsigned long)list & ~HYP_RB_FLAG_MASK;

        return container_of((struct list_head *)ptr, struct hyp_buffer_page, list);
}

static struct hyp_buffer_page *rb_next_page(struct hyp_buffer_page *bpage)
{
        return rb_hyp_buffer_page(bpage->list.next);
}

static bool rb_is_head_page(struct hyp_buffer_page *bpage)
{
        return (unsigned long)bpage->list.prev->next & HYP_RB_PAGE_HEAD;
}
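
/*
 * Walk the ring from the cached head_page until the page carrying the
 * HEAD flag is found and cache it. Gives up after a few laps if it keeps
 * losing the race against the writer moving the head.
 */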
static struct hyp_buffer_page *rb_set_head_page(struct hyp_rb_per_cpu *cpu_buffer)
{
        struct hyp_buffer_page *bpage, *prev_head;
        int cnt = 0;

again:
        bpage = prev_head = cpu_buffer->head_page;
        do {
                if (rb_is_head_page(bpage)) {
                        cpu_buffer->head_page = bpage;
                        rb_footer_reader_status(prev_head, 0);
                        rb_footer_reader_status(bpage, RB_PAGE_FT_HEAD);
                        return bpage;
                }

                bpage = rb_next_page(bpage);
        } while (bpage != prev_head);

        cnt++;

        /* We might have raced with the writer; let's try again */
        if (cnt < 3)
                goto again;

        return NULL;
}
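
/*
 * Swap the reader page with the current head page, much like the host
 * kernel's lockless ring buffer: splice the reader page in place of the
 * head, then cmpxchg the previous page's ->next link so a concurrent
 * writer either sees the old head or the new reader page, never a
 * half-linked ring. The page that drops out becomes the new reader page,
 * with the current overrun count stored in its footer.
 */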
static int rb_swap_reader_page(struct hyp_rb_per_cpu *cpu_buffer)
{
        unsigned long *old_head_link, old_link_val, new_link_val, overrun;
        struct hyp_buffer_page *head, *reader = cpu_buffer->reader_page;
        struct rb_ext_page_footer *footer;

        rb_footer_reader_status(cpu_buffer->reader_page, 0);

spin:
        /* Update cpu_buffer->head_page according to HYP_RB_PAGE_HEAD */
        head = rb_set_head_page(cpu_buffer);
        if (!head)
                return -ENODEV;

        /* Connect the reader page around the head page */
        reader->list.next = head->list.next;
        reader->list.prev = head->list.prev;

        /* The reader page points to the new head page */
        rb_set_flag(reader, HYP_RB_PAGE_HEAD);

        /*
         * Paired with the cmpxchg in rb_move_tail(). Order the read of the
         * head page and overrun.
         */
        smp_mb();

        overrun = atomic_read(&cpu_buffer->overrun);

        /* Try to swap the prev head link to the reader page */
        old_head_link = (unsigned long *)&reader->list.prev->next;
        old_link_val = (*old_head_link & ~HYP_RB_FLAG_MASK) | HYP_RB_PAGE_HEAD;
        new_link_val = (unsigned long)&reader->list;
        if (cmpxchg(old_head_link, old_link_val, new_link_val)
                    != old_link_val)
                goto spin;

        cpu_buffer->head_page = rb_hyp_buffer_page(reader->list.next);
        cpu_buffer->head_page->list.prev = &reader->list;
        cpu_buffer->reader_page = head;

        rb_footer_reader_status(cpu_buffer->reader_page, RB_PAGE_FT_READER);
        rb_footer_reader_status(cpu_buffer->head_page, RB_PAGE_FT_HEAD);

        footer = rb_ext_page_get_footer(cpu_buffer->reader_page->page);
        footer->stats.overrun = overrun;

        return 0;
}
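
/*
 * Advance the tail (write) page. If the writer has caught up with the
 * head, the head is pushed forward too: the entries of the reclaimed page
 * are accounted as overrun and the HEAD flag is moved via the transient
 * UPDATE flag so the reader can detect the transition.
 */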
static struct hyp_buffer_page *
rb_move_tail(struct hyp_rb_per_cpu *cpu_buffer)
{
        struct hyp_buffer_page *tail_page, *new_tail;

        tail_page = cpu_buffer->tail_page;
        new_tail = rb_next_page(tail_page);

again:
        /*
         * We caught the reader ... Let's try to move the head page.
         * The writer can only rely on ->next links to check if this is the head.
         */
        if ((unsigned long)tail_page->list.next & HYP_RB_PAGE_HEAD) {
                /* The reader moved the head in between */
                if (!rb_set_flag(tail_page, HYP_RB_PAGE_UPDATE))
                        goto again;

                atomic_add(atomic_read(&new_tail->entries), &cpu_buffer->overrun);

                /* Move the head */
                rb_set_flag(new_tail, HYP_RB_PAGE_HEAD);

                /* The new head is in place, reset the update flag */
                rb_set_flag(tail_page, 0);
        }

        rb_footer_writer_status(tail_page, 0);
        rb_footer_writer_status(new_tail, RB_PAGE_FT_COMMIT);

        local_set(&new_tail->page->commit, 0);

        atomic_set(&new_tail->write, 0);
        atomic_set(&new_tail->entries, 0);

        atomic_inc(&cpu_buffer->pages_touched);

        cpu_buffer->tail_page = new_tail;

        return new_tail;
}

unsigned long rb_event_size(unsigned long length)
{
        struct ring_buffer_event *event;

        return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
}
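
/*
 * Prepend a TIME_EXTEND event when the timestamp delta does not fit in
 * the event header: the low bits go in time_delta, the rest in array[0].
 * Returns the location of the actual event, right after the 8-byte
 * extend event.
 */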
static struct ring_buffer_event *
rb_add_ts_extend(struct ring_buffer_event *event, u64 delta)
{
        event->type_len = RINGBUF_TYPE_TIME_EXTEND;
        event->time_delta = delta & TS_MASK;
        event->array[0] = delta >> TS_SHIFT;

        return (struct ring_buffer_event *)((unsigned long)event + 8);
}
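
/*
 * Reserve room for an event of @length bytes on the current tail page,
 * moving the tail when the page is full. The first event on a page
 * resets the page timestamp and needs no delta; otherwise a TIME_EXTEND
 * is emitted in front of the event when the delta overflows the header.
 */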
static struct ring_buffer_event *
rb_reserve_next(struct hyp_rb_per_cpu *cpu_buffer, unsigned long length)
{
        unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
        struct hyp_buffer_page *tail_page = cpu_buffer->tail_page;
        struct ring_buffer_event *event;
        unsigned long write, prev_write;
        u64 ts, time_delta;

        ts = trace_clock();
        time_delta = ts - atomic64_read(&cpu_buffer->write_stamp);

        if (test_time_stamp(time_delta))
                ts_ext_size = 8;

        prev_write = atomic_read(&tail_page->write);
        write = prev_write + event_size + ts_ext_size;

        if (unlikely(write > BUF_EXT_PAGE_SIZE))
                tail_page = rb_move_tail(cpu_buffer);

        if (!atomic_read(&tail_page->entries)) {
                tail_page->page->time_stamp = ts;
                time_delta = 0;
                ts_ext_size = 0;
                write = event_size;
                prev_write = 0;
        }

        atomic_set(&tail_page->write, write);
        atomic_inc(&tail_page->entries);

        local_set(&tail_page->page->commit, write);

        atomic_inc(&cpu_buffer->nr_entries);
        atomic64_set(&cpu_buffer->write_stamp, ts);

        event = (struct ring_buffer_event *)(tail_page->page->data +
                                             prev_write);

        if (ts_ext_size) {
                event = rb_add_ts_extend(event, time_delta);
                time_delta = 0;
        }

        event->type_len = 0;
        event->time_delta = time_delta;
        event->array[0] = event_size - RB_EVNT_HDR_SIZE;

        return event;
}
void *
rb_reserve_trace_entry(struct hyp_rb_per_cpu *cpu_buffer, unsigned long length)
{
        struct ring_buffer_event *rb_event;

        rb_event = rb_reserve_next(cpu_buffer, length);

        return &rb_event->array[1];
}
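
/*
 * Example (illustrative only, not part of this file): a hypervisor event
 * writer would look roughly like the sketch below, assuming a
 * hypothetical payload type and hypothetical helpers that flip
 * cpu_buffer->status between HYP_RB_WRITABLE and HYP_RB_WRITING around
 * the write (cf. the __stop_write_hyp_rb pairing noted in
 * rb_cpu_disable()):
 *
 *	struct my_hyp_event *e;
 *
 *	if (!start_writing(cpu_buffer))		// hypothetical status cmpxchg
 *		return;
 *	e = rb_reserve_trace_entry(cpu_buffer, sizeof(*e));
 *	e->field = value;			// payload lives in array[1..]
 *	stop_writing(cpu_buffer);		// hypothetical
 */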
static int rb_update_footers(struct hyp_rb_per_cpu *cpu_buffer)
{
        unsigned long entries, pages_touched, overrun;
        struct rb_ext_page_footer *footer;
        struct buffer_data_page *reader;

        if (!rb_set_head_page(cpu_buffer))
                return -ENODEV;

        reader = cpu_buffer->reader_page->page;
        footer = rb_ext_page_get_footer(reader);

        entries = atomic_read(&cpu_buffer->nr_entries);
        footer->stats.entries = entries;

        pages_touched = atomic_read(&cpu_buffer->pages_touched);
        footer->stats.pages_touched = pages_touched;

        overrun = atomic_read(&cpu_buffer->overrun);
        footer->stats.overrun = overrun;

        return 0;
}
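
/*
 * Hook up one host-provided page: pin the shared memory so the host
 * cannot pull it from under the hypervisor, then reset the page's
 * counters and footer statuses.
 */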
static int rb_page_init(struct hyp_buffer_page *bpage, unsigned long hva)
{
        void *hyp_va = (void *)kern_hyp_va(hva);
        int ret;

        ret = hyp_pin_shared_mem(hyp_va, hyp_va + PAGE_SIZE);
        if (ret)
                return ret;

        INIT_LIST_HEAD(&bpage->list);
        bpage->page = (struct buffer_data_page *)hyp_va;

        atomic_set(&bpage->write, 0);

        rb_footer_reader_status(bpage, 0);
        rb_footer_writer_status(bpage, 0);

        return 0;
}

static bool rb_cpu_loaded(struct hyp_rb_per_cpu *cpu_buffer)
{
        return cpu_buffer->bpages;
}
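
/*
 * cpu_buffer->status is a small state machine: NONWRITABLE <-> WRITABLE,
 * with WRITING as a transient state taken around an in-flight write.
 * Disabling therefore spins until any such write has finished before the
 * buffer can be flipped back to NONWRITABLE.
 */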
static void rb_cpu_disable(struct hyp_rb_per_cpu *cpu_buffer)
{
        unsigned int prev_status;

        /* Wait for release of the buffer */
        do {
                /* Paired with __stop_write_hyp_rb */
                prev_status = atomic_cmpxchg_acquire(&cpu_buffer->status,
                                                     HYP_RB_WRITABLE,
                                                     HYP_RB_NONWRITABLE);
        } while (prev_status == HYP_RB_WRITING);

        if (prev_status == HYP_RB_WRITABLE)
                rb_update_footers(cpu_buffer);
}

static int rb_cpu_enable(struct hyp_rb_per_cpu *cpu_buffer)
{
        unsigned int prev_status;

        if (!rb_cpu_loaded(cpu_buffer))
                return -EINVAL;

        prev_status = atomic_cmpxchg(&cpu_buffer->status,
                                     HYP_RB_NONWRITABLE, HYP_RB_WRITABLE);
        if (prev_status == HYP_RB_NONWRITABLE)
                return 0;

        return -EINVAL;
}

static void rb_cpu_teardown(struct hyp_rb_per_cpu *cpu_buffer)
{
        int i;

        if (!rb_cpu_loaded(cpu_buffer))
                return;

        rb_cpu_disable(cpu_buffer);

        for (i = 0; i < cpu_buffer->nr_pages; i++) {
                struct hyp_buffer_page *bpage = &cpu_buffer->bpages[i];

                if (!bpage->page)
                        continue;

                hyp_unpin_shared_mem((void *)bpage->page,
                                     (void *)bpage->page + PAGE_SIZE);
        }

        cpu_buffer->bpages = NULL;
}

static bool rb_cpu_fits_backing(unsigned long nr_pages,
                                struct hyp_buffer_page *start)
{
        unsigned long max = hyp_buffer_pages_backing.start +
                            hyp_buffer_pages_backing.size;
        struct hyp_buffer_page *end = start + nr_pages;

        return (unsigned long)end <= max;
}

static bool rb_cpu_fits_pack(struct ring_buffer_pack *rb_pack,
                             unsigned long pack_end)
{
        unsigned long *end;

        /* Check we can at least read nr_pages */
        if ((unsigned long)&rb_pack->nr_pages >= pack_end)
                return false;

        end = &rb_pack->page_va[rb_pack->nr_pages];

        return (unsigned long)end <= pack_end;
}
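
/*
 * Build one CPU's ring from its pack description: bpages[0] becomes the
 * reader page (initially outside the ring), the remaining nr_pages are
 * linked into a circular list, and the closing link carries the HEAD
 * flag pointing at the initial head/tail page.
 */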
static int rb_cpu_init(struct ring_buffer_pack *rb_pack, struct hyp_buffer_page *start,
                       struct hyp_rb_per_cpu *cpu_buffer)
{
        struct hyp_buffer_page *bpage = start;
        int i, ret;

        if (!rb_pack->nr_pages ||
            !rb_cpu_fits_backing(rb_pack->nr_pages + 1, start))
                return -EINVAL;

        memset(cpu_buffer, 0, sizeof(*cpu_buffer));

        cpu_buffer->bpages = start;
        cpu_buffer->nr_pages = rb_pack->nr_pages + 1;

        /* The reader page is not part of the ring initially */
        ret = rb_page_init(bpage, rb_pack->reader_page_va);
        if (ret)
                return ret;

        cpu_buffer->reader_page = bpage;
        cpu_buffer->tail_page = bpage + 1;
        cpu_buffer->head_page = bpage + 1;

        for (i = 0; i < rb_pack->nr_pages; i++) {
                ret = rb_page_init(++bpage, rb_pack->page_va[i]);
                if (ret)
                        goto err;

                bpage->list.next = &(bpage + 1)->list;
                bpage->list.prev = &(bpage - 1)->list;
        }

        /* Close the ring */
        bpage->list.next = &cpu_buffer->tail_page->list;
        cpu_buffer->tail_page->list.prev = &bpage->list;

        /* The last init'ed page points to the head page */
        rb_set_flag(bpage, HYP_RB_PAGE_HEAD);

        rb_footer_reader_status(cpu_buffer->reader_page, RB_PAGE_FT_READER);
        rb_footer_reader_status(cpu_buffer->head_page, RB_PAGE_FT_HEAD);
        rb_footer_writer_status(cpu_buffer->head_page, RB_PAGE_FT_COMMIT);

        atomic_set(&cpu_buffer->overrun, 0);
        atomic64_set(&cpu_buffer->write_stamp, 0);

        return 0;

err:
        rb_cpu_teardown(cpu_buffer);

        return ret;
}
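
/*
 * The struct hyp_buffer_page descriptors live in pages donated by the
 * host: take ownership of that backing store and zero it before the
 * per-CPU descriptor arrays are carved out of it.
 */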
static int rb_setup_bpage_backing(struct hyp_trace_pack *pack)
{
        unsigned long start = kern_hyp_va(pack->backing.start);
        size_t size = pack->backing.size;
        int ret;

        if (hyp_buffer_pages_backing.size)
                return -EBUSY;

        if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size))
                return -EINVAL;

        ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn((void *)start), size >> PAGE_SHIFT);
        if (ret)
                return ret;

        memset((void *)start, 0, size);

        hyp_buffer_pages_backing.start = start;
        hyp_buffer_pages_backing.size = size;

        return 0;
}

static void rb_teardown_bpage_backing(void)
{
        unsigned long start = hyp_buffer_pages_backing.start;
        size_t size = hyp_buffer_pages_backing.size;

        if (!size)
                return;

        memset((void *)start, 0, size);

        WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn((void *)start), size >> PAGE_SHIFT));

        hyp_buffer_pages_backing.start = 0;
        hyp_buffer_pages_backing.size = 0;
}

int __pkvm_rb_update_footers(int cpu)
{
        struct hyp_rb_per_cpu *cpu_buffer;
        int ret = 0;

        if (cpu < 0 || cpu >= hyp_nr_cpus)
                return -EINVAL;

        /* TODO: use a per-CPU lock instead of the global trace_rb_lock */
        hyp_spin_lock(&trace_rb_lock);

        cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
        if (!rb_cpu_loaded(cpu_buffer))
                ret = -ENODEV;
        else
                ret = rb_update_footers(cpu_buffer);

        hyp_spin_unlock(&trace_rb_lock);

        return ret;
}

int __pkvm_rb_swap_reader_page(int cpu)
{
        struct hyp_rb_per_cpu *cpu_buffer;
        int ret = 0;

        if (cpu < 0 || cpu >= hyp_nr_cpus)
                return -EINVAL;

        /* TODO: use a per-CPU lock instead of the global trace_rb_lock */
        hyp_spin_lock(&trace_rb_lock);

        cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
        if (!rb_cpu_loaded(cpu_buffer))
                ret = -ENODEV;
        else
                ret = rb_swap_reader_page(cpu_buffer);

        hyp_spin_unlock(&trace_rb_lock);

        return ret;
}

static void __pkvm_teardown_tracing_locked(void)
{
        int cpu;

        hyp_assert_lock_held(&trace_rb_lock);

        for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
                struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

                rb_cpu_teardown(cpu_buffer);
        }

        rb_teardown_bpage_backing();
}

void __pkvm_teardown_tracing(void)
{
        hyp_spin_lock(&trace_rb_lock);
        __pkvm_teardown_tracing_locked();
        hyp_spin_unlock(&trace_rb_lock);
}
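
/*
 * Load the tracing configuration passed by the host: take ownership of
 * the hyp_trace_pack, set up the descriptor backing store and the trace
 * clock, then initialise one ring per CPU described in the pack. The
 * pack pages are handed back to the host once parsing is done, whether
 * or not loading succeeded.
 */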
int __pkvm_load_tracing(unsigned long pack_hva, size_t pack_size)
{
        struct hyp_trace_pack *pack = (struct hyp_trace_pack *)kern_hyp_va(pack_hva);
        struct trace_buffer_pack *trace_pack = &pack->trace_buffer_pack;
        struct hyp_buffer_page *bpage_backing_start;
        struct ring_buffer_pack *rb_pack;
        int ret, cpu;

        if (!pack_size || !PAGE_ALIGNED(pack_hva) || !PAGE_ALIGNED(pack_size))
                return -EINVAL;

        ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn((void *)pack),
                                     pack_size >> PAGE_SHIFT);
        if (ret)
                return ret;

        hyp_spin_lock(&trace_rb_lock);

        ret = rb_setup_bpage_backing(pack);
        if (ret)
                goto err;

        trace_clock_update(&pack->trace_clock_data);

        bpage_backing_start = (struct hyp_buffer_page *)hyp_buffer_pages_backing.start;

        for_each_ring_buffer_pack(rb_pack, cpu, trace_pack) {
                struct hyp_rb_per_cpu *cpu_buffer;
                /* The CPU this pack targets; shadows the pack iterator above */
                int cpu;

                ret = -EINVAL;

                if (!rb_cpu_fits_pack(rb_pack, pack_hva + pack_size))
                        break;

                cpu = rb_pack->cpu;
                if (cpu >= hyp_nr_cpus)
                        break;

                cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

                ret = rb_cpu_init(rb_pack, bpage_backing_start, cpu_buffer);
                if (ret)
                        break;

                /* reader page + nr_pages pages in the ring */
                bpage_backing_start += 1 + rb_pack->nr_pages;
        }

err:
        if (ret)
                __pkvm_teardown_tracing_locked();

        hyp_spin_unlock(&trace_rb_lock);

        WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn((void *)pack),
                                       pack_size >> PAGE_SHIFT));

        return ret;
}
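
/*
 * Enable or disable writing on every loaded per-CPU ring. Enabling
 * succeeds if at least one CPU could be made writable; disabling always
 * succeeds.
 */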
int __pkvm_enable_tracing(bool enable)
{
        int cpu, ret = enable ? -EINVAL : 0;

        hyp_spin_lock(&trace_rb_lock);

        for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
                struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);

                if (enable) {
                        int __ret = rb_cpu_enable(cpu_buffer);

                        if (!__ret)
                                ret = 0;
                } else {
                        rb_cpu_disable(cpu_buffer);
                }
        }

        hyp_spin_unlock(&trace_rb_lock);

        return ret;
}