mmap.c

// SPDX-License-Identifier: GPL-2.0
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <errno.h>
#include <string.h>
#include <linux/ring_buffer.h>
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/stringify.h>
#include "internal.h"

void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
		     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
	map->fd = -1;
	map->overwrite = overwrite;
	map->unmap_cb = unmap_cb;
	refcount_set(&map->refcnt, 0);
	if (prev)
		prev->next = map;
}
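
/*
 * The mapping is one struct perf_event_mmap_page control page followed by
 * a power-of-two data area of map->mask + 1 bytes, hence the extra
 * page_size below.
 */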
size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
	return map->mask + 1 + page_size;
}
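
/*
 * Map the event fd's ring buffer. mp->prot and mp->mask are supplied by the
 * caller through struct perf_mmap_param (typically the evlist mmap code).
 */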
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
		    int fd, struct perf_cpu cpu)
{
	map->prev = 0;
	map->mask = mp->mask;
	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
			 MAP_SHARED, fd, 0);
	if (map->base == MAP_FAILED) {
		map->base = NULL;
		return -1;
	}

	map->fd = fd;
	map->cpu = cpu;
	return 0;
}

void perf_mmap__munmap(struct perf_mmap *map)
{
	if (map && map->base != NULL) {
		munmap(map->base, perf_mmap__mmap_len(map));
		map->base = NULL;
		map->fd = -1;
		refcount_set(&map->refcnt, 0);
	}
	if (map && map->unmap_cb)
		map->unmap_cb(map);
}
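
/*
 * Reference counting for the mapping: perf_mmap__put() tears the mmap down
 * once the last reference is dropped.
 */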
void perf_mmap__get(struct perf_mmap *map)
{
	refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

	if (refcount_dec_and_test(&map->refcnt))
		perf_mmap__munmap(map);
}

static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	ring_buffer_write_tail(md->base, tail);
}

u64 perf_mmap__read_head(struct perf_mmap *map)
{
	return ring_buffer_read_head(map->base);
}

static bool perf_mmap__empty(struct perf_mmap *map)
{
	struct perf_event_mmap_page *pc = map->base;

	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
}
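
/*
 * Mark the data just read as consumed: in non-overwrite mode publish the new
 * tail so the kernel can reuse that space, and drop the final reference once
 * the buffer is empty.
 */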
void perf_mmap__consume(struct perf_mmap *map)
{
	if (!map->overwrite) {
		u64 old = map->prev;

		perf_mmap__write_tail(map, old);
	}

	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
		perf_mmap__put(map);
}
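
/*
 * Scan the backward (overwrite) ring buffer from *start, following record
 * sizes until a whole buffer's worth has been walked or a zero-sized header
 * is found, and report where the readable data ends.
 */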
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);

	pheader = (struct perf_event_header *)(buf + (*start & mask));

	while (true) {
		if (evt_head - *start >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			if (evt_head - *start > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

/*
 * Report the start and end of the available data in the ring buffer.
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;

	md->start = md->overwrite ? head : old;
	md->end = md->overwrite ? old : head;

	if ((md->end - md->start) < md->flush)
		return -EAGAIN;

	size = md->end - md->start;
	if (size > (unsigned long)(md->mask) + 1) {
		if (!md->overwrite) {
			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

			md->prev = head;
			perf_mmap__consume(md);
			return -EAGAIN;
		}

		/*
		 * Backward ring buffer is full. We still have a chance to read
		 * most of data from it.
		 */
		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
			return -EINVAL;
	}

	return 0;
}

int perf_mmap__read_init(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return -ENOENT;

	return __perf_mmap__read_init(map);
}

/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set tail to map->prev.
 * Need to correct map->prev to head, which is the end of the next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return;

	map->prev = perf_mmap__read_head(map);
}
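
/*
 * Return the event at *startp and advance *startp past it. An event that
 * wraps around the end of the data area is copied into map->event_copy so
 * the caller sees a contiguous record.
 */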
/* 'end' must point to a valid entry. */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
					 u64 *startp, u64 end)
{
	unsigned char *data = map->base + page_size;
	union perf_event *event = NULL;
	int diff = end - *startp;

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[*startp & map->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size)
			return NULL;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
			unsigned int offset = *startp;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = map->event_copy;

			do {
				cpy = min(map->mask + 1 - (offset & map->mask), len);
				memcpy(dst, &data[offset & map->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *)map->event_copy;
		}

		*startp += size;
	}

	return event;
}

/*
 * Read events from the ring buffer one by one.
 * Return one event per call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *	//process the event
 *	perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
	union perf_event *event;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return NULL;

	/* non-overwrite doesn't pause the ringbuffer */
	if (!map->overwrite)
		map->end = perf_mmap__read_head(map);

	event = perf_mmap__read(map, &map->start, map->end);

	if (!map->overwrite)
		map->prev = map->start;

	return event;
}
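
/*
 * Arch-specific helpers for perf_mmap__read_self(): read_perf_counter()
 * reads a hardware counter directly from user space (RDPMC on x86,
 * PMEVCNTR<n>_EL0/PMCCNTR_EL0 on arm64) and read_timestamp() reads the
 * reference clock (RDTSC / CNTVCT_EL0). Other architectures fall back to
 * stubs that return 0.
 */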
#if defined(__i386__) || defined(__x86_64__)
static u64 read_perf_counter(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((u64)high) << 32;
}

static u64 read_timestamp(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((u64)high) << 32;
}
#elif defined(__aarch64__)
#define read_sysreg(r) ({						\
	u64 __val;							\
	asm volatile("mrs %0, " __stringify(r) : "=r" (__val));	\
	__val;								\
})

static u64 read_pmccntr(void)
{
	return read_sysreg(pmccntr_el0);
}

#define PMEVCNTR_READ(idx)					\
	static u64 read_pmevcntr_##idx(void) {			\
		return read_sysreg(pmevcntr##idx##_el0);	\
	}

PMEVCNTR_READ(0);
PMEVCNTR_READ(1);
PMEVCNTR_READ(2);
PMEVCNTR_READ(3);
PMEVCNTR_READ(4);
PMEVCNTR_READ(5);
PMEVCNTR_READ(6);
PMEVCNTR_READ(7);
PMEVCNTR_READ(8);
PMEVCNTR_READ(9);
PMEVCNTR_READ(10);
PMEVCNTR_READ(11);
PMEVCNTR_READ(12);
PMEVCNTR_READ(13);
PMEVCNTR_READ(14);
PMEVCNTR_READ(15);
PMEVCNTR_READ(16);
PMEVCNTR_READ(17);
PMEVCNTR_READ(18);
PMEVCNTR_READ(19);
PMEVCNTR_READ(20);
PMEVCNTR_READ(21);
PMEVCNTR_READ(22);
PMEVCNTR_READ(23);
PMEVCNTR_READ(24);
PMEVCNTR_READ(25);
PMEVCNTR_READ(26);
PMEVCNTR_READ(27);
PMEVCNTR_READ(28);
PMEVCNTR_READ(29);
PMEVCNTR_READ(30);

/*
 * Read a value directly from PMEVCNTR<idx>.
 */
static u64 read_perf_counter(unsigned int counter)
{
	static u64 (* const read_f[])(void) = {
		read_pmevcntr_0,
		read_pmevcntr_1,
		read_pmevcntr_2,
		read_pmevcntr_3,
		read_pmevcntr_4,
		read_pmevcntr_5,
		read_pmevcntr_6,
		read_pmevcntr_7,
		read_pmevcntr_8,
		read_pmevcntr_9,
		read_pmevcntr_10,
		read_pmevcntr_11,
		read_pmevcntr_12,
		read_pmevcntr_13,
		read_pmevcntr_14,
		read_pmevcntr_15,
		read_pmevcntr_16,
		read_pmevcntr_17,
		read_pmevcntr_18,
		read_pmevcntr_19,
		read_pmevcntr_20,
		read_pmevcntr_21,
		read_pmevcntr_22,
		read_pmevcntr_23,
		read_pmevcntr_24,
		read_pmevcntr_25,
		read_pmevcntr_26,
		read_pmevcntr_27,
		read_pmevcntr_28,
		read_pmevcntr_29,
		read_pmevcntr_30,
		read_pmccntr
	};

	if (counter < ARRAY_SIZE(read_f))
		return (read_f[counter])();

	return 0;
}

static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
#endif
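
/*
 * Self-monitoring read of the counter mapped at 'map', following the
 * seqlock-style protocol documented for struct perf_event_mmap_page:
 * retry while pc->lock changes, sign-extend the raw counter value to
 * pmc_width bits, and account for the time elapsed since the kernel last
 * updated time_enabled/time_running when the event was multiplexed.
 */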
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
	struct perf_event_mmap_page *pc = map->base;
	u32 seq, idx, time_mult = 0, time_shift = 0;
	u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;

	if (!pc || !pc->cap_user_rdpmc)
		return -1;

	do {
		seq = READ_ONCE(pc->lock);
		barrier();

		count->ena = READ_ONCE(pc->time_enabled);
		count->run = READ_ONCE(pc->time_running);

		if (pc->cap_user_time && count->ena != count->run) {
			cyc = read_timestamp();
			time_mult = READ_ONCE(pc->time_mult);
			time_shift = READ_ONCE(pc->time_shift);
			time_offset = READ_ONCE(pc->time_offset);

			if (pc->cap_user_time_short) {
				time_cycles = READ_ONCE(pc->time_cycles);
				time_mask = READ_ONCE(pc->time_mask);
			}
		}

		idx = READ_ONCE(pc->index);
		cnt = READ_ONCE(pc->offset);
		if (pc->cap_user_rdpmc && idx) {
			s64 evcnt = read_perf_counter(idx - 1);
			u16 width = READ_ONCE(pc->pmc_width);

			evcnt <<= 64 - width;
			evcnt >>= 64 - width;
			cnt += evcnt;
		} else
			return -1;

		barrier();
	} while (READ_ONCE(pc->lock) != seq);

	if (count->ena != count->run) {
		u64 delta;

		/* Adjust for cap_user_time_short, a nop if not */
		cyc = time_cycles + ((cyc - time_cycles) & time_mask);

		delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);

		count->ena += delta;
		if (idx)
			count->run += delta;
	}

	count->val = cnt;

	return 0;
}