perf_trace_counters.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2014, 2017, 2021, The Linux Foundation. All rights reserved.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM perf_trace_counters

#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _PERF_TRACE_COUNTERS_H_

/* Ctr index for PMCNTENSET/CLR */
#define CC 0x80000000
#define C0 0x1
#define C1 0x2
#define C2 0x4
#define C3 0x8
#define C4 0x10
#define C5 0x20
#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5)
#define TYPE_MASK 0xFFFF
#define NUM_L1_CTRS 6
#define NUM_AMU_CTRS 3
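
/*
 * These masks mirror the PMCNTENSET_EL0/PMCNTENCLR_EL0 bit layout: bit 31
 * (CC) enables the dedicated cycle counter, and bits 0-5 (C0-C5) enable the
 * first six programmable event counters.
 */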

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/tracepoint.h>

DECLARE_PER_CPU(u32, cntenset_val);
DECLARE_PER_CPU(unsigned long, previous_ccnt);
DECLARE_PER_CPU(unsigned long[NUM_L1_CTRS], previous_l1_cnts);
DECLARE_PER_CPU(unsigned long[NUM_AMU_CTRS], previous_amu_cnts);
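
/*
 * Per-CPU state defined elsewhere (presumably by the code that programs the
 * PMU): cntenset_val caches the enabled-counter mask, and the previous_*
 * variables hold the last-read value of each counter so the tracepoints
 * below can emit per-context-switch deltas rather than raw free-running
 * counts.
 */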

#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value in the 0-8 range.
	 * Decrement it by 1 (except for TASK_RUNNING, i.e. 0) before using it
	 * as a left-shift count to recover the original task->state bit.
	 */
	state = task_state_index(p);
	return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */
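
/*
 * Example of the round trip above: for a task in TASK_UNINTERRUPTIBLE (0x2),
 * task_state_index() returns fls(0x2) == 2, and 1 << (2 - 1) recovers the
 * original 0x2 flag, which __print_flags() in TP_printk() decodes as "D".
 */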

/* Check the AMU field in ID_AA64PFR0_EL1 to see whether an AMU is implemented */
#define cpu_has_amu \
	cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), \
					     ID_AA64PFR0_EL1_AMU_SHIFT)
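
/*
 * A nonzero AMU field means the Activity Monitors Unit (ARMv8.4-AMU) is
 * present; AMUv1 provides fixed-function counters including core cycles,
 * instructions retired and memory stall cycles, which are sampled below.
 */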

TRACE_EVENT(sched_switch_with_ctrs,

	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),

	TP_STRUCT__entry(
		__field(pid_t, prev_pid)
		__field(pid_t, next_pid)
		__array(char, prev_comm, TASK_COMM_LEN)
		__array(char, next_comm, TASK_COMM_LEN)
		__field(long, prev_state)
		__field(unsigned long, cctr)
		__field(unsigned long, ctr0)
		__field(unsigned long, ctr1)
		__field(unsigned long, ctr2)
		__field(unsigned long, ctr3)
		__field(unsigned long, ctr4)
		__field(unsigned long, ctr5)
		__field(unsigned long, amu0)
		__field(unsigned long, amu1)
		__field(unsigned long, amu2)
	),

	TP_fast_assign(
		u32 cpu = smp_processor_id();
		u32 i;
		u32 cnten_val;
		unsigned long total_ccnt = 0;
		unsigned long total_cnt = 0;
		unsigned long amu_cnt = 0;
		unsigned long delta_l1_cnts[NUM_L1_CTRS] = {0};
		unsigned long delta_amu_cnts[NUM_AMU_CTRS] = {0};

		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
		__entry->prev_state = __trace_sched_switch_state(preempt, prev);
		__entry->prev_pid = prev->pid;
		__entry->next_pid = next->pid;
		cnten_val = per_cpu(cntenset_val, cpu);
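
		/*
		 * The counters free-run while enabled; subtract the snapshot
		 * taken at the previous context switch to get the count
		 * attributable to the outgoing task, then refresh the
		 * snapshot for the next switch.
		 */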
		if (cnten_val & CC) {
			/* Read value */
			total_ccnt = read_sysreg(pmccntr_el0);
			__entry->cctr = total_ccnt -
				per_cpu(previous_ccnt, cpu);
			per_cpu(previous_ccnt, cpu) = total_ccnt;
		}
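
		/*
		 * Event counters are accessed indirectly: writing the index
		 * to PMSELR_EL0 selects the counter and PMXEVCNTR_EL0 then
		 * reads it; the isb() orders the select before the read.
		 */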
		for (i = 0; i < NUM_L1_CTRS; i++) {
			if (cnten_val & (1 << i)) {
				/* Select */
				write_sysreg(i, pmselr_el0);
				isb();
				/* Read value */
				total_cnt = read_sysreg(pmxevcntr_el0);
				delta_l1_cnts[i] = total_cnt -
					per_cpu(previous_l1_cnts[i], cpu);
				per_cpu(previous_l1_cnts[i], cpu) = total_cnt;
			} else {
				delta_l1_cnts[i] = 0;
			}
		}
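
		/*
		 * With an AMU present, sample the fixed-function activity
		 * monitors (core cycles, instructions retired, memory stall
		 * cycles) and compute deltas the same way.
		 */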
		if (IS_ENABLED(CONFIG_ARM64_AMU_EXTN) && cpu_has_amu > 0) {
			amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_CORE_EL0);
			delta_amu_cnts[0] = amu_cnt -
				per_cpu(previous_amu_cnts[0], cpu);
			per_cpu(previous_amu_cnts[0], cpu) = amu_cnt;

			amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_INST_RET_EL0);
			delta_amu_cnts[1] = amu_cnt -
				per_cpu(previous_amu_cnts[1], cpu);
			per_cpu(previous_amu_cnts[1], cpu) = amu_cnt;

			amu_cnt = read_sysreg_s(SYS_AMEVCNTR0_MEM_STALL);
			delta_amu_cnts[2] = amu_cnt -
				per_cpu(previous_amu_cnts[2], cpu);
			per_cpu(previous_amu_cnts[2], cpu) = amu_cnt;
		}

		__entry->ctr0 = delta_l1_cnts[0];
		__entry->ctr1 = delta_l1_cnts[1];
		__entry->ctr2 = delta_l1_cnts[2];
		__entry->ctr3 = delta_l1_cnts[3];
		__entry->ctr4 = delta_l1_cnts[4];
		__entry->ctr5 = delta_l1_cnts[5];
		__entry->amu0 = delta_amu_cnts[0];
		__entry->amu1 = delta_amu_cnts[1];
		__entry->amu2 = delta_amu_cnts[2];
	),

	TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s next_pid=%d CCNTR=%lu CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu, CYC: %lu, INST: %lu, STALL: %lu",
		__entry->prev_comm, __entry->prev_pid,
		(__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
		__print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
				{ TASK_INTERRUPTIBLE, "S" },
				{ TASK_UNINTERRUPTIBLE, "D" },
				{ __TASK_STOPPED, "T" },
				{ __TASK_TRACED, "t" },
				{ EXIT_DEAD, "X" },
				{ EXIT_ZOMBIE, "Z" },
				{ TASK_PARKED, "P" },
				{ TASK_DEAD, "I" }) :
		"R",
		__entry->prev_state & TASK_REPORT_MAX ? "+" : "",
		__entry->next_comm,
		__entry->next_pid,
		__entry->cctr,
		__entry->ctr0, __entry->ctr1,
		__entry->ctr2, __entry->ctr3,
		__entry->ctr4, __entry->ctr5,
		__entry->amu0, __entry->amu1,
		__entry->amu2)
);
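
/*
 * Companion event: sched_switch_ctrs_cfg records which hardware event
 * number (PMXEVTYPER_EL0.evtCount) each enabled counter is programmed
 * with, so post-processing can label the per-switch deltas above.
 */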
TRACE_EVENT(sched_switch_ctrs_cfg,

	TP_PROTO(int cpu),

	TP_ARGS(cpu),

	TP_STRUCT__entry(
		__field(int, cpu)
		__field(unsigned long, ctr0)
		__field(unsigned long, ctr1)
		__field(unsigned long, ctr2)
		__field(unsigned long, ctr3)
		__field(unsigned long, ctr4)
		__field(unsigned long, ctr5)
	),

	TP_fast_assign(
		u32 i;
		u32 cnten_val;
		u32 ctr_type[NUM_L1_CTRS] = {0};

		cnten_val = per_cpu(cntenset_val, cpu);
		for (i = 0; i < NUM_L1_CTRS; i++) {
			if (cnten_val & (1 << i)) {
				/* Select */
				write_sysreg(i, pmselr_el0);
				isb();
				/* Read type */
				ctr_type[i] = read_sysreg(pmxevtyper_el0) &
					TYPE_MASK;
			} else {
				ctr_type[i] = 0;
			}
		}
		__entry->cpu = cpu;
		__entry->ctr0 = ctr_type[0];
		__entry->ctr1 = ctr_type[1];
		__entry->ctr2 = ctr_type[2];
		__entry->ctr3 = ctr_type[3];
		__entry->ctr4 = ctr_type[4];
		__entry->ctr5 = ctr_type[5];
	),

	TP_printk("cpu=%d CTR0=%lu CTR1=%lu CTR2=%lu CTR3=%lu CTR4=%lu CTR5=%lu",
		__entry->cpu,
		__entry->ctr0, __entry->ctr1,
		__entry->ctr2, __entry->ctr3,
		__entry->ctr4, __entry->ctr5)
);

#endif /* _PERF_TRACE_COUNTERS_H_ */

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../kernel/sched/walt
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE perf_trace_counters
#include <trace/define_trace.h>
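
/*
 * Usage sketch (the defining .c file is not part of this header and is
 * assumed here): per the standard define_trace.h pattern, exactly one
 * translation unit defines CREATE_TRACE_POINTS before including this
 * header to instantiate the events, e.g.:
 *
 *	#define CREATE_TRACE_POINTS
 *	#include "perf_trace_counters.h"
 *
 * Call sites would then invoke trace_sched_switch_with_ctrs(preempt, prev,
 * next) from the context-switch path, once the counters have been enabled
 * via PMCNTENSET_EL0 and cntenset_val has been populated to match.
 */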