msm_perf.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2013 Red Hat
  4. * Author: Rob Clark <[email protected]>
  5. */
/* For profiling, userspace can:
 *
 *     tail -f /sys/kernel/debug/dri/<minor>/perf
 *
 * This will enable performance counters/profiling to track the busy time
 * and any gpu specific performance counters that are supported.
 */
  13. #ifdef CONFIG_DEBUG_FS
  14. #include <linux/debugfs.h>
  15. #include <linux/uaccess.h>
  16. #include <drm/drm_file.h>
  17. #include "msm_drv.h"
  18. #include "msm_gpu.h"
  19. struct msm_perf_state {
  20. struct drm_device *dev;
  21. bool open;
  22. int cnt;
  23. struct mutex read_lock;
  24. char buf[256];
  25. int buftot, bufpos;
  26. unsigned long next_jiffies;
  27. };
  28. #define SAMPLE_TIME (HZ/4)
  29. /* wait for next sample time: */
  30. static int wait_sample(struct msm_perf_state *perf)
  31. {
  32. unsigned long start_jiffies = jiffies;
  33. if (time_after(perf->next_jiffies, start_jiffies)) {
  34. unsigned long remaining_jiffies =
  35. perf->next_jiffies - start_jiffies;
  36. int ret = schedule_timeout_interruptible(remaining_jiffies);
  37. if (ret > 0) {
  38. /* interrupted */
  39. return -ERESTARTSYS;
  40. }
  41. }
  42. perf->next_jiffies += SAMPLE_TIME;
  43. return 0;
  44. }
  45. static int refill_buf(struct msm_perf_state *perf)
  46. {
  47. struct msm_drm_private *priv = perf->dev->dev_private;
  48. struct msm_gpu *gpu = priv->gpu;
  49. char *ptr = perf->buf;
  50. int rem = sizeof(perf->buf);
  51. int i, n;
  52. if ((perf->cnt++ % 32) == 0) {
  53. /* Header line: */
  54. n = snprintf(ptr, rem, "%%BUSY");
  55. ptr += n;
  56. rem -= n;
  57. for (i = 0; i < gpu->num_perfcntrs; i++) {
  58. const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
  59. n = snprintf(ptr, rem, "\t%s", perfcntr->name);
  60. ptr += n;
  61. rem -= n;
  62. }
  63. } else {
  64. /* Sample line: */
  65. uint32_t activetime = 0, totaltime = 0;
  66. uint32_t cntrs[5];
  67. uint32_t val;
  68. int ret;
  69. /* sleep until next sample time: */
  70. ret = wait_sample(perf);
  71. if (ret)
  72. return ret;
  73. ret = msm_gpu_perfcntr_sample(gpu, &activetime, &totaltime,
  74. ARRAY_SIZE(cntrs), cntrs);
  75. if (ret < 0)
  76. return ret;
  77. val = totaltime ? 1000 * activetime / totaltime : 0;
  78. n = snprintf(ptr, rem, "%3d.%d%%", val / 10, val % 10);
  79. ptr += n;
  80. rem -= n;
  81. for (i = 0; i < ret; i++) {
  82. /* cycle counters (I think).. convert to MHz.. */
  83. val = cntrs[i] / 10000;
  84. n = snprintf(ptr, rem, "\t%5d.%02d",
  85. val / 100, val % 100);
  86. ptr += n;
  87. rem -= n;
  88. }
  89. }
  90. n = snprintf(ptr, rem, "\n");
  91. ptr += n;
  92. rem -= n;
  93. perf->bufpos = 0;
  94. perf->buftot = ptr - perf->buf;
  95. return 0;
  96. }
  97. static ssize_t perf_read(struct file *file, char __user *buf,
  98. size_t sz, loff_t *ppos)
  99. {
  100. struct msm_perf_state *perf = file->private_data;
  101. int n = 0, ret = 0;
  102. mutex_lock(&perf->read_lock);
  103. if (perf->bufpos >= perf->buftot) {
  104. ret = refill_buf(perf);
  105. if (ret)
  106. goto out;
  107. }
  108. n = min((int)sz, perf->buftot - perf->bufpos);
  109. if (copy_to_user(buf, &perf->buf[perf->bufpos], n)) {
  110. ret = -EFAULT;
  111. goto out;
  112. }
  113. perf->bufpos += n;
  114. *ppos += n;
  115. out:
  116. mutex_unlock(&perf->read_lock);
  117. if (ret)
  118. return ret;
  119. return n;
  120. }
  121. static int perf_open(struct inode *inode, struct file *file)
  122. {
  123. struct msm_perf_state *perf = inode->i_private;
  124. struct drm_device *dev = perf->dev;
  125. struct msm_drm_private *priv = dev->dev_private;
  126. struct msm_gpu *gpu = priv->gpu;
  127. int ret = 0;
  128. if (!gpu)
  129. return -ENODEV;
  130. mutex_lock(&gpu->lock);
  131. if (perf->open) {
  132. ret = -EBUSY;
  133. goto out;
  134. }
  135. file->private_data = perf;
  136. perf->open = true;
  137. perf->cnt = 0;
  138. perf->buftot = 0;
  139. perf->bufpos = 0;
  140. msm_gpu_perfcntr_start(gpu);
  141. perf->next_jiffies = jiffies + SAMPLE_TIME;
  142. out:
  143. mutex_unlock(&gpu->lock);
  144. return ret;
  145. }
  146. static int perf_release(struct inode *inode, struct file *file)
  147. {
  148. struct msm_perf_state *perf = inode->i_private;
  149. struct msm_drm_private *priv = perf->dev->dev_private;
  150. msm_gpu_perfcntr_stop(priv->gpu);
  151. perf->open = false;
  152. return 0;
  153. }
  154. static const struct file_operations perf_debugfs_fops = {
  155. .owner = THIS_MODULE,
  156. .open = perf_open,
  157. .read = perf_read,
  158. .llseek = no_llseek,
  159. .release = perf_release,
  160. };
  161. int msm_perf_debugfs_init(struct drm_minor *minor)
  162. {
  163. struct msm_drm_private *priv = minor->dev->dev_private;
  164. struct msm_perf_state *perf;
  165. /* only create on first minor: */
  166. if (priv->perf)
  167. return 0;
  168. perf = kzalloc(sizeof(*perf), GFP_KERNEL);
  169. if (!perf)
  170. return -ENOMEM;
  171. perf->dev = minor->dev;
  172. mutex_init(&perf->read_lock);
  173. priv->perf = perf;
  174. debugfs_create_file("perf", S_IFREG | S_IRUGO, minor->debugfs_root,
  175. perf, &perf_debugfs_fops);
  176. return 0;
  177. }
  178. void msm_perf_debugfs_cleanup(struct msm_drm_private *priv)
  179. {
  180. struct msm_perf_state *perf = priv->perf;
  181. if (!perf)
  182. return;
  183. priv->perf = NULL;
  184. mutex_destroy(&perf->read_lock);
  185. kfree(perf);
  186. }
  187. #endif