panfrost_perfcnt.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* Copyright 2019 Collabora Ltd */
  3. #include <linux/completion.h>
  4. #include <linux/iopoll.h>
  5. #include <linux/iosys-map.h>
  6. #include <linux/pm_runtime.h>
  7. #include <linux/slab.h>
  8. #include <linux/uaccess.h>
  9. #include <drm/drm_file.h>
  10. #include <drm/drm_gem_shmem_helper.h>
  11. #include <drm/panfrost_drm.h>
  12. #include "panfrost_device.h"
  13. #include "panfrost_features.h"
  14. #include "panfrost_gem.h"
  15. #include "panfrost_issues.h"
  16. #include "panfrost_job.h"
  17. #include "panfrost_mmu.h"
  18. #include "panfrost_perfcnt.h"
  19. #include "panfrost_regs.h"
  20. #define COUNTERS_PER_BLOCK 64
  21. #define BYTES_PER_COUNTER 4
  22. #define BLOCKS_PER_COREGROUP 8
  23. #define V4_SHADERS_PER_COREGROUP 4
  24. struct panfrost_perfcnt {
  25. struct panfrost_gem_mapping *mapping;
  26. size_t bosize;
  27. void *buf;
  28. struct panfrost_file_priv *user;
  29. struct mutex lock;
  30. struct completion dump_comp;
  31. };
  32. void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
  33. {
  34. complete(&pfdev->perfcnt->dump_comp);
  35. }
  36. void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
  37. {
  38. gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
  39. }
  40. static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
  41. {
  42. u64 gpuva;
  43. int ret;
  44. reinit_completion(&pfdev->perfcnt->dump_comp);
  45. gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT;
  46. gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva));
  47. gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva));
  48. gpu_write(pfdev, GPU_INT_CLEAR,
  49. GPU_IRQ_CLEAN_CACHES_COMPLETED |
  50. GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
  51. gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
  52. ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
  53. msecs_to_jiffies(1000));
  54. if (!ret)
  55. ret = -ETIMEDOUT;
  56. else if (ret > 0)
  57. ret = 0;
  58. return ret;
  59. }
  60. static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
  61. struct drm_file *file_priv,
  62. unsigned int counterset)
  63. {
  64. struct panfrost_file_priv *user = file_priv->driver_priv;
  65. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  66. struct iosys_map map;
  67. struct drm_gem_shmem_object *bo;
  68. u32 cfg, as;
  69. int ret;
  70. if (user == perfcnt->user)
  71. return 0;
  72. else if (perfcnt->user)
  73. return -EBUSY;
  74. ret = pm_runtime_get_sync(pfdev->dev);
  75. if (ret < 0)
  76. goto err_put_pm;
  77. bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
  78. if (IS_ERR(bo)) {
  79. ret = PTR_ERR(bo);
  80. goto err_put_pm;
  81. }
  82. /* Map the perfcnt buf in the address space attached to file_priv. */
  83. ret = panfrost_gem_open(&bo->base, file_priv);
  84. if (ret)
  85. goto err_put_bo;
  86. perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base),
  87. user);
  88. if (!perfcnt->mapping) {
  89. ret = -EINVAL;
  90. goto err_close_bo;
  91. }
  92. ret = drm_gem_shmem_vmap(bo, &map);
  93. if (ret)
  94. goto err_put_mapping;
  95. perfcnt->buf = map.vaddr;
  96. /*
  97. * Invalidate the cache and clear the counters to start from a fresh
  98. * state.
  99. */
  100. reinit_completion(&pfdev->perfcnt->dump_comp);
  101. gpu_write(pfdev, GPU_INT_CLEAR,
  102. GPU_IRQ_CLEAN_CACHES_COMPLETED |
  103. GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
  104. gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
  105. gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
  106. ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
  107. msecs_to_jiffies(1000));
  108. if (!ret) {
  109. ret = -ETIMEDOUT;
  110. goto err_vunmap;
  111. }
  112. perfcnt->user = user;
  113. as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
  114. cfg = GPU_PERFCNT_CFG_AS(as) |
  115. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
  116. /*
  117. * Bifrost GPUs have 2 set of counters, but we're only interested by
  118. * the first one for now.
  119. */
  120. if (panfrost_model_is_bifrost(pfdev))
  121. cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
  122. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
  123. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
  124. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
  125. /*
  126. * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
  127. * counters.
  128. */
  129. if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
  130. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  131. else
  132. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
  133. gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
  134. if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
  135. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
  136. /* The BO ref is retained by the mapping. */
  137. drm_gem_object_put(&bo->base);
  138. return 0;
  139. err_vunmap:
  140. drm_gem_shmem_vunmap(bo, &map);
  141. err_put_mapping:
  142. panfrost_gem_mapping_put(perfcnt->mapping);
  143. err_close_bo:
  144. panfrost_gem_close(&bo->base, file_priv);
  145. err_put_bo:
  146. drm_gem_object_put(&bo->base);
  147. err_put_pm:
  148. pm_runtime_put(pfdev->dev);
  149. return ret;
  150. }
  151. static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
  152. struct drm_file *file_priv)
  153. {
  154. struct panfrost_file_priv *user = file_priv->driver_priv;
  155. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  156. struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf);
  157. if (user != perfcnt->user)
  158. return -EINVAL;
  159. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
  160. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
  161. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
  162. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  163. gpu_write(pfdev, GPU_PERFCNT_CFG,
  164. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
  165. perfcnt->user = NULL;
  166. drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base, &map);
  167. perfcnt->buf = NULL;
  168. panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
  169. panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
  170. panfrost_gem_mapping_put(perfcnt->mapping);
  171. perfcnt->mapping = NULL;
  172. pm_runtime_mark_last_busy(pfdev->dev);
  173. pm_runtime_put_autosuspend(pfdev->dev);
  174. return 0;
  175. }
  176. int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
  177. struct drm_file *file_priv)
  178. {
  179. struct panfrost_device *pfdev = dev->dev_private;
  180. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  181. struct drm_panfrost_perfcnt_enable *req = data;
  182. int ret;
  183. ret = panfrost_unstable_ioctl_check();
  184. if (ret)
  185. return ret;
  186. /* Only Bifrost GPUs have 2 set of counters. */
  187. if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
  188. return -EINVAL;
  189. mutex_lock(&perfcnt->lock);
  190. if (req->enable)
  191. ret = panfrost_perfcnt_enable_locked(pfdev, file_priv,
  192. req->counterset);
  193. else
  194. ret = panfrost_perfcnt_disable_locked(pfdev, file_priv);
  195. mutex_unlock(&perfcnt->lock);
  196. return ret;
  197. }
  198. int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
  199. struct drm_file *file_priv)
  200. {
  201. struct panfrost_device *pfdev = dev->dev_private;
  202. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  203. struct drm_panfrost_perfcnt_dump *req = data;
  204. void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
  205. int ret;
  206. ret = panfrost_unstable_ioctl_check();
  207. if (ret)
  208. return ret;
  209. mutex_lock(&perfcnt->lock);
  210. if (perfcnt->user != file_priv->driver_priv) {
  211. ret = -EINVAL;
  212. goto out;
  213. }
  214. ret = panfrost_perfcnt_dump_locked(pfdev);
  215. if (ret)
  216. goto out;
  217. if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
  218. ret = -EFAULT;
  219. out:
  220. mutex_unlock(&perfcnt->lock);
  221. return ret;
  222. }
  223. void panfrost_perfcnt_close(struct drm_file *file_priv)
  224. {
  225. struct panfrost_file_priv *pfile = file_priv->driver_priv;
  226. struct panfrost_device *pfdev = pfile->pfdev;
  227. struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
  228. pm_runtime_get_sync(pfdev->dev);
  229. mutex_lock(&perfcnt->lock);
  230. if (perfcnt->user == pfile)
  231. panfrost_perfcnt_disable_locked(pfdev, file_priv);
  232. mutex_unlock(&perfcnt->lock);
  233. pm_runtime_mark_last_busy(pfdev->dev);
  234. pm_runtime_put_autosuspend(pfdev->dev);
  235. }
  236. int panfrost_perfcnt_init(struct panfrost_device *pfdev)
  237. {
  238. struct panfrost_perfcnt *perfcnt;
  239. size_t size;
  240. if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
  241. unsigned int ncoregroups;
  242. ncoregroups = hweight64(pfdev->features.l2_present);
  243. size = ncoregroups * BLOCKS_PER_COREGROUP *
  244. COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
  245. } else {
  246. unsigned int nl2c, ncores;
  247. /*
  248. * TODO: define a macro to extract the number of l2 caches from
  249. * mem_features.
  250. */
  251. nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
  252. /*
  253. * shader_present might be sparse, but the counters layout
  254. * forces to dump unused regions too, hence the fls64() call
  255. * instead of hweight64().
  256. */
  257. ncores = fls64(pfdev->features.shader_present);
  258. /*
  259. * There's always one JM and one Tiler block, hence the '+ 2'
  260. * here.
  261. */
  262. size = (nl2c + ncores + 2) *
  263. COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
  264. }
  265. perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
  266. if (!perfcnt)
  267. return -ENOMEM;
  268. perfcnt->bosize = size;
  269. /* Start with everything disabled. */
  270. gpu_write(pfdev, GPU_PERFCNT_CFG,
  271. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
  272. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
  273. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
  274. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
  275. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  276. init_completion(&perfcnt->dump_comp);
  277. mutex_init(&perfcnt->lock);
  278. pfdev->perfcnt = perfcnt;
  279. return 0;
  280. }
  281. void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
  282. {
  283. /* Disable everything before leaving. */
  284. gpu_write(pfdev, GPU_PERFCNT_CFG,
  285. GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
  286. gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
  287. gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
  288. gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
  289. gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
  290. }