123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349 |
- // SPDX-License-Identifier: GPL-2.0
- /* Copyright 2019 Collabora Ltd */
- #include <linux/completion.h>
- #include <linux/iopoll.h>
- #include <linux/iosys-map.h>
- #include <linux/pm_runtime.h>
- #include <linux/slab.h>
- #include <linux/uaccess.h>
- #include <drm/drm_file.h>
- #include <drm/drm_gem_shmem_helper.h>
- #include <drm/panfrost_drm.h>
- #include "panfrost_device.h"
- #include "panfrost_features.h"
- #include "panfrost_gem.h"
- #include "panfrost_issues.h"
- #include "panfrost_job.h"
- #include "panfrost_mmu.h"
- #include "panfrost_perfcnt.h"
- #include "panfrost_regs.h"
/*
 * Layout constants for the counter dump buffer. They are combined in
 * panfrost_perfcnt_init() to compute the size of the buffer object that
 * receives the counter values.
 */
#define COUNTERS_PER_BLOCK		64	/* counters in one block */
#define BYTES_PER_COUNTER		4	/* size of one counter value */
#define BLOCKS_PER_COREGROUP		8	/* blocks per core group (V4 layout) */
/* Not referenced in the visible part of this file. */
#define V4_SHADERS_PER_COREGROUP	4
- struct panfrost_perfcnt {
- struct panfrost_gem_mapping *mapping;
- size_t bosize;
- void *buf;
- struct panfrost_file_priv *user;
- struct mutex lock;
- struct completion dump_comp;
- };
- void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
- {
- complete(&pfdev->perfcnt->dump_comp);
- }
- void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
- {
- gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
- }
- static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
- {
- u64 gpuva;
- int ret;
- reinit_completion(&pfdev->perfcnt->dump_comp);
- gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT;
- gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva));
- gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva));
- gpu_write(pfdev, GPU_INT_CLEAR,
- GPU_IRQ_CLEAN_CACHES_COMPLETED |
- GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
- gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
- ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
- msecs_to_jiffies(1000));
- if (!ret)
- ret = -ETIMEDOUT;
- else if (ret > 0)
- ret = 0;
- return ret;
- }
- static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
- struct drm_file *file_priv,
- unsigned int counterset)
- {
- struct panfrost_file_priv *user = file_priv->driver_priv;
- struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
- struct iosys_map map;
- struct drm_gem_shmem_object *bo;
- u32 cfg, as;
- int ret;
- if (user == perfcnt->user)
- return 0;
- else if (perfcnt->user)
- return -EBUSY;
- ret = pm_runtime_get_sync(pfdev->dev);
- if (ret < 0)
- goto err_put_pm;
- bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
- if (IS_ERR(bo)) {
- ret = PTR_ERR(bo);
- goto err_put_pm;
- }
- /* Map the perfcnt buf in the address space attached to file_priv. */
- ret = panfrost_gem_open(&bo->base, file_priv);
- if (ret)
- goto err_put_bo;
- perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base),
- user);
- if (!perfcnt->mapping) {
- ret = -EINVAL;
- goto err_close_bo;
- }
- ret = drm_gem_shmem_vmap(bo, &map);
- if (ret)
- goto err_put_mapping;
- perfcnt->buf = map.vaddr;
- /*
- * Invalidate the cache and clear the counters to start from a fresh
- * state.
- */
- reinit_completion(&pfdev->perfcnt->dump_comp);
- gpu_write(pfdev, GPU_INT_CLEAR,
- GPU_IRQ_CLEAN_CACHES_COMPLETED |
- GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
- gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
- gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
- ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
- msecs_to_jiffies(1000));
- if (!ret) {
- ret = -ETIMEDOUT;
- goto err_vunmap;
- }
- perfcnt->user = user;
- as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
- cfg = GPU_PERFCNT_CFG_AS(as) |
- GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
- /*
- * Bifrost GPUs have 2 set of counters, but we're only interested by
- * the first one for now.
- */
- if (panfrost_model_is_bifrost(pfdev))
- cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
- gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
- gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
- gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
- /*
- * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
- * counters.
- */
- if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
- gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
- else
- gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
- gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
- if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
- gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
- /* The BO ref is retained by the mapping. */
- drm_gem_object_put(&bo->base);
- return 0;
- err_vunmap:
- drm_gem_shmem_vunmap(bo, &map);
- err_put_mapping:
- panfrost_gem_mapping_put(perfcnt->mapping);
- err_close_bo:
- panfrost_gem_close(&bo->base, file_priv);
- err_put_bo:
- drm_gem_object_put(&bo->base);
- err_put_pm:
- pm_runtime_put(pfdev->dev);
- return ret;
- }
- static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
- struct drm_file *file_priv)
- {
- struct panfrost_file_priv *user = file_priv->driver_priv;
- struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
- struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf);
- if (user != perfcnt->user)
- return -EINVAL;
- gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
- gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
- gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
- gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
- gpu_write(pfdev, GPU_PERFCNT_CFG,
- GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
- perfcnt->user = NULL;
- drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base, &map);
- perfcnt->buf = NULL;
- panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
- panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
- panfrost_gem_mapping_put(perfcnt->mapping);
- perfcnt->mapping = NULL;
- pm_runtime_mark_last_busy(pfdev->dev);
- pm_runtime_put_autosuspend(pfdev->dev);
- return 0;
- }
- int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
- struct drm_file *file_priv)
- {
- struct panfrost_device *pfdev = dev->dev_private;
- struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
- struct drm_panfrost_perfcnt_enable *req = data;
- int ret;
- ret = panfrost_unstable_ioctl_check();
- if (ret)
- return ret;
- /* Only Bifrost GPUs have 2 set of counters. */
- if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
- return -EINVAL;
- mutex_lock(&perfcnt->lock);
- if (req->enable)
- ret = panfrost_perfcnt_enable_locked(pfdev, file_priv,
- req->counterset);
- else
- ret = panfrost_perfcnt_disable_locked(pfdev, file_priv);
- mutex_unlock(&perfcnt->lock);
- return ret;
- }
- int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
- struct drm_file *file_priv)
- {
- struct panfrost_device *pfdev = dev->dev_private;
- struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
- struct drm_panfrost_perfcnt_dump *req = data;
- void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
- int ret;
- ret = panfrost_unstable_ioctl_check();
- if (ret)
- return ret;
- mutex_lock(&perfcnt->lock);
- if (perfcnt->user != file_priv->driver_priv) {
- ret = -EINVAL;
- goto out;
- }
- ret = panfrost_perfcnt_dump_locked(pfdev);
- if (ret)
- goto out;
- if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
- ret = -EFAULT;
- out:
- mutex_unlock(&perfcnt->lock);
- return ret;
- }
- void panfrost_perfcnt_close(struct drm_file *file_priv)
- {
- struct panfrost_file_priv *pfile = file_priv->driver_priv;
- struct panfrost_device *pfdev = pfile->pfdev;
- struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
- pm_runtime_get_sync(pfdev->dev);
- mutex_lock(&perfcnt->lock);
- if (perfcnt->user == pfile)
- panfrost_perfcnt_disable_locked(pfdev, file_priv);
- mutex_unlock(&perfcnt->lock);
- pm_runtime_mark_last_busy(pfdev->dev);
- pm_runtime_put_autosuspend(pfdev->dev);
- }
- int panfrost_perfcnt_init(struct panfrost_device *pfdev)
- {
- struct panfrost_perfcnt *perfcnt;
- size_t size;
- if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
- unsigned int ncoregroups;
- ncoregroups = hweight64(pfdev->features.l2_present);
- size = ncoregroups * BLOCKS_PER_COREGROUP *
- COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
- } else {
- unsigned int nl2c, ncores;
- /*
- * TODO: define a macro to extract the number of l2 caches from
- * mem_features.
- */
- nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
- /*
- * shader_present might be sparse, but the counters layout
- * forces to dump unused regions too, hence the fls64() call
- * instead of hweight64().
- */
- ncores = fls64(pfdev->features.shader_present);
- /*
- * There's always one JM and one Tiler block, hence the '+ 2'
- * here.
- */
- size = (nl2c + ncores + 2) *
- COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
- }
- perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
- if (!perfcnt)
- return -ENOMEM;
- perfcnt->bosize = size;
- /* Start with everything disabled. */
- gpu_write(pfdev, GPU_PERFCNT_CFG,
- GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
- gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
- gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
- gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
- gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
- init_completion(&perfcnt->dump_comp);
- mutex_init(&perfcnt->lock);
- pfdev->perfcnt = perfcnt;
- return 0;
- }
- void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
- {
- /* Disable everything before leaving. */
- gpu_write(pfdev, GPU_PERFCNT_CFG,
- GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
- gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
- gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
- gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
- gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
- }
|