Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux

Pull drm updates from Dave Airlie: "This is the main drm pull request for v4.15. Core: - Atomic object lifetime fixes - Atomic iterator improvements - Sparse/smatch fixes - Legacy kms ioctls to be interruptible - EDID override improvements - fb/gem helper cleanups - Simple outreachy patches - Documentation improvements - Fix dma-buf rcu races - DRM mode object leasing for improving VR use cases. - vgaarb improvements for non-x86 platforms. New driver: - tve200: Faraday Technology TVE200 block. This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in the StorLink SL3516 (later Cortina Systems CS3516) as well as the Grain Media GM8180. New bridges: - SiI9234 support New panels: - S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba LT089AC19000, Innolux AT043TN24 i915: - Remove Coffeelake from alpha support - Cannonlake workarounds - Infoframe refactoring for DisplayPort - VBT updates - DisplayPort vswing/emph/buffer translation refactoring - CCS fixes - Restore GPU clock boost on missed vblanks - Scatter list updates for userptr allocations - Gen9+ transition watermarks - Display IPC (Isochronous Priority Control) - Private PAT management - GVT: improved error handling and pci config sanitizing - Execlist refactoring - Transparent Huge Page support - User defined priorities support - HuC/GuC firmware refactoring - DP MST fixes - eDP power sequencing fixes - Use RCU instead of stop_machine - PSR state tracking support - Eviction fixes - BDW DP aux channel timeout fixes - LSPCON fixes - Cannonlake PLL fixes amdgpu: - Per VM BO support - Powerplay cleanups - CI powerplay support - PASID mgr for kfd - SR-IOV fixes - initial GPU reset for vega10 - Prime mmap support - TTM updates - Clock query interface for Raven - Fence to handle ioctl - UVD encode ring support on Polaris - Transparent huge page DMA support - Compute LRU pipe tweaks - BO flag to allow buffers to opt out of implicit sync - CTX priority setting API - VRAM lost infrastructure plumbing qxl: - fix flicker since atomic rework amdkfd: - Further improvements from internal AMD tree - Usermode events - Drop radeon support nouveau: - Pascal temperature sensor support - Improved BAR2 handling - MMU rework to support Pascal MMU exynos: - Improved HDMI/mixer support - HDMI audio interface support tegra: - Prep work for tegra186 - Cleanup/fixes msm: - Preemption support for a5xx - Display fixes for 8x96 (snapdragon 820) - Async cursor plane fixes - FW loading rework - GPU debugging improvements vc4: - Prep for DSI panels - fix T-format tiling scanout - New madvise ioctl Rockchip: - LVDS support omapdrm: - omap4 HDMI CEC support etnaviv: - GPU performance counters groundwork sun4i: - refactor driver load + TCON backend - HDMI improvements - A31 support - Misc fixes udl: - Probe/EDID read fixes. tilcdc: - Misc fixes. pl111: - Support more variants adv7511: - Improve EDID handling. - HDMI CEC support sii8620: - Add remote control support" * tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits) drm/rockchip: analogix_dp: Use mutex rather than spinlock drm/mode_object: fix documentation for object lookups. drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU drm/i915: Move init_clock_gating() back to where it was drm/i915: Prune the reservation shared fence array drm/i915: Idle the GPU before shinking everything drm/i915: Lock llist_del_first() vs llist_del_all() drm/i915: Calculate ironlake intermediate watermarks correctly, v2. drm/i915: Disable lazy PPGTT page table optimization for vGPU drm/i915/execlists: Remove the priority "optimisation" drm/i915: Filter out spurious execlists context-switch interrupts drm/amdgpu: use irq-safe lock for kiq->ring_lock drm/amdgpu: bypass lru touch for KIQ ring submission drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories() drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs() drm/amd/powerplay: initialize a variable before using it drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug drm/rockchip: add CONFIG_OF dependency for lvds ...
2017-11-15 20:42:10 -08:00
commit e60e1ee606
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -25,6 +25,7 @@
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
+#include "etnaviv_perfmon.h"
 #include "common.xml.h"
 #include "state.xml.h"
 #include "state_hi.xml.h"
@@ -420,9 +421,10 @@ static void etnaviv_gpu_update_clock(struct etnaviv_gpu *gpu)
 			     gpu->base_rate_shader >> gpu->freq_scale);
 	} else {
 		unsigned int fscale = 1 << (6 - gpu->freq_scale);
-		u32 clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
-			    VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale);
+		u32 clock = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);

+		clock &= ~VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__MASK;
+		clock |= VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale);
 		etnaviv_gpu_load_clock(gpu, clock);
 	}
 }
@@ -433,24 +435,14 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 	unsigned long timeout;
 	bool failed = true;

-	/* TODO
-	 *
-	 * - clock gating
-	 * - puls eater
-	 * - what about VG?
-	 */
-
 	/* We hope that the GPU resets in under one second */
 	timeout = jiffies + msecs_to_jiffies(1000);

 	while (time_is_after_jiffies(timeout)) {
 		/* enable clock */
-		etnaviv_gpu_update_clock(gpu);
-
-		control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
-
-		/* Wait for stable clock.  Vivante's code waited for 1ms */
-		usleep_range(1000, 10000);
+		unsigned int fscale = 1 << (6 - gpu->freq_scale);
+		control = VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale);
+		etnaviv_gpu_load_clock(gpu, control);

 		/* isolate the GPU. */
 		control |= VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
@@ -461,7 +453,7 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);

 		/* wait for reset. */
-		msleep(1);
+		usleep_range(10, 20);

 		/* reset soft reset bit. */
 		control &= ~VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
@@ -490,6 +482,10 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 			continue;
 		}

+		/* disable debug registers, as they are not normally needed */
+		control |= VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
 		failed = false;
 		break;
 	}
@@ -721,7 +717,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}

 	/* Create buffer: */
-	gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0);
+	gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0, 0);
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
@@ -739,10 +735,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	/* Setup event management */
 	spin_lock_init(&gpu->event_spinlock);
 	init_completion(&gpu->event_free);
-	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
-		gpu->event[i].used = false;
+	bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS);
+	for (i = 0; i < ARRAY_SIZE(gpu->event); i++)
 		complete(&gpu->event_free);
-	}

 	/* Now program the hardware */
 	mutex_lock(&gpu->lock);
@@ -926,7 +921,7 @@ static void recover_worker(struct work_struct *work)
 	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
 					       recover_work);
 	unsigned long flags;
-	unsigned int i;
+	unsigned int i = 0;

 	dev_err(gpu->dev, "hangcheck recover!\n");

@@ -945,14 +940,12 @@ static void recover_worker(struct work_struct *work)

 	/* complete all events, the GPU won't do it after the reset */
 	spin_lock_irqsave(&gpu->event_spinlock, flags);
-	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
-		if (!gpu->event[i].used)
-			continue;
+	for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS) {
 		dma_fence_signal(gpu->event[i].fence);
 		gpu->event[i].fence = NULL;
-		gpu->event[i].used = false;
 		complete(&gpu->event_free);
 	}
+	bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS);
 	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
 	gpu->completed_fence = gpu->active_fence;

@@ -1140,30 +1133,45 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
 * event management:
 */

-static unsigned int event_alloc(struct etnaviv_gpu *gpu)
+static int event_alloc(struct etnaviv_gpu *gpu, unsigned nr_events,
+	unsigned int *events)
 {
-	unsigned long ret, flags;
-	unsigned int i, event = ~0U;
+	unsigned long flags, timeout = msecs_to_jiffies(10 * 10000);
+	unsigned i, acquired = 0;

-	ret = wait_for_completion_timeout(&gpu->event_free,
-					  msecs_to_jiffies(10 * 10000));
-	if (!ret)
-		dev_err(gpu->dev, "wait_for_completion_timeout failed");
+	for (i = 0; i < nr_events; i++) {
+		unsigned long ret;
+
+		ret = wait_for_completion_timeout(&gpu->event_free, timeout);
+
+		if (!ret) {
+			dev_err(gpu->dev, "wait_for_completion_timeout failed");
+			goto out;
+		}
+
+		acquired++;
+		timeout = ret;
+	}

 	spin_lock_irqsave(&gpu->event_spinlock, flags);

-	/* find first free event */
-	for (i = 0; i < ARRAY_SIZE(gpu->event); i++) {
-		if (gpu->event[i].used == false) {
-			gpu->event[i].used = true;
-			event = i;
-			break;
-		}
+	for (i = 0; i < nr_events; i++) {
+		int event = find_first_zero_bit(gpu->event_bitmap, ETNA_NR_EVENTS);
+
+		events[i] = event;
+		memset(&gpu->event[event], 0, sizeof(struct etnaviv_event));
+		set_bit(event, gpu->event_bitmap);
 	}

 	spin_unlock_irqrestore(&gpu->event_spinlock, flags);

-	return event;
+	return 0;
+
+out:
+	for (i = 0; i < acquired; i++)
+		complete(&gpu->event_free);
+
+	return -EBUSY;
 }

 static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
@@ -1172,12 +1180,12 @@ static void event_free(struct etnaviv_gpu *gpu, unsigned int event)

 	spin_lock_irqsave(&gpu->event_spinlock, flags);

-	if (gpu->event[event].used == false) {
+	if (!test_bit(event, gpu->event_bitmap)) {
 		dev_warn(gpu->dev, "event %u is already marked as free",
 			 event);
 		spin_unlock_irqrestore(&gpu->event_spinlock, flags);
 	} else {
-		gpu->event[event].used = false;
+		clear_bit(event, gpu->event_bitmap);
 		spin_unlock_irqrestore(&gpu->event_spinlock, flags);

 		complete(&gpu->event_free);
@@ -1311,12 +1319,71 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu)
 	pm_runtime_put_autosuspend(gpu->dev);
 }

+static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event, unsigned int flags)
+{
+	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+	unsigned int i;
+
+	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
+		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+
+		if (pmr->flags == flags)
+			etnaviv_perfmon_process(gpu, pmr);
+	}
+}
+
+static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event)
+{
+	u32 val;
+
+	/* disable clock gating */
+	val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
+	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
+
+	/* enable debug register */
+	val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
+	val &= ~VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS;
+	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val);
+
+	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE);
+}
+
+static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event)
+{
+	const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf;
+	unsigned int i;
+	u32 val;
+
+	sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST);
+
+	for (i = 0; i < cmdbuf->nr_pmrs; i++) {
+		const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i;
+
+		*pmr->bo_vma = pmr->sequence;
+	}
+
+	/* disable debug register */
+	val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
+	val |= VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS;
+	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val);
+
+	/* enable clock gating */
+	val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
+	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
+}
+
+
 /* add bo's to gpu's ring, and kick gpu: */
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf)
 {
 	struct dma_fence *fence;
-	unsigned int event, i;
+	unsigned int i, nr_events = 1, event[3];
 	int ret;

 	ret = etnaviv_gpu_pm_get_sync(gpu);
@@ -1332,10 +1399,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	 *
 	 */

-	event = event_alloc(gpu);
-	if (unlikely(event == ~0U)) {
-		DRM_ERROR("no free event\n");
-		ret = -EBUSY;
+	/*
+	 * if there are performance monitor requests we need to have
+	 * - a sync point to re-configure gpu and process ETNA_PM_PROCESS_PRE
+	 *   requests.
+	 * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests
+	 *   and update the sequence number for userspace.
+	 */
+	if (cmdbuf->nr_pmrs)
+		nr_events = 3;
+
+	ret = event_alloc(gpu, nr_events, event);
+	if (ret) {
+		DRM_ERROR("no free events\n");
 		goto out_pm_put;
 	}

@@ -1343,12 +1419,14 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,

 	fence = etnaviv_gpu_fence_alloc(gpu);
 	if (!fence) {
-		event_free(gpu, event);
+		for (i = 0; i < nr_events; i++)
+			event_free(gpu, event[i]);
+
 		ret = -ENOMEM;
 		goto out_unlock;
 	}

-	gpu->event[event].fence = fence;
+	gpu->event[event[0]].fence = fence;
 	submit->fence = dma_fence_get(fence);
 	gpu->active_fence = submit->fence->seqno;

@@ -1358,7 +1436,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 		gpu->lastctx = cmdbuf->ctx;
 	}

-	etnaviv_buffer_queue(gpu, event, cmdbuf);
+	if (cmdbuf->nr_pmrs) {
+		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
+		gpu->event[event[1]].cmdbuf = cmdbuf;
+		etnaviv_sync_point_queue(gpu, event[1]);
+	}
+
+	etnaviv_buffer_queue(gpu, event[0], cmdbuf);
+
+	if (cmdbuf->nr_pmrs) {
+		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
+		gpu->event[event[2]].cmdbuf = cmdbuf;
+		etnaviv_sync_point_queue(gpu, event[2]);
+	}

 	cmdbuf->fence = fence;
 	list_add_tail(&cmdbuf->node, &gpu->active_cmd_list);
@@ -1394,6 +1484,24 @@ out_pm_put:
 	return ret;
 }

+static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu,
+	struct etnaviv_event *event)
+{
+	u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+
+	event->sync_point(gpu, event);
+	etnaviv_gpu_start_fe(gpu, addr + 2, 2);
+}
+
+static void sync_point_worker(struct work_struct *work)
+{
+	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
+					       sync_point_work);
+
+	etnaviv_process_sync_point(gpu, &gpu->event[gpu->sync_point_event]);
+	event_free(gpu, gpu->sync_point_event);
+}
+
 /*
 * Init/Cleanup:
 */
@@ -1440,7 +1548,15 @@ static irqreturn_t irq_handler(int irq, void *data)

 			dev_dbg(gpu->dev, "event %u\n", event);

+			if (gpu->event[event].sync_point) {
+				gpu->sync_point_event = event;
+				etnaviv_queue_work(gpu->drm, &gpu->sync_point_work);
+			}
+
 			fence = gpu->event[event].fence;
+			if (!fence)
+				continue;
+
 			gpu->event[event].fence = NULL;
 			dma_fence_signal(fence);

@@ -1645,6 +1761,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,

 	INIT_LIST_HEAD(&gpu->active_cmd_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
+	INIT_WORK(&gpu->sync_point_work, sync_point_worker);
 	INIT_WORK(&gpu->recover_work, recover_worker);
 	init_waitqueue_head(&gpu->fence_event);