Merge tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "This is the main pull request for 5.4-rc1 merge window. I don't think there is anything outstanding so next week should just be fixes, but we'll see if I missed anything. I landed some fixes earlier in the week but got delayed writing summary and sending it out, due to a mix of sick kid and jetlag! There are some fixes pending, but I'd rather get the main merge out of the way instead of delaying it longer. It's also pretty large in commit count and new amd header file size. The largest thing is four new amdgpu products (navi12/14, arcturus and renoir APU support). Otherwise it's pretty much lots of work across the board, i915 has started landing tigerlake support, lots of icelake fixes and lots of locking reworking for future gpu support, lots of header file rework (drmP.h is nearly gone), some old legacy hacks (DRM_WAIT_ON) have been put into the places they are needed. uapi: - content protection type property for HDCP core: - rework include dependencies - lots of drmP.h removals - link rate calculation robustness fix - make fb helper map only when required - add connector->DDC adapter link - DRM_WAIT_ON removed - drop DRM_AUTH usage from drivers dma-buf: - reservation object fence helper dma-fence: - shrink dma_fence struct - merge signal functions - store timestamps in dma_fence - selftests ttm: - embed drm_get_object struct into ttm_buffer_object - release_notify callback bridges: - sii902x - audio graph card support - tc358767 - aux data handling rework - ti-snd64dsi86 - debugfs support, DSI mode flags support panels: - Support for GiantPlus GPM940B0, Sharp LQ070Y3DG3B, Ortustech COM37H3M, Novatek NT39016, Sharp LS020B1DD01D, Raydium RM67191, Boe Himax8279d, Sharp LD-D5116Z01B - TI nspire, NEC NL8048HL11, LG Philips LB035Q02, Sharp LS037V7DW01, Sony ACX565AKM, Toppoly TD028TTEC1 Toppoly TD043MTEA1 i915: - Initial tigerlake platform support - Locking simplification work, general all over refactoring. 
- Selftests - HDCP debug info improvements - DSI properties - Icelake display PLL fixes, colorspace fixes, bandwidth fixes, DSI suspend/resume - GuC fixes - Perf fixes - ElkhartLake enablement - DP MST fixes - GVT - command parser enhancements amdgpu: - add wipe memory on release flag for buffer creation - Navi12/14 support (may be marked experimental) - Arcturus support - Renoir APU support - mclk DPM for Navi - DC display fixes - Raven scatter/gather support - RAS support for GFX - Navi12 + Arcturus power features - GPU reset for Picasso - smu11 i2c controller support amdkfd: - navi12/14 support - Arcturus support radeon: - kexec fix nouveau: - improved display color management - detect lack of GPU power cables vmwgfx: - evicition priority support - remove unused security feature msm: - msm8998 display support - better async commit support for cursor updates etnaviv: - per-process address space support - performance counter fixes - softpin support mcde: - DCS transfers fix exynos: - drmP.h cleanup lima: - reduce logging kirin: - misc clenaups komeda: - dual-link support - DT memory regions hisilicon: - misc fixes imx: - IPUv3 image converter fixes - 32-bit RGB V4L2 pixel format support ingenic: - more support for panel related cases mgag200: - cursor support fix panfrost: - export GPU features register to userspace - gpu heap allocations - per-fd address space support pl111: - CLD pads wiring support removed from DT rockchip: - rework to use DRM PSR helpers - fix bug in VOP_WIN_GET macro - DSI DT binding rework sun4i: - improve support for color encoding and range - DDC enabled GPIO tinydrm: - rework SPI support - improve MIPI-DBI support - moved to drm/tiny vkms: - rework CRC tracking dw-hdmi: - get_eld and i2s improvements gm12u320: - misc fixes meson: - global code cleanup - vpu feature detect omap: - alpha/pixel blend mode properties rcar-du: - misc fixes" * tag 'drm-next-2019-09-18' of git://anongit.freedesktop.org/drm/drm: (2112 commits) drm/nouveau/bar/gm20b: Avoid BAR1 teardown during init drm/nouveau: Fix ordering between TTM and GEM release drm/nouveau/prime: Extend DMA reservation object lock drm/nouveau: Fix fallout from reservation object rework drm/nouveau/kms/nv50-: Don't create MSTMs for eDP connectors drm/i915: Use NOEVICT for first pass on attemping to pin a GGTT mmap drm/i915: to make vgpu ppgtt notificaiton as atomic operation drm/i915: Flush the existing fence before GGTT read/write drm/i915: Hold irq-off for the entire fake lock period drm/i915/gvt: update RING_START reg of vGPU when the context is submitted to i915 drm/i915/gvt: update vgpu workload head pointer correctly drm/mcde: Fix DSI transfers drm/msm: Use the correct dma_sync calls harder drm/msm: remove unlikely() from WARN_ON() conditions drm/msm/dsi: Fix return value check for clk_get_parent drm/msm: add atomic traces drm/msm/dpu: async commit support drm/msm: async commit support drm/msm: split power control from prepare/complete_commit drm/msm: add kms->flush_commit() ...
This commit is contained in:
@@ -1 +1,5 @@
|
||||
include $(src)/Makefile.header-test # Extra header tests
|
||||
# For building individual subdir files on the command line
|
||||
subdir-ccflags-y += -I$(srctree)/$(src)/..
|
||||
|
||||
# Extra header tests
|
||||
header-test-pattern-$(CONFIG_DRM_I915_WERROR) := *.h
|
||||
|
@@ -1,16 +0,0 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright © 2019 Intel Corporation
|
||||
|
||||
# Test the headers are compilable as standalone units
|
||||
header_test := $(notdir $(wildcard $(src)/*.h))
|
||||
|
||||
quiet_cmd_header_test = HDRTEST $@
|
||||
cmd_header_test = echo "\#include \"$(<F)\"" > $@
|
||||
|
||||
header_test_%.c: %.h
|
||||
$(call cmd,header_test)
|
||||
|
||||
extra-$(CONFIG_DRM_I915_WERROR) += \
|
||||
$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
|
||||
|
||||
clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
|
@@ -82,7 +82,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
|
||||
{
|
||||
struct drm_i915_gem_busy *args = data;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct reservation_object_list *list;
|
||||
struct dma_resv_list *list;
|
||||
unsigned int seq;
|
||||
int err;
|
||||
|
||||
@@ -105,7 +105,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
|
||||
* Alternatively, we can trade that extra information on read/write
|
||||
* activity with
|
||||
* args->busy =
|
||||
* !reservation_object_test_signaled_rcu(obj->resv, true);
|
||||
* !dma_resv_test_signaled_rcu(obj->resv, true);
|
||||
* to report the overall busyness. This is what the wait-ioctl does.
|
||||
*
|
||||
*/
|
||||
|
@@ -8,87 +8,67 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
|
||||
static DEFINE_SPINLOCK(clflush_lock);
|
||||
#include "i915_sw_fence_work.h"
|
||||
#include "i915_trace.h"
|
||||
|
||||
struct clflush {
|
||||
struct dma_fence dma; /* Must be first for dma_fence_free() */
|
||||
struct i915_sw_fence wait;
|
||||
struct work_struct work;
|
||||
struct dma_fence_work base;
|
||||
struct drm_i915_gem_object *obj;
|
||||
};
|
||||
|
||||
static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
|
||||
{
|
||||
return DRIVER_NAME;
|
||||
}
|
||||
|
||||
static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
|
||||
{
|
||||
return "clflush";
|
||||
}
|
||||
|
||||
static void i915_clflush_release(struct dma_fence *fence)
|
||||
{
|
||||
struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
|
||||
|
||||
i915_sw_fence_fini(&clflush->wait);
|
||||
|
||||
BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
|
||||
dma_fence_free(&clflush->dma);
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops i915_clflush_ops = {
|
||||
.get_driver_name = i915_clflush_get_driver_name,
|
||||
.get_timeline_name = i915_clflush_get_timeline_name,
|
||||
.release = i915_clflush_release,
|
||||
};
|
||||
|
||||
static void __i915_do_clflush(struct drm_i915_gem_object *obj)
|
||||
static void __do_clflush(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
||||
drm_clflush_sg(obj->mm.pages);
|
||||
intel_fb_obj_flush(obj, ORIGIN_CPU);
|
||||
intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
|
||||
}
|
||||
|
||||
static void i915_clflush_work(struct work_struct *work)
|
||||
static int clflush_work(struct dma_fence_work *base)
|
||||
{
|
||||
struct clflush *clflush = container_of(work, typeof(*clflush), work);
|
||||
struct drm_i915_gem_object *obj = clflush->obj;
|
||||
struct clflush *clflush = container_of(base, typeof(*clflush), base);
|
||||
struct drm_i915_gem_object *obj = fetch_and_zero(&clflush->obj);
|
||||
int err;
|
||||
|
||||
if (i915_gem_object_pin_pages(obj)) {
|
||||
DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
__i915_do_clflush(obj);
|
||||
err = i915_gem_object_pin_pages(obj);
|
||||
if (err)
|
||||
goto put;
|
||||
|
||||
__do_clflush(obj);
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
|
||||
out:
|
||||
put:
|
||||
i915_gem_object_put(obj);
|
||||
|
||||
dma_fence_signal(&clflush->dma);
|
||||
dma_fence_put(&clflush->dma);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int __i915_sw_fence_call
|
||||
i915_clflush_notify(struct i915_sw_fence *fence,
|
||||
enum i915_sw_fence_notify state)
|
||||
static void clflush_release(struct dma_fence_work *base)
|
||||
{
|
||||
struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
|
||||
struct clflush *clflush = container_of(base, typeof(*clflush), base);
|
||||
|
||||
switch (state) {
|
||||
case FENCE_COMPLETE:
|
||||
schedule_work(&clflush->work);
|
||||
break;
|
||||
if (clflush->obj)
|
||||
i915_gem_object_put(clflush->obj);
|
||||
}
|
||||
|
||||
case FENCE_FREE:
|
||||
dma_fence_put(&clflush->dma);
|
||||
break;
|
||||
}
|
||||
static const struct dma_fence_work_ops clflush_ops = {
|
||||
.name = "clflush",
|
||||
.work = clflush_work,
|
||||
.release = clflush_release,
|
||||
};
|
||||
|
||||
return NOTIFY_DONE;
|
||||
static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct clflush *clflush;
|
||||
|
||||
GEM_BUG_ON(!obj->cache_dirty);
|
||||
|
||||
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
|
||||
if (!clflush)
|
||||
return NULL;
|
||||
|
||||
dma_fence_work_init(&clflush->base, &clflush_ops);
|
||||
clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */
|
||||
|
||||
return clflush;
|
||||
}
|
||||
|
||||
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
|
||||
@@ -126,33 +106,16 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
|
||||
|
||||
clflush = NULL;
|
||||
if (!(flags & I915_CLFLUSH_SYNC))
|
||||
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
|
||||
clflush = clflush_work_create(obj);
|
||||
if (clflush) {
|
||||
GEM_BUG_ON(!obj->cache_dirty);
|
||||
|
||||
dma_fence_init(&clflush->dma,
|
||||
&i915_clflush_ops,
|
||||
&clflush_lock,
|
||||
to_i915(obj->base.dev)->mm.unordered_timeline,
|
||||
0);
|
||||
i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
|
||||
|
||||
clflush->obj = i915_gem_object_get(obj);
|
||||
INIT_WORK(&clflush->work, i915_clflush_work);
|
||||
|
||||
dma_fence_get(&clflush->dma);
|
||||
|
||||
i915_sw_fence_await_reservation(&clflush->wait,
|
||||
obj->base.resv, NULL,
|
||||
true, I915_FENCE_TIMEOUT,
|
||||
i915_sw_fence_await_reservation(&clflush->base.chain,
|
||||
obj->base.resv, NULL, true,
|
||||
I915_FENCE_TIMEOUT,
|
||||
I915_FENCE_GFP);
|
||||
|
||||
reservation_object_add_excl_fence(obj->base.resv,
|
||||
&clflush->dma);
|
||||
|
||||
i915_sw_fence_commit(&clflush->wait);
|
||||
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
|
||||
dma_fence_work_commit(&clflush->base);
|
||||
} else if (obj->mm.pages) {
|
||||
__i915_do_clflush(obj);
|
||||
__do_clflush(obj);
|
||||
} else {
|
||||
GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
|
||||
}
|
||||
|
@@ -2,10 +2,13 @@
|
||||
/*
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*/
|
||||
#include "i915_gem_client_blt.h"
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "gt/intel_context.h"
|
||||
#include "gt/intel_engine_pm.h"
|
||||
#include "gt/intel_engine_pool.h"
|
||||
#include "i915_gem_client_blt.h"
|
||||
#include "i915_gem_object_blt.h"
|
||||
#include "intel_drv.h"
|
||||
|
||||
struct i915_sleeve {
|
||||
struct i915_vma *vma;
|
||||
@@ -72,7 +75,6 @@ static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
|
||||
vma->ops = &proxy_vma_ops;
|
||||
|
||||
sleeve->vma = vma;
|
||||
sleeve->obj = i915_gem_object_get(obj);
|
||||
sleeve->pages = pages;
|
||||
sleeve->page_sizes = *page_sizes;
|
||||
|
||||
@@ -85,7 +87,6 @@ err_free:
|
||||
|
||||
static void destroy_sleeve(struct i915_sleeve *sleeve)
|
||||
{
|
||||
i915_gem_object_put(sleeve->obj);
|
||||
kfree(sleeve);
|
||||
}
|
||||
|
||||
@@ -154,21 +155,23 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
|
||||
static void clear_pages_worker(struct work_struct *work)
|
||||
{
|
||||
struct clear_pages_work *w = container_of(work, typeof(*w), work);
|
||||
struct drm_i915_private *i915 = w->ce->gem_context->i915;
|
||||
struct drm_i915_gem_object *obj = w->sleeve->obj;
|
||||
struct drm_i915_private *i915 = w->ce->engine->i915;
|
||||
struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
|
||||
struct i915_vma *vma = w->sleeve->vma;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *batch;
|
||||
int err = w->dma.error;
|
||||
|
||||
if (unlikely(err))
|
||||
goto out_signal;
|
||||
|
||||
if (obj->cache_dirty) {
|
||||
obj->write_domain = 0;
|
||||
if (i915_gem_object_has_struct_page(obj))
|
||||
drm_clflush_sg(w->sleeve->pages);
|
||||
obj->cache_dirty = false;
|
||||
}
|
||||
obj->read_domains = I915_GEM_GPU_DOMAINS;
|
||||
obj->write_domain = 0;
|
||||
|
||||
/* XXX: we need to kill this */
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
@@ -176,10 +179,16 @@ static void clear_pages_worker(struct work_struct *work)
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
|
||||
rq = i915_request_create(w->ce);
|
||||
batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(w->ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_unpin;
|
||||
goto out_batch;
|
||||
}
|
||||
|
||||
/* There's no way the fence has signalled */
|
||||
@@ -187,20 +196,28 @@ static void clear_pages_worker(struct work_struct *work)
|
||||
clear_pages_dma_fence_cb))
|
||||
GEM_BUG_ON(1);
|
||||
|
||||
err = intel_emit_vma_mark_active(batch, rq);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
if (w->ce->engine->emit_init_breadcrumb) {
|
||||
err = w->ce->engine->emit_init_breadcrumb(rq);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
}
|
||||
|
||||
/* XXX: more feverish nightmares await */
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
/*
|
||||
* w->dma is already exported via (vma|obj)->resv we need only
|
||||
* keep track of the GPU activity within this vma/request, and
|
||||
* propagate the signal from the request to w->dma.
|
||||
*/
|
||||
err = i915_active_ref(&vma->active, rq->timeline, rq);
|
||||
if (err)
|
||||
goto out_request;
|
||||
|
||||
err = intel_emit_vma_fill_blt(rq, vma, w->value);
|
||||
err = w->ce->engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
0);
|
||||
out_request:
|
||||
if (unlikely(err)) {
|
||||
i915_request_skip(rq, err);
|
||||
@@ -208,6 +225,8 @@ out_request:
|
||||
}
|
||||
|
||||
i915_request_add(rq);
|
||||
out_batch:
|
||||
intel_emit_vma_release(w->ce, batch);
|
||||
out_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
out_unlock:
|
||||
@@ -248,14 +267,11 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
|
||||
struct i915_page_sizes *page_sizes,
|
||||
u32 value)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_gem_context *ctx = ce->gem_context;
|
||||
struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
|
||||
struct clear_pages_work *work;
|
||||
struct i915_sleeve *sleeve;
|
||||
int err;
|
||||
|
||||
sleeve = create_sleeve(vm, obj, pages, page_sizes);
|
||||
sleeve = create_sleeve(ce->vm, obj, pages, page_sizes);
|
||||
if (IS_ERR(sleeve))
|
||||
return PTR_ERR(sleeve);
|
||||
|
||||
@@ -273,11 +289,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
|
||||
|
||||
init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);
|
||||
|
||||
dma_fence_init(&work->dma,
|
||||
&clear_pages_work_ops,
|
||||
&fence_lock,
|
||||
i915->mm.unordered_timeline,
|
||||
0);
|
||||
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
|
||||
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
@@ -288,7 +300,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
|
||||
if (err < 0) {
|
||||
dma_fence_set_error(&work->dma, err);
|
||||
} else {
|
||||
reservation_object_add_excl_fence(obj->base.resv, &work->dma);
|
||||
dma_resv_add_excl_fence(obj->base.resv, &work->dma);
|
||||
err = 0;
|
||||
}
|
||||
i915_gem_object_unlock(obj);
|
||||
|
@@ -70,6 +70,7 @@
|
||||
#include <drm/i915_drm.h>
|
||||
|
||||
#include "gt/intel_lrc_reg.h"
|
||||
#include "gt/intel_engine_user.h"
|
||||
|
||||
#include "i915_gem_context.h"
|
||||
#include "i915_globals.h"
|
||||
@@ -158,7 +159,7 @@ lookup_user_engine(struct i915_gem_context *ctx,
|
||||
if (!engine)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
idx = engine->id;
|
||||
idx = engine->legacy_idx;
|
||||
} else {
|
||||
idx = ci->engine_instance;
|
||||
}
|
||||
@@ -172,7 +173,9 @@ static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
|
||||
|
||||
lockdep_assert_held(&i915->contexts.mutex);
|
||||
|
||||
if (INTEL_GEN(i915) >= 11)
|
||||
if (INTEL_GEN(i915) >= 12)
|
||||
max = GEN12_MAX_CONTEXT_HW_ID;
|
||||
else if (INTEL_GEN(i915) >= 11)
|
||||
max = GEN11_MAX_CONTEXT_HW_ID;
|
||||
else if (USES_GUC_SUBMISSION(i915))
|
||||
/*
|
||||
@@ -278,6 +281,7 @@ static void free_engines_rcu(struct rcu_head *rcu)
|
||||
|
||||
static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
|
||||
{
|
||||
const struct intel_gt *gt = &ctx->i915->gt;
|
||||
struct intel_engine_cs *engine;
|
||||
struct i915_gem_engines *e;
|
||||
enum intel_engine_id id;
|
||||
@@ -287,7 +291,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
init_rcu_head(&e->rcu);
|
||||
for_each_engine(engine, ctx->i915, id) {
|
||||
for_each_engine(engine, gt, id) {
|
||||
struct intel_context *ce;
|
||||
|
||||
ce = intel_context_create(ctx, engine);
|
||||
@@ -297,8 +301,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
|
||||
}
|
||||
|
||||
e->engines[id] = ce;
|
||||
e->num_engines = id + 1;
|
||||
}
|
||||
e->num_engines = id;
|
||||
|
||||
return e;
|
||||
}
|
||||
@@ -316,7 +320,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
|
||||
mutex_destroy(&ctx->engines_mutex);
|
||||
|
||||
if (ctx->timeline)
|
||||
i915_timeline_put(ctx->timeline);
|
||||
intel_timeline_put(ctx->timeline);
|
||||
|
||||
kfree(ctx->name);
|
||||
put_pid(ctx->pid);
|
||||
@@ -397,30 +401,6 @@ static void context_close(struct i915_gem_context *ctx)
|
||||
i915_gem_context_put(ctx);
|
||||
}
|
||||
|
||||
static u32 default_desc_template(const struct drm_i915_private *i915,
|
||||
const struct i915_address_space *vm)
|
||||
{
|
||||
u32 address_mode;
|
||||
u32 desc;
|
||||
|
||||
desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
|
||||
|
||||
address_mode = INTEL_LEGACY_32B_CONTEXT;
|
||||
if (vm && i915_vm_is_4lvl(vm))
|
||||
address_mode = INTEL_LEGACY_64B_CONTEXT;
|
||||
desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
|
||||
|
||||
if (IS_GEN(i915, 8))
|
||||
desc |= GEN8_CTX_L3LLC_COHERENT;
|
||||
|
||||
/* TODO: WaDisableLiteRestore when we start using semaphore
|
||||
* signalling between Command Streamers
|
||||
* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
|
||||
*/
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
static struct i915_gem_context *
|
||||
__create_context(struct drm_i915_private *i915)
|
||||
{
|
||||
@@ -458,10 +438,6 @@ __create_context(struct drm_i915_private *i915)
|
||||
i915_gem_context_set_bannable(ctx);
|
||||
i915_gem_context_set_recoverable(ctx);
|
||||
|
||||
ctx->ring_size = 4 * PAGE_SIZE;
|
||||
ctx->desc_template =
|
||||
default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
|
||||
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
|
||||
|
||||
@@ -472,13 +448,34 @@ err_free:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void
|
||||
context_apply_all(struct i915_gem_context *ctx,
|
||||
void (*fn)(struct intel_context *ce, void *data),
|
||||
void *data)
|
||||
{
|
||||
struct i915_gem_engines_iter it;
|
||||
struct intel_context *ce;
|
||||
|
||||
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
|
||||
fn(ce, data);
|
||||
i915_gem_context_unlock_engines(ctx);
|
||||
}
|
||||
|
||||
static void __apply_ppgtt(struct intel_context *ce, void *vm)
|
||||
{
|
||||
i915_vm_put(ce->vm);
|
||||
ce->vm = i915_vm_get(vm);
|
||||
}
|
||||
|
||||
static struct i915_address_space *
|
||||
__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
|
||||
{
|
||||
struct i915_address_space *old = ctx->vm;
|
||||
|
||||
GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
|
||||
|
||||
ctx->vm = i915_vm_get(vm);
|
||||
ctx->desc_template = default_desc_template(ctx->i915, vm);
|
||||
context_apply_all(ctx, __apply_ppgtt, vm);
|
||||
|
||||
return old;
|
||||
}
|
||||
@@ -494,6 +491,29 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
|
||||
i915_vm_put(vm);
|
||||
}
|
||||
|
||||
static void __set_timeline(struct intel_timeline **dst,
|
||||
struct intel_timeline *src)
|
||||
{
|
||||
struct intel_timeline *old = *dst;
|
||||
|
||||
*dst = src ? intel_timeline_get(src) : NULL;
|
||||
|
||||
if (old)
|
||||
intel_timeline_put(old);
|
||||
}
|
||||
|
||||
static void __apply_timeline(struct intel_context *ce, void *timeline)
|
||||
{
|
||||
__set_timeline(&ce->timeline, timeline);
|
||||
}
|
||||
|
||||
static void __assign_timeline(struct i915_gem_context *ctx,
|
||||
struct intel_timeline *timeline)
|
||||
{
|
||||
__set_timeline(&ctx->timeline, timeline);
|
||||
context_apply_all(ctx, __apply_timeline, timeline);
|
||||
}
|
||||
|
||||
static struct i915_gem_context *
|
||||
i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
|
||||
{
|
||||
@@ -528,15 +548,16 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
|
||||
}
|
||||
|
||||
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
|
||||
struct i915_timeline *timeline;
|
||||
struct intel_timeline *timeline;
|
||||
|
||||
timeline = i915_timeline_create(dev_priv, NULL);
|
||||
timeline = intel_timeline_create(&dev_priv->gt, NULL);
|
||||
if (IS_ERR(timeline)) {
|
||||
context_close(ctx);
|
||||
return ERR_CAST(timeline);
|
||||
}
|
||||
|
||||
ctx->timeline = timeline;
|
||||
__assign_timeline(ctx, timeline);
|
||||
intel_timeline_put(timeline);
|
||||
}
|
||||
|
||||
trace_i915_context_create(ctx);
|
||||
@@ -544,53 +565,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
|
||||
return ctx;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_context_create_gvt - create a GVT GEM context
|
||||
* @dev: drm device *
|
||||
*
|
||||
* This function is used to create a GVT specific GEM context.
|
||||
*
|
||||
* Returns:
|
||||
* pointer to i915_gem_context on success, error pointer if failed
|
||||
*
|
||||
*/
|
||||
struct i915_gem_context *
|
||||
i915_gem_context_create_gvt(struct drm_device *dev)
|
||||
{
|
||||
struct i915_gem_context *ctx;
|
||||
int ret;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
ret = i915_mutex_lock_interruptible(dev);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ctx = i915_gem_create_context(to_i915(dev), 0);
|
||||
if (IS_ERR(ctx))
|
||||
goto out;
|
||||
|
||||
ret = i915_gem_context_pin_hw_id(ctx);
|
||||
if (ret) {
|
||||
context_close(ctx);
|
||||
ctx = ERR_PTR(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctx->file_priv = ERR_PTR(-EBADF);
|
||||
i915_gem_context_set_closed(ctx); /* not user accessible */
|
||||
i915_gem_context_clear_bannable(ctx);
|
||||
i915_gem_context_set_force_single_submission(ctx);
|
||||
if (!USES_GUC_SUBMISSION(to_i915(dev)))
|
||||
ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
|
||||
|
||||
GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
|
||||
out:
|
||||
mutex_unlock(&dev->struct_mutex);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static void
|
||||
destroy_kernel_context(struct i915_gem_context **ctxp)
|
||||
{
|
||||
@@ -622,7 +596,6 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
|
||||
|
||||
i915_gem_context_clear_bannable(ctx);
|
||||
ctx->sched.priority = I915_USER_PRIORITY(prio);
|
||||
ctx->ring_size = PAGE_SIZE;
|
||||
|
||||
GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
|
||||
|
||||
@@ -644,20 +617,13 @@ static void init_contexts(struct drm_i915_private *i915)
|
||||
init_llist_head(&i915->contexts.free_list);
|
||||
}
|
||||
|
||||
static bool needs_preempt_context(struct drm_i915_private *i915)
|
||||
{
|
||||
return HAS_EXECLISTS(i915);
|
||||
}
|
||||
|
||||
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
struct i915_gem_context *ctx;
|
||||
|
||||
/* Reassure ourselves we are only called once */
|
||||
GEM_BUG_ON(dev_priv->kernel_context);
|
||||
GEM_BUG_ON(dev_priv->preempt_context);
|
||||
|
||||
intel_engine_init_ctx_wa(dev_priv->engine[RCS0]);
|
||||
init_contexts(dev_priv);
|
||||
|
||||
/* lowest priority; idle task */
|
||||
@@ -677,15 +643,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
|
||||
GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
|
||||
dev_priv->kernel_context = ctx;
|
||||
|
||||
/* highest priority; preempting task */
|
||||
if (needs_preempt_context(dev_priv)) {
|
||||
ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
|
||||
if (!IS_ERR(ctx))
|
||||
dev_priv->preempt_context = ctx;
|
||||
else
|
||||
DRM_ERROR("Failed to create preempt context; disabling preemption\n");
|
||||
}
|
||||
|
||||
DRM_DEBUG_DRIVER("%s context support initialized\n",
|
||||
DRIVER_CAPS(dev_priv)->has_logical_contexts ?
|
||||
"logical" : "fake");
|
||||
@@ -696,8 +653,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
|
||||
{
|
||||
lockdep_assert_held(&i915->drm.struct_mutex);
|
||||
|
||||
if (i915->preempt_context)
|
||||
destroy_kernel_context(&i915->preempt_context);
|
||||
destroy_kernel_context(&i915->kernel_context);
|
||||
|
||||
/* Must free all deferred contexts (via flush_workqueue) first */
|
||||
@@ -923,8 +878,12 @@ static int context_barrier_task(struct i915_gem_context *ctx,
|
||||
if (!cb)
|
||||
return -ENOMEM;
|
||||
|
||||
i915_active_init(i915, &cb->base, cb_retire);
|
||||
i915_active_acquire(&cb->base);
|
||||
i915_active_init(i915, &cb->base, NULL, cb_retire);
|
||||
err = i915_active_acquire(&cb->base);
|
||||
if (err) {
|
||||
kfree(cb);
|
||||
return err;
|
||||
}
|
||||
|
||||
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
|
||||
struct i915_request *rq;
|
||||
@@ -951,7 +910,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
|
||||
if (emit)
|
||||
err = emit(rq, data);
|
||||
if (err == 0)
|
||||
err = i915_active_ref(&cb->base, rq->fence.context, rq);
|
||||
err = i915_active_ref(&cb->base, rq->timeline, rq);
|
||||
|
||||
i915_request_add(rq);
|
||||
if (err)
|
||||
@@ -1019,7 +978,7 @@ static void set_ppgtt_barrier(void *data)
|
||||
|
||||
static int emit_ppgtt_update(struct i915_request *rq, void *data)
|
||||
{
|
||||
struct i915_address_space *vm = rq->gem_context->vm;
|
||||
struct i915_address_space *vm = rq->hw_context->vm;
|
||||
struct intel_engine_cs *engine = rq->engine;
|
||||
u32 base = engine->mmio_base;
|
||||
u32 *cs;
|
||||
@@ -1128,9 +1087,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
|
||||
set_ppgtt_barrier,
|
||||
old);
|
||||
if (err) {
|
||||
ctx->vm = old;
|
||||
ctx->desc_template = default_desc_template(ctx->i915, old);
|
||||
i915_vm_put(vm);
|
||||
i915_vm_put(__set_ppgtt(ctx, old));
|
||||
i915_vm_put(old);
|
||||
}
|
||||
|
||||
unlock:
|
||||
@@ -1187,26 +1145,11 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
|
||||
if (IS_ERR(rq))
|
||||
return PTR_ERR(rq);
|
||||
|
||||
/* Queue this switch after all other activity by this context. */
|
||||
ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
|
||||
if (ret)
|
||||
goto out_add;
|
||||
/* Serialise with the remote context */
|
||||
ret = intel_context_prepare_remote_request(ce, rq);
|
||||
if (ret == 0)
|
||||
ret = gen8_emit_rpcs_config(rq, ce, sseu);
|
||||
|
||||
/*
|
||||
* Guarantee context image and the timeline remains pinned until the
|
||||
* modifying request is retired by setting the ce activity tracker.
|
||||
*
|
||||
* But we only need to take one pin on the account of it. Or in other
|
||||
* words transfer the pinned ce object to tracked active request.
|
||||
*/
|
||||
GEM_BUG_ON(i915_active_is_idle(&ce->active));
|
||||
ret = i915_active_ref(&ce->active, rq->fence.context, rq);
|
||||
if (ret)
|
||||
goto out_add;
|
||||
|
||||
ret = gen8_emit_rpcs_config(rq, ce, sseu);
|
||||
|
||||
out_add:
|
||||
i915_request_add(rq);
|
||||
return ret;
|
||||
}
|
||||
@@ -1217,7 +1160,7 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
|
||||
{
|
||||
int ret;
|
||||
|
||||
GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
|
||||
GEM_BUG_ON(INTEL_GEN(ce->engine->i915) < 8);
|
||||
|
||||
ret = intel_context_lock_pinned(ce);
|
||||
if (ret)
|
||||
@@ -1239,7 +1182,7 @@ unlock:
|
||||
static int
|
||||
intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
|
||||
{
|
||||
struct drm_i915_private *i915 = ce->gem_context->i915;
|
||||
struct drm_i915_private *i915 = ce->engine->i915;
|
||||
int ret;
|
||||
|
||||
ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
|
||||
@@ -1636,6 +1579,7 @@ set_engines(struct i915_gem_context *ctx,
|
||||
for (n = 0; n < num_engines; n++) {
|
||||
struct i915_engine_class_instance ci;
|
||||
struct intel_engine_cs *engine;
|
||||
struct intel_context *ce;
|
||||
|
||||
if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
|
||||
__free_engines(set.engines, n);
|
||||
@@ -1658,11 +1602,13 @@ set_engines(struct i915_gem_context *ctx,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
set.engines->engines[n] = intel_context_create(ctx, engine);
|
||||
if (!set.engines->engines[n]) {
|
||||
ce = intel_context_create(ctx, engine);
|
||||
if (IS_ERR(ce)) {
|
||||
__free_engines(set.engines, n);
|
||||
return -ENOMEM;
|
||||
return PTR_ERR(ce);
|
||||
}
|
||||
|
||||
set.engines->engines[n] = ce;
|
||||
}
|
||||
set.engines->num_engines = num_engines;
|
||||
|
||||
@@ -1776,7 +1722,7 @@ get_engines(struct i915_gem_context *ctx,
|
||||
|
||||
if (e->engines[n]) {
|
||||
ci.engine_class = e->engines[n]->engine->uabi_class;
|
||||
ci.engine_instance = e->engines[n]->engine->instance;
|
||||
ci.engine_instance = e->engines[n]->engine->uabi_instance;
|
||||
}
|
||||
|
||||
if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
|
||||
@@ -2011,13 +1957,8 @@ unlock:
|
||||
static int clone_timeline(struct i915_gem_context *dst,
|
||||
struct i915_gem_context *src)
|
||||
{
|
||||
if (src->timeline) {
|
||||
GEM_BUG_ON(src->timeline == dst->timeline);
|
||||
|
||||
if (dst->timeline)
|
||||
i915_timeline_put(dst->timeline);
|
||||
dst->timeline = i915_timeline_get(src->timeline);
|
||||
}
|
||||
if (src->timeline)
|
||||
__assign_timeline(dst, src->timeline);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2141,7 +2082,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
|
||||
if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
|
||||
return -EINVAL;
|
||||
|
||||
ret = i915_terminally_wedged(i915);
|
||||
ret = intel_gt_terminally_wedged(&i915->gt);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -2287,8 +2228,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
|
||||
args->size = 0;
|
||||
if (ctx->vm)
|
||||
args->value = ctx->vm->total;
|
||||
else if (to_i915(dev)->mm.aliasing_ppgtt)
|
||||
args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
|
||||
else if (to_i915(dev)->ggtt.alias)
|
||||
args->value = to_i915(dev)->ggtt.alias->vm.total;
|
||||
else
|
||||
args->value = to_i915(dev)->ggtt.vm.total;
|
||||
break;
|
||||
|
@@ -141,8 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915,
|
||||
void i915_gem_context_close(struct drm_file *file);
|
||||
|
||||
void i915_gem_context_release(struct kref *ctx_ref);
|
||||
struct i915_gem_context *
|
||||
i915_gem_context_create_gvt(struct drm_device *dev);
|
||||
|
||||
int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file);
|
||||
@@ -197,12 +195,6 @@ i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
|
||||
mutex_unlock(&ctx->engines_mutex);
|
||||
}
|
||||
|
||||
static inline struct intel_context *
|
||||
i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
|
||||
{
|
||||
return i915_gem_context_engines(ctx)->engines[idx];
|
||||
}
|
||||
|
||||
static inline struct intel_context *
|
||||
i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
|
||||
{
|
||||
|
@@ -26,7 +26,7 @@ struct pid;
|
||||
struct drm_i915_private;
|
||||
struct drm_i915_file_private;
|
||||
struct i915_address_space;
|
||||
struct i915_timeline;
|
||||
struct intel_timeline;
|
||||
struct intel_ring;
|
||||
|
||||
struct i915_gem_engines {
|
||||
@@ -77,7 +77,7 @@ struct i915_gem_context {
|
||||
struct i915_gem_engines __rcu *engines;
|
||||
struct mutex engines_mutex; /* guards writes to engines */
|
||||
|
||||
struct i915_timeline *timeline;
|
||||
struct intel_timeline *timeline;
|
||||
|
||||
/**
|
||||
* @vm: unique address space (GTT)
|
||||
@@ -169,11 +169,6 @@ struct i915_gem_context {
|
||||
|
||||
struct i915_sched_attr sched;
|
||||
|
||||
/** ring_size: size for allocating the per-engine ring buffer */
|
||||
u32 ring_size;
|
||||
/** desc_template: invariant fields for the HW context descriptor */
|
||||
u32 desc_template;
|
||||
|
||||
/** guilty_count: How many times this context has caused a GPU hang. */
|
||||
atomic_t guilty_count;
|
||||
/**
|
||||
|
@@ -6,7 +6,7 @@
|
||||
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/reservation.h>
|
||||
#include <linux/dma-resv.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_object.h"
|
||||
@@ -204,8 +204,7 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
|
||||
.end_cpu_access = i915_gem_end_cpu_access,
|
||||
};
|
||||
|
||||
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
|
||||
struct drm_gem_object *gem_obj, int flags)
|
||||
struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
|
||||
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
|
||||
@@ -222,7 +221,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return drm_gem_dmabuf_export(dev, &exp_info);
|
||||
return drm_gem_dmabuf_export(gem_obj->dev, &exp_info);
|
||||
}
|
||||
|
||||
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
|
||||
|
@@ -221,6 +221,8 @@ restart:
|
||||
* state and so involves less work.
|
||||
*/
|
||||
if (atomic_read(&obj->bind_count)) {
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
|
||||
/* Before we change the PTE, the GPU must not be accessing it.
|
||||
* If we wait upon the object, we know that all the bound
|
||||
* VMA are no longer active.
|
||||
@@ -232,18 +234,30 @@ restart:
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!HAS_LLC(to_i915(obj->base.dev)) &&
|
||||
cache_level != I915_CACHE_NONE) {
|
||||
/* Access to snoopable pages through the GTT is
|
||||
if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
|
||||
intel_wakeref_t wakeref =
|
||||
intel_runtime_pm_get(&i915->runtime_pm);
|
||||
|
||||
/*
|
||||
* Access to snoopable pages through the GTT is
|
||||
* incoherent and on some machines causes a hard
|
||||
* lockup. Relinquish the CPU mmaping to force
|
||||
* userspace to refault in the pages and we can
|
||||
* then double check if the GTT mapping is still
|
||||
* valid for that pointer access.
|
||||
*/
|
||||
i915_gem_object_release_mmap(obj);
|
||||
ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
|
||||
if (ret) {
|
||||
intel_runtime_pm_put(&i915->runtime_pm,
|
||||
wakeref);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* As we no longer need a fence for GTT access,
|
||||
if (obj->userfault_count)
|
||||
__i915_gem_object_release_mmap(obj);
|
||||
|
||||
/*
|
||||
* As we no longer need a fence for GTT access,
|
||||
* we can relinquish it now (and so prevent having
|
||||
* to steal a fence from someone else on the next
|
||||
* fence request). Note GPU activity would have
|
||||
@@ -251,12 +265,17 @@ restart:
|
||||
* supposed to be linear.
|
||||
*/
|
||||
for_each_ggtt_vma(vma, obj) {
|
||||
ret = i915_vma_put_fence(vma);
|
||||
ret = i915_vma_revoke_fence(vma);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&i915->ggtt.vm.mutex);
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
/* We either have incoherent backing store and
|
||||
/*
|
||||
* We either have incoherent backing store and
|
||||
* so no GTT access or the architecture is fully
|
||||
* coherent. In such cases, existing GTT mmaps
|
||||
* ignore the cache bit in the PTE and we can
|
||||
@@ -551,13 +570,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline enum fb_op_origin
|
||||
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
|
||||
{
|
||||
return (domain == I915_GEM_DOMAIN_GTT ?
|
||||
obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when user space prepares to use an object with the CPU, either
|
||||
* through the mmap ioctl's mapping or a GTT mapping.
|
||||
@@ -661,9 +673,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
|
||||
|
||||
i915_gem_object_unlock(obj);
|
||||
|
||||
if (write_domain != 0)
|
||||
intel_fb_obj_invalidate(obj,
|
||||
fb_write_origin(obj, write_domain));
|
||||
if (write_domain)
|
||||
intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
|
||||
|
||||
out_unpin:
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
@@ -783,7 +794,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
|
||||
}
|
||||
|
||||
out:
|
||||
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
|
||||
intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
|
||||
obj->mm.dirty = true;
|
||||
/* return with the pages pinned */
|
||||
return 0;
|
||||
|
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/intel-iommu.h>
|
||||
#include <linux/reservation.h>
|
||||
#include <linux/dma-resv.h>
|
||||
#include <linux/sync_file.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
@@ -16,13 +16,15 @@
|
||||
|
||||
#include "gem/i915_gem_ioctls.h"
|
||||
#include "gt/intel_context.h"
|
||||
#include "gt/intel_engine_pool.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_pm.h"
|
||||
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "i915_gem_context.h"
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_trace.h"
|
||||
#include "intel_drv.h"
|
||||
|
||||
enum {
|
||||
FORCE_CPU_RELOC = 1,
|
||||
@@ -222,7 +224,6 @@ struct i915_execbuffer {
|
||||
struct intel_engine_cs *engine; /** engine to queue the request to */
|
||||
struct intel_context *context; /* logical state for the request */
|
||||
struct i915_gem_context *gem_context; /** caller's context */
|
||||
struct i915_address_space *vm; /** GTT and vma for the request */
|
||||
|
||||
struct i915_request *request; /** our request to build */
|
||||
struct i915_vma *batch; /** identity of the batch obj/vma */
|
||||
@@ -696,7 +697,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
|
||||
|
||||
case 1:
|
||||
/* Too fragmented, unbind everything and retry */
|
||||
err = i915_gem_evict_vm(eb->vm);
|
||||
err = i915_gem_evict_vm(eb->context->vm);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
@@ -724,12 +725,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
|
||||
return -ENOENT;
|
||||
|
||||
eb->gem_context = ctx;
|
||||
if (ctx->vm) {
|
||||
eb->vm = ctx->vm;
|
||||
if (ctx->vm)
|
||||
eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
|
||||
} else {
|
||||
eb->vm = &eb->i915->ggtt.vm;
|
||||
}
|
||||
|
||||
eb->context_flags = 0;
|
||||
if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
|
||||
@@ -738,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
|
||||
{
|
||||
struct i915_request *rq;
|
||||
|
||||
/*
|
||||
* Completely unscientific finger-in-the-air estimates for suitable
|
||||
* maximum user request size (to avoid blocking) and then backoff.
|
||||
*/
|
||||
if (intel_ring_update_space(ring) >= PAGE_SIZE)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Find a request that after waiting upon, there will be at least half
|
||||
* the ring available. The hysteresis allows us to compete for the
|
||||
* shared ring and should mean that we sleep less often prior to
|
||||
* claiming our resources, but not so long that the ring completely
|
||||
* drains before we can submit our next request.
|
||||
*/
|
||||
list_for_each_entry(rq, &ring->request_list, ring_link) {
|
||||
if (__intel_ring_space(rq->postfix,
|
||||
ring->emit, ring->size) > ring->size / 2)
|
||||
break;
|
||||
}
|
||||
if (&rq->ring_link == &ring->request_list)
|
||||
return NULL; /* weird, we will check again later for real */
|
||||
|
||||
return i915_request_get(rq);
|
||||
}
|
||||
|
||||
static int eb_wait_for_ring(const struct i915_execbuffer *eb)
|
||||
{
|
||||
struct i915_request *rq;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Apply a light amount of backpressure to prevent excessive hogs
|
||||
* from blocking waiting for space whilst holding struct_mutex and
|
||||
* keeping all of their resources pinned.
|
||||
*/
|
||||
|
||||
rq = __eb_wait_for_ring(eb->context->ring);
|
||||
if (rq) {
|
||||
mutex_unlock(&eb->i915->drm.struct_mutex);
|
||||
|
||||
if (i915_request_wait(rq,
|
||||
I915_WAIT_INTERRUPTIBLE,
|
||||
MAX_SCHEDULE_TIMEOUT) < 0)
|
||||
ret = -EINTR;
|
||||
|
||||
i915_request_put(rq);
|
||||
|
||||
mutex_lock(&eb->i915->drm.struct_mutex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int eb_lookup_vmas(struct i915_execbuffer *eb)
|
||||
{
|
||||
struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
|
||||
@@ -831,7 +771,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
|
||||
goto err_vma;
|
||||
}
|
||||
|
||||
vma = i915_vma_instance(obj, eb->vm, NULL);
|
||||
vma = i915_vma_instance(obj, eb->context->vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
goto err_obj;
|
||||
@@ -994,7 +934,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
|
||||
__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
|
||||
i915_gem_object_unpin_map(cache->rq->batch->obj);
|
||||
|
||||
i915_gem_chipset_flush(cache->rq->i915);
|
||||
intel_gt_chipset_flush(cache->rq->engine->gt);
|
||||
|
||||
i915_request_add(cache->rq);
|
||||
cache->rq = NULL;
|
||||
@@ -1018,11 +958,12 @@ static void reloc_cache_reset(struct reloc_cache *cache)
|
||||
kunmap_atomic(vaddr);
|
||||
i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
|
||||
} else {
|
||||
wmb();
|
||||
io_mapping_unmap_atomic((void __iomem *)vaddr);
|
||||
if (cache->node.allocated) {
|
||||
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
|
||||
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
|
||||
|
||||
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
|
||||
io_mapping_unmap_atomic((void __iomem *)vaddr);
|
||||
|
||||
if (cache->node.allocated) {
|
||||
ggtt->vm.clear_range(&ggtt->vm,
|
||||
cache->node.start,
|
||||
cache->node.size);
|
||||
@@ -1077,11 +1018,15 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
|
||||
void *vaddr;
|
||||
|
||||
if (cache->vaddr) {
|
||||
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
|
||||
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
|
||||
} else {
|
||||
struct i915_vma *vma;
|
||||
int err;
|
||||
|
||||
if (i915_gem_object_is_tiled(obj))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (use_cpu_reloc(cache, obj))
|
||||
return NULL;
|
||||
|
||||
@@ -1093,8 +1038,8 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
|
||||
|
||||
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
|
||||
PIN_MAPPABLE |
|
||||
PIN_NONBLOCK |
|
||||
PIN_NONFAULT);
|
||||
PIN_NONBLOCK /* NOWARN */ |
|
||||
PIN_NOEVICT);
|
||||
if (IS_ERR(vma)) {
|
||||
memset(&cache->node, 0, sizeof(cache->node));
|
||||
err = drm_mm_insert_node_in_range
|
||||
@@ -1105,12 +1050,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
|
||||
if (err) /* no inactive aperture space, use cpu reloc */
|
||||
return NULL;
|
||||
} else {
|
||||
err = i915_vma_put_fence(vma);
|
||||
if (err) {
|
||||
i915_vma_unpin(vma);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
cache->node.start = vma->node.start;
|
||||
cache->node.mm = (void *)vma;
|
||||
}
|
||||
@@ -1118,7 +1057,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
|
||||
|
||||
offset = cache->node.start;
|
||||
if (cache->node.allocated) {
|
||||
wmb();
|
||||
ggtt->vm.insert_page(&ggtt->vm,
|
||||
i915_gem_object_get_dma_address(obj, page),
|
||||
offset, I915_CACHE_NONE, 0);
|
||||
@@ -1201,25 +1139,26 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
|
||||
unsigned int len)
|
||||
{
|
||||
struct reloc_cache *cache = &eb->reloc_cache;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct intel_engine_pool_node *pool;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *batch;
|
||||
u32 *cmd;
|
||||
int err;
|
||||
|
||||
obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
|
||||
if (IS_ERR(obj))
|
||||
return PTR_ERR(obj);
|
||||
pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE);
|
||||
if (IS_ERR(pool))
|
||||
return PTR_ERR(pool);
|
||||
|
||||
cmd = i915_gem_object_pin_map(obj,
|
||||
cmd = i915_gem_object_pin_map(pool->obj,
|
||||
cache->has_llc ?
|
||||
I915_MAP_FORCE_WB :
|
||||
I915_MAP_FORCE_WC);
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
if (IS_ERR(cmd))
|
||||
return PTR_ERR(cmd);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto out_pool;
|
||||
}
|
||||
|
||||
batch = i915_vma_instance(obj, vma->vm, NULL);
|
||||
batch = i915_vma_instance(pool->obj, vma->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto err_unmap;
|
||||
@@ -1235,6 +1174,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
|
||||
goto err_unpin;
|
||||
}
|
||||
|
||||
err = intel_engine_pool_mark_active(pool, rq);
|
||||
if (err)
|
||||
goto err_request;
|
||||
|
||||
err = reloc_move_to_gpu(rq, vma);
|
||||
if (err)
|
||||
goto err_request;
|
||||
@@ -1246,8 +1189,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
|
||||
goto skip_request;
|
||||
|
||||
i915_vma_lock(batch);
|
||||
GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
err = i915_request_await_object(rq, batch->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
i915_vma_unlock(batch);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
@@ -1260,7 +1204,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
|
||||
cache->rq_size = 0;
|
||||
|
||||
/* Return with batch mapping (cmd) still pinned */
|
||||
return 0;
|
||||
goto out_pool;
|
||||
|
||||
skip_request:
|
||||
i915_request_skip(rq, err);
|
||||
@@ -1269,7 +1213,9 @@ err_request:
|
||||
err_unpin:
|
||||
i915_vma_unpin(batch);
|
||||
err_unmap:
|
||||
i915_gem_object_unpin_map(obj);
|
||||
i915_gem_object_unpin_map(pool->obj);
|
||||
out_pool:
|
||||
intel_engine_pool_put(pool);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1317,7 +1263,7 @@ relocate_entry(struct i915_vma *vma,
|
||||
|
||||
if (!eb->reloc_cache.vaddr &&
|
||||
(DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
|
||||
!reservation_object_test_signaled_rcu(vma->resv, true))) {
|
||||
!dma_resv_test_signaled_rcu(vma->resv, true))) {
|
||||
const unsigned int gen = eb->reloc_cache.gen;
|
||||
unsigned int len;
|
||||
u32 *batch;
|
||||
@@ -1952,7 +1898,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
|
||||
eb->exec = NULL;
|
||||
|
||||
/* Unconditionally flush any chipset caches (for streaming writes). */
|
||||
i915_gem_chipset_flush(eb->i915);
|
||||
intel_gt_chipset_flush(eb->engine->gt);
|
||||
return 0;
|
||||
|
||||
err_skip:
|
||||
@@ -2011,18 +1957,17 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
|
||||
|
||||
static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
|
||||
{
|
||||
struct drm_i915_gem_object *shadow_batch_obj;
|
||||
struct intel_engine_pool_node *pool;
|
||||
struct i915_vma *vma;
|
||||
int err;
|
||||
|
||||
shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
|
||||
PAGE_ALIGN(eb->batch_len));
|
||||
if (IS_ERR(shadow_batch_obj))
|
||||
return ERR_CAST(shadow_batch_obj);
|
||||
pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
|
||||
if (IS_ERR(pool))
|
||||
return ERR_CAST(pool);
|
||||
|
||||
err = intel_engine_cmd_parser(eb->engine,
|
||||
eb->batch->obj,
|
||||
shadow_batch_obj,
|
||||
pool->obj,
|
||||
eb->batch_start_offset,
|
||||
eb->batch_len,
|
||||
is_master);
|
||||
@@ -2031,12 +1976,12 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
|
||||
vma = NULL;
|
||||
else
|
||||
vma = ERR_PTR(err);
|
||||
goto out;
|
||||
goto err;
|
||||
}
|
||||
|
||||
vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
|
||||
vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
|
||||
if (IS_ERR(vma))
|
||||
goto out;
|
||||
goto err;
|
||||
|
||||
eb->vma[eb->buffer_count] = i915_vma_get(vma);
|
||||
eb->flags[eb->buffer_count] =
|
||||
@@ -2044,16 +1989,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
|
||||
vma->exec_flags = &eb->flags[eb->buffer_count];
|
||||
eb->buffer_count++;
|
||||
|
||||
out:
|
||||
i915_gem_object_unpin_pages(shadow_batch_obj);
|
||||
vma->private = pool;
|
||||
return vma;
|
||||
|
||||
err:
|
||||
intel_engine_pool_put(pool);
|
||||
return vma;
|
||||
}
|
||||
|
||||
static void
|
||||
add_to_client(struct i915_request *rq, struct drm_file *file)
|
||||
{
|
||||
rq->file_priv = file->driver_priv;
|
||||
list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
|
||||
struct drm_i915_file_private *file_priv = file->driver_priv;
|
||||
|
||||
rq->file_priv = file_priv;
|
||||
|
||||
spin_lock(&file_priv->mm.lock);
|
||||
list_add_tail(&rq->client_link, &file_priv->mm.request_list);
|
||||
spin_unlock(&file_priv->mm.lock);
|
||||
}
|
||||
|
||||
static int eb_submit(struct i915_execbuffer *eb)
|
||||
@@ -2093,6 +2046,12 @@ static int eb_submit(struct i915_execbuffer *eb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int num_vcs_engines(const struct drm_i915_private *i915)
|
||||
{
|
||||
return hweight64(INTEL_INFO(i915)->engine_mask &
|
||||
GENMASK_ULL(VCS0 + I915_MAX_VCS - 1, VCS0));
|
||||
}
|
||||
|
||||
/*
|
||||
* Find one BSD ring to dispatch the corresponding BSD command.
|
||||
* The engine index is returned.
|
||||
@@ -2105,8 +2064,8 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
|
||||
|
||||
/* Check whether the file_priv has already selected one ring. */
|
||||
if ((int)file_priv->bsd_engine < 0)
|
||||
file_priv->bsd_engine = atomic_fetch_xor(1,
|
||||
&dev_priv->mm.bsd_engine_dispatch_index);
|
||||
file_priv->bsd_engine =
|
||||
get_random_int() % num_vcs_engines(dev_priv);
|
||||
|
||||
return file_priv->bsd_engine;
|
||||
}
|
||||
@@ -2119,15 +2078,80 @@ static const enum intel_engine_id user_ring_map[] = {
|
||||
[I915_EXEC_VEBOX] = VECS0
|
||||
};
|
||||
|
||||
static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
|
||||
static struct i915_request *eb_throttle(struct intel_context *ce)
|
||||
{
|
||||
struct intel_ring *ring = ce->ring;
|
||||
struct intel_timeline *tl = ce->timeline;
|
||||
struct i915_request *rq;
|
||||
|
||||
/*
|
||||
* Completely unscientific finger-in-the-air estimates for suitable
|
||||
* maximum user request size (to avoid blocking) and then backoff.
|
||||
*/
|
||||
if (intel_ring_update_space(ring) >= PAGE_SIZE)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Find a request that after waiting upon, there will be at least half
|
||||
* the ring available. The hysteresis allows us to compete for the
|
||||
* shared ring and should mean that we sleep less often prior to
|
||||
* claiming our resources, but not so long that the ring completely
|
||||
* drains before we can submit our next request.
|
||||
*/
|
||||
list_for_each_entry(rq, &tl->requests, link) {
|
||||
if (rq->ring != ring)
|
||||
continue;
|
||||
|
||||
if (__intel_ring_space(rq->postfix,
|
||||
ring->emit, ring->size) > ring->size / 2)
|
||||
break;
|
||||
}
|
||||
if (&rq->link == &tl->requests)
|
||||
return NULL; /* weird, we will check again later for real */
|
||||
|
||||
return i915_request_get(rq);
|
||||
}
|
||||
|
||||
static int
|
||||
__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (likely(atomic_inc_not_zero(&ce->pin_count)))
|
||||
return 0;
|
||||
|
||||
err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = __intel_context_do_pin(ce);
|
||||
mutex_unlock(&eb->i915->drm.struct_mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void
|
||||
__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
|
||||
{
|
||||
if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
|
||||
return;
|
||||
|
||||
mutex_lock(&eb->i915->drm.struct_mutex);
|
||||
intel_context_unpin(ce);
|
||||
mutex_unlock(&eb->i915->drm.struct_mutex);
|
||||
}
|
||||
|
||||
static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
|
||||
{
|
||||
struct intel_timeline *tl;
|
||||
struct i915_request *rq;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
|
||||
* EIO if the GPU is already wedged.
|
||||
*/
|
||||
err = i915_terminally_wedged(eb->i915);
|
||||
err = intel_gt_terminally_wedged(ce->engine->gt);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@@ -2136,18 +2160,64 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
* GGTT space, so do this first before we reserve a seqno for
* ourselves.
*/
err = intel_context_pin(ce);
err = __eb_pin_context(eb, ce);
if (err)
return err;

/*
* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
* process, and require the engine to be kept awake between accesses.
* Upon dispatch, we acquire another prolonged wakeref that we hold
* until the timeline is idle, which in turn releases the wakeref
* taken on the engine, and the parent device.
*/
tl = intel_context_timeline_lock(ce);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
goto err_unpin;
}

intel_context_enter(ce);
rq = eb_throttle(ce);

intel_context_timeline_unlock(tl);

if (rq) {
if (i915_request_wait(rq,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT) < 0) {
i915_request_put(rq);
err = -EINTR;
goto err_exit;
}

i915_request_put(rq);
}

eb->engine = ce->engine;
eb->context = ce;
return 0;

err_exit:
mutex_lock(&tl->mutex);
intel_context_exit(ce);
intel_context_timeline_unlock(tl);
err_unpin:
__eb_unpin_context(eb, ce);
return err;
}

static void eb_unpin_context(struct i915_execbuffer *eb)
static void eb_unpin_engine(struct i915_execbuffer *eb)
{
intel_context_unpin(eb->context);
struct intel_context *ce = eb->context;
struct intel_timeline *tl = ce->timeline;

mutex_lock(&tl->mutex);
intel_context_exit(ce);
mutex_unlock(&tl->mutex);

__eb_unpin_context(eb, ce);
}

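/*
 * __eb_pin_engine() pins the context, enters its timeline and applies
 * eb_throttle() before the execbuf is dispatched; eb_unpin_engine()
 * exits the timeline under tl->mutex and drops the context pin again.
 */
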
static unsigned int
@@ -2165,7 +2235,7 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
return -1;
}

if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
if (user_ring_id == I915_EXEC_BSD && num_vcs_engines(i915) > 1) {
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;

if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
@@ -2192,9 +2262,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
}

static int
eb_select_engine(struct i915_execbuffer *eb,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args)
eb_pin_engine(struct i915_execbuffer *eb,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args)
{
struct intel_context *ce;
unsigned int idx;
@@ -2209,7 +2279,7 @@ eb_select_engine(struct i915_execbuffer *eb,
if (IS_ERR(ce))
return PTR_ERR(ce);

err = eb_pin_context(eb, ce);
err = __eb_pin_engine(eb, ce);
intel_context_put(ce);

return err;
@@ -2427,25 +2497,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (unlikely(err))
goto err_destroy;

/*
* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
* process. Upon first dispatch, we acquire another prolonged
* wakeref that we hold until the GPU has been idle for at least
* 100ms.
*/
intel_gt_pm_get(eb.i915);
err = eb_pin_engine(&eb, file, args);
if (unlikely(err))
goto err_context;

err = i915_mutex_lock_interruptible(dev);
if (err)
goto err_rpm;

err = eb_select_engine(&eb, file, args);
if (unlikely(err))
goto err_unlock;

err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
if (unlikely(err))
goto err_engine;

err = eb_relocate(&eb);
@@ -2572,6 +2629,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
* to explicitly hold another reference here.
*/
eb.request->batch = eb.batch;
if (eb.batch->private)
intel_engine_pool_mark_active(eb.batch->private, eb.request);

trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb);
@@ -2596,15 +2655,15 @@ err_request:
err_batch_unpin:
if (eb.batch_flags & I915_DISPATCH_SECURE)
i915_vma_unpin(eb.batch);
if (eb.batch->private)
intel_engine_pool_put(eb.batch->private);
err_vma:
if (eb.exec)
eb_release_vmas(&eb);
err_engine:
eb_unpin_context(&eb);
err_unlock:
mutex_unlock(&dev->struct_mutex);
err_rpm:
intel_gt_pm_put(eb.i915);
err_engine:
eb_unpin_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);
err_destroy:
eb_destroy(&eb);

@@ -69,8 +69,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
|
||||
|
||||
i915_sw_fence_init(&stub->chain, stub_notify);
|
||||
dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock,
|
||||
to_i915(obj->base.dev)->mm.unordered_timeline,
|
||||
0);
|
||||
0, 0);
|
||||
|
||||
if (i915_sw_fence_await_reservation(&stub->chain,
|
||||
obj->base.resv, NULL,
|
||||
@@ -78,7 +77,7 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
|
||||
I915_FENCE_GFP) < 0)
|
||||
goto err;
|
||||
|
||||
reservation_object_add_excl_fence(obj->base.resv, &stub->dma);
|
||||
dma_resv_add_excl_fence(obj->base.resv, &stub->dma);
|
||||
|
||||
return &stub->dma;
|
||||
|
||||
|
@@ -7,12 +7,14 @@
|
||||
#include <linux/mman.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_gtt.h"
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_gem_object.h"
|
||||
#include "i915_trace.h"
|
||||
#include "i915_vma.h"
|
||||
#include "intel_drv.h"
|
||||
|
||||
static inline bool
|
||||
__vma_matches(struct vm_area_struct *vma, struct file *filp,
|
||||
@@ -99,9 +101,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
|
||||
up_write(&mm->mmap_sem);
|
||||
if (IS_ERR_VALUE(addr))
|
||||
goto err;
|
||||
|
||||
/* This may race, but that's ok, it only gets set */
|
||||
WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
|
||||
}
|
||||
i915_gem_object_put(obj);
|
||||
|
||||
@@ -246,7 +245,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
|
||||
|
||||
wakeref = intel_runtime_pm_get(rpm);
|
||||
|
||||
srcu = i915_reset_trylock(i915);
|
||||
srcu = intel_gt_reset_trylock(ggtt->vm.gt);
|
||||
if (srcu < 0) {
|
||||
ret = srcu;
|
||||
goto err_rpm;
|
||||
@@ -265,15 +264,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
|
||||
/* Now pin it into the GTT as needed */
|
||||
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
|
||||
PIN_MAPPABLE |
|
||||
PIN_NONBLOCK |
|
||||
PIN_NONFAULT);
|
||||
PIN_NONBLOCK /* NOWARN */ |
|
||||
PIN_NOEVICT);
|
||||
if (IS_ERR(vma)) {
|
||||
/* Use a partial view if it is bigger than available space */
|
||||
struct i915_ggtt_view view =
|
||||
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
|
||||
unsigned int flags;
|
||||
|
||||
flags = PIN_MAPPABLE;
|
||||
flags = PIN_MAPPABLE | PIN_NOSEARCH;
|
||||
if (view.type == I915_GGTT_VIEW_NORMAL)
|
||||
flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
|
||||
|
||||
@@ -281,10 +280,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
|
||||
* Userspace is now writing through an untracked VMA, abandon
|
||||
* all hope that the hardware is able to track future writes.
|
||||
*/
|
||||
obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
|
||||
|
||||
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
|
||||
if (IS_ERR(vma) && !view.type) {
|
||||
if (IS_ERR(vma)) {
|
||||
flags = PIN_MAPPABLE;
|
||||
view.type = I915_GGTT_VIEW_PARTIAL;
|
||||
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
|
||||
@@ -308,14 +306,17 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
|
||||
if (ret)
|
||||
goto err_fence;
|
||||
|
||||
/* Mark as being mmapped into userspace for later revocation */
|
||||
assert_rpm_wakelock_held(rpm);
|
||||
|
||||
/* Mark as being mmapped into userspace for later revocation */
|
||||
mutex_lock(&i915->ggtt.vm.mutex);
|
||||
if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
|
||||
list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
|
||||
mutex_unlock(&i915->ggtt.vm.mutex);
|
||||
|
||||
if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
|
||||
intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
|
||||
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
|
||||
GEM_BUG_ON(!obj->userfault_count);
|
||||
|
||||
i915_vma_set_ggtt_write(vma);
|
||||
|
||||
@@ -326,7 +327,7 @@ err_unpin:
|
||||
err_unlock:
|
||||
mutex_unlock(&dev->struct_mutex);
|
||||
err_reset:
|
||||
i915_reset_unlock(i915, srcu);
|
||||
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
|
||||
err_rpm:
|
||||
intel_runtime_pm_put(rpm, wakeref);
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
@@ -339,7 +340,7 @@ err:
|
||||
* fail). But any other -EIO isn't ours (e.g. swap in failure)
|
||||
* and so needs to be reported.
|
||||
*/
|
||||
if (!i915_terminally_wedged(i915))
|
||||
if (!intel_gt_is_wedged(ggtt->vm.gt))
|
||||
return VM_FAULT_SIGBUS;
|
||||
/* else, fall through */
|
||||
case -EAGAIN:
|
||||
@@ -410,8 +411,8 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
|
||||
* requirement that operations to the GGTT be made holding the RPM
|
||||
* wakeref.
|
||||
*/
|
||||
lockdep_assert_held(&i915->drm.struct_mutex);
|
||||
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
|
||||
mutex_lock(&i915->ggtt.vm.mutex);
|
||||
|
||||
if (!obj->userfault_count)
|
||||
goto out;
|
||||
@@ -428,6 +429,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
|
||||
wmb();
|
||||
|
||||
out:
|
||||
mutex_unlock(&i915->ggtt.vm.mutex);
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
}
|
||||
|
||||
|
@@ -23,12 +23,13 @@
|
||||
*/
|
||||
|
||||
#include "display/intel_frontbuffer.h"
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "i915_gem_context.h"
|
||||
#include "i915_gem_object.h"
|
||||
#include "i915_globals.h"
|
||||
#include "i915_trace.h"
|
||||
|
||||
static struct i915_global_object {
|
||||
struct i915_global base;
|
||||
@@ -45,16 +46,6 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
|
||||
return kmem_cache_free(global.slab_objects, obj);
|
||||
}
|
||||
|
||||
static void
|
||||
frontbuffer_retire(struct i915_active_request *active,
|
||||
struct i915_request *request)
|
||||
{
|
||||
struct drm_i915_gem_object *obj =
|
||||
container_of(active, typeof(*obj), frontbuffer_write);
|
||||
|
||||
intel_fb_obj_flush(obj, ORIGIN_CS);
|
||||
}
|
||||
|
||||
void i915_gem_object_init(struct drm_i915_gem_object *obj,
|
||||
const struct drm_i915_gem_object_ops *ops)
|
||||
{
|
||||
@@ -63,17 +54,14 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
|
||||
spin_lock_init(&obj->vma.lock);
|
||||
INIT_LIST_HEAD(&obj->vma.list);
|
||||
|
||||
INIT_LIST_HEAD(&obj->mm.link);
|
||||
|
||||
INIT_LIST_HEAD(&obj->lut_list);
|
||||
INIT_LIST_HEAD(&obj->batch_pool_link);
|
||||
|
||||
init_rcu_head(&obj->rcu);
|
||||
|
||||
obj->ops = ops;
|
||||
|
||||
obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
|
||||
i915_active_request_init(&obj->frontbuffer_write,
|
||||
NULL, frontbuffer_retire);
|
||||
|
||||
obj->mm.madv = I915_MADV_WILLNEED;
|
||||
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
|
||||
mutex_init(&obj->mm.get_page.lock);
|
||||
@@ -146,6 +134,19 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
|
||||
}
|
||||
}
|
||||
|
||||
static void __i915_gem_free_object_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct drm_i915_gem_object *obj =
|
||||
container_of(head, typeof(*obj), rcu);
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
|
||||
dma_resv_fini(&obj->base._resv);
|
||||
i915_gem_object_free(obj);
|
||||
|
||||
GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
|
||||
atomic_dec(&i915->mm.free_count);
|
||||
}
|
||||
|
||||
static void __i915_gem_free_objects(struct drm_i915_private *i915,
|
||||
struct llist_node *freed)
|
||||
{
|
||||
@@ -160,7 +161,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
|
||||
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
|
||||
GEM_BUG_ON(i915_gem_object_is_active(obj));
|
||||
list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
|
||||
GEM_BUG_ON(i915_vma_is_active(vma));
|
||||
vma->flags &= ~I915_VMA_PIN_MASK;
|
||||
@@ -169,110 +169,70 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
|
||||
GEM_BUG_ON(!list_empty(&obj->vma.list));
|
||||
GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
|
||||
|
||||
/*
|
||||
* This serializes freeing with the shrinker. Since the free
|
||||
* is delayed, first by RCU then by the workqueue, we want the
|
||||
* shrinker to be able to free pages of unreferenced objects,
|
||||
* or else we may oom whilst there are plenty of deferred
|
||||
* freed objects.
|
||||
*/
|
||||
if (i915_gem_object_has_pages(obj) &&
|
||||
i915_gem_object_is_shrinkable(obj)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i915->mm.obj_lock, flags);
|
||||
list_del_init(&obj->mm.link);
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
}
|
||||
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
|
||||
GEM_BUG_ON(atomic_read(&obj->bind_count));
|
||||
GEM_BUG_ON(obj->userfault_count);
|
||||
GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
|
||||
GEM_BUG_ON(!list_empty(&obj->lut_list));
|
||||
|
||||
if (obj->ops->release)
|
||||
obj->ops->release(obj);
|
||||
|
||||
atomic_set(&obj->mm.pages_pin_count, 0);
|
||||
__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
|
||||
GEM_BUG_ON(i915_gem_object_has_pages(obj));
|
||||
bitmap_free(obj->bit_17);
|
||||
|
||||
if (obj->base.import_attach)
|
||||
drm_prime_gem_destroy(&obj->base, NULL);
|
||||
|
||||
drm_gem_object_release(&obj->base);
|
||||
drm_gem_free_mmap_offset(&obj->base);
|
||||
|
||||
bitmap_free(obj->bit_17);
|
||||
i915_gem_object_free(obj);
|
||||
if (obj->ops->release)
|
||||
obj->ops->release(obj);
|
||||
|
||||
GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
|
||||
atomic_dec(&i915->mm.free_count);
|
||||
|
||||
cond_resched();
|
||||
/* But keep the pointer alive for RCU-protected lookups */
|
||||
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
|
||||
}
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
}
|
||||
|
||||
void i915_gem_flush_free_objects(struct drm_i915_private *i915)
|
||||
{
|
||||
struct llist_node *freed;
|
||||
struct llist_node *freed = llist_del_all(&i915->mm.free_list);
|
||||
|
||||
/* Free the oldest, most stale object to keep the free_list short */
|
||||
freed = NULL;
|
||||
if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
|
||||
/* Only one consumer of llist_del_first() allowed */
|
||||
spin_lock(&i915->mm.free_lock);
|
||||
freed = llist_del_first(&i915->mm.free_list);
|
||||
spin_unlock(&i915->mm.free_lock);
|
||||
}
|
||||
if (unlikely(freed)) {
|
||||
freed->next = NULL;
|
||||
if (unlikely(freed))
|
||||
__i915_gem_free_objects(i915, freed);
|
||||
}
|
||||
}
|
||||
|
||||
static void __i915_gem_free_work(struct work_struct *work)
|
||||
{
|
||||
struct drm_i915_private *i915 =
|
||||
container_of(work, struct drm_i915_private, mm.free_work);
|
||||
struct llist_node *freed;
|
||||
|
||||
/*
|
||||
* All file-owned VMA should have been released by this point through
|
||||
* i915_gem_close_object(), or earlier by i915_gem_context_close().
|
||||
* However, the object may also be bound into the global GTT (e.g.
|
||||
* older GPUs without per-process support, or for direct access through
|
||||
* the GTT either for the user or for scanout). Those VMA still need to
|
||||
* unbound now.
|
||||
*/
|
||||
|
||||
spin_lock(&i915->mm.free_lock);
|
||||
while ((freed = llist_del_all(&i915->mm.free_list))) {
|
||||
spin_unlock(&i915->mm.free_lock);
|
||||
|
||||
__i915_gem_free_objects(i915, freed);
|
||||
if (need_resched())
|
||||
return;
|
||||
|
||||
spin_lock(&i915->mm.free_lock);
|
||||
}
|
||||
spin_unlock(&i915->mm.free_lock);
|
||||
i915_gem_flush_free_objects(i915);
|
||||
}
|
||||
|
||||
static void __i915_gem_free_object_rcu(struct rcu_head *head)
|
||||
void i915_gem_free_object(struct drm_gem_object *gem_obj)
|
||||
{
|
||||
struct drm_i915_gem_object *obj =
|
||||
container_of(head, typeof(*obj), rcu);
|
||||
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
|
||||
GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
|
||||
|
||||
/*
|
||||
* We reuse obj->rcu for the freed list, so we had better not treat
|
||||
* it like a rcu_head from this point forwards. And we expect all
|
||||
* objects to be freed via this path.
|
||||
* Before we free the object, make sure any pure RCU-only
|
||||
* read-side critical sections are complete, e.g.
|
||||
* i915_gem_busy_ioctl(). For the corresponding synchronized
|
||||
* lookup see i915_gem_object_lookup_rcu().
|
||||
*/
|
||||
destroy_rcu_head(&obj->rcu);
|
||||
atomic_inc(&i915->mm.free_count);
|
||||
|
||||
/*
|
||||
* This serializes freeing with the shrinker. Since the free
|
||||
* is delayed, first by RCU then by the workqueue, we want the
|
||||
* shrinker to be able to free pages of unreferenced objects,
|
||||
* or else we may oom whilst there are plenty of deferred
|
||||
* freed objects.
|
||||
*/
|
||||
i915_gem_object_make_unshrinkable(obj);
|
||||
|
||||
/*
|
||||
* Since we require blocking on struct_mutex to unbind the freed
|
||||
@@ -288,27 +248,6 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head)
|
||||
queue_work(i915->wq, &i915->mm.free_work);
|
||||
}
|
||||
|
||||
void i915_gem_free_object(struct drm_gem_object *gem_obj)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
|
||||
|
||||
/*
|
||||
* Before we free the object, make sure any pure RCU-only
|
||||
* read-side critical sections are complete, e.g.
|
||||
* i915_gem_busy_ioctl(). For the corresponding synchronized
|
||||
* lookup see i915_gem_object_lookup_rcu().
|
||||
*/
|
||||
atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
|
||||
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
|
||||
}
|
||||
|
||||
static inline enum fb_op_origin
|
||||
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
|
||||
{
|
||||
return (domain == I915_GEM_DOMAIN_GTT ?
|
||||
obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
|
||||
}
|
||||
|
||||
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return !(obj->cache_level == I915_CACHE_NONE ||
|
||||
@@ -319,7 +258,6 @@ void
|
||||
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
|
||||
unsigned int flush_domains)
|
||||
{
|
||||
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
|
||||
struct i915_vma *vma;
|
||||
|
||||
assert_object_held(obj);
|
||||
@@ -329,10 +267,10 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
|
||||
|
||||
switch (obj->write_domain) {
|
||||
case I915_GEM_DOMAIN_GTT:
|
||||
i915_gem_flush_ggtt_writes(dev_priv);
|
||||
for_each_ggtt_vma(vma, obj)
|
||||
intel_gt_flush_ggtt_writes(vma->vm->gt);
|
||||
|
||||
intel_fb_obj_flush(obj,
|
||||
fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
|
||||
intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
|
||||
|
||||
for_each_ggtt_vma(vma, obj) {
|
||||
if (vma->iomap)
|
||||
@@ -340,6 +278,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
|
||||
|
||||
i915_vma_unset_ggtt_write(vma);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case I915_GEM_DOMAIN_WC:
|
||||
|
@@ -81,7 +81,7 @@ i915_gem_object_lookup(struct drm_file *file, u32 handle)
|
||||
}
|
||||
|
||||
__deprecated
|
||||
extern struct drm_gem_object *
|
||||
struct drm_gem_object *
|
||||
drm_gem_object_lookup(struct drm_file *file, u32 handle);
|
||||
|
||||
__attribute__((nonnull))
|
||||
@@ -99,22 +99,22 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
|
||||
__drm_gem_object_put(&obj->base);
|
||||
}
|
||||
|
||||
#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv)
|
||||
#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
|
||||
|
||||
static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
reservation_object_lock(obj->base.resv, NULL);
|
||||
dma_resv_lock(obj->base.resv, NULL);
|
||||
}
|
||||
|
||||
static inline int
|
||||
i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return reservation_object_lock_interruptible(obj->base.resv, NULL);
|
||||
return dma_resv_lock_interruptible(obj->base.resv, NULL);
|
||||
}
|
||||
|
||||
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
reservation_object_unlock(obj->base.resv);
|
||||
dma_resv_unlock(obj->base.resv);
|
||||
}
|
||||
|
||||
struct dma_fence *
|
||||
@@ -158,16 +158,10 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
|
||||
return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return READ_ONCE(obj->active_count);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return READ_ONCE(obj->framebuffer_references);
|
||||
return READ_ONCE(obj->frontbuffer);
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
@@ -373,7 +367,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
|
||||
struct dma_fence *fence;
|
||||
|
||||
rcu_read_lock();
|
||||
fence = reservation_object_get_excl_rcu(obj->base.resv);
|
||||
fence = dma_resv_get_excl_rcu(obj->base.resv);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
|
||||
@@ -400,6 +394,10 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
||||
unsigned int flags);
|
||||
void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
|
||||
|
||||
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
|
||||
|
||||
static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
if (obj->cache_dirty)
|
||||
|
@@ -3,59 +3,136 @@
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "gt/intel_context.h"
|
||||
#include "gt/intel_engine_pm.h"
|
||||
#include "gt/intel_engine_pool.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "i915_gem_object_blt.h"
|
||||
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "intel_drv.h"
|
||||
|
||||
int intel_emit_vma_fill_blt(struct i915_request *rq,
|
||||
struct i915_vma *vma,
|
||||
u32 value)
|
||||
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
|
||||
struct i915_vma *vma,
|
||||
u32 value)
|
||||
{
|
||||
u32 *cs;
|
||||
struct drm_i915_private *i915 = ce->vm->i915;
|
||||
const u32 block_size = S16_MAX * PAGE_SIZE;
|
||||
struct intel_engine_pool_node *pool;
|
||||
struct i915_vma *batch;
|
||||
u64 offset;
|
||||
u64 count;
|
||||
u64 rem;
|
||||
u32 size;
|
||||
u32 *cmd;
|
||||
int err;
|
||||
|
||||
cs = intel_ring_begin(rq, 8);
|
||||
if (IS_ERR(cs))
|
||||
return PTR_ERR(cs);
|
||||
GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
|
||||
intel_engine_pm_get(ce->engine);
|
||||
|
||||
if (INTEL_GEN(rq->i915) >= 8) {
|
||||
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
|
||||
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
|
||||
*cs++ = 0;
|
||||
*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cs++ = lower_32_bits(vma->node.start);
|
||||
*cs++ = upper_32_bits(vma->node.start);
|
||||
*cs++ = value;
|
||||
*cs++ = MI_NOOP;
|
||||
} else {
|
||||
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
|
||||
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
|
||||
*cs++ = 0;
|
||||
*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cs++ = vma->node.start;
|
||||
*cs++ = value;
|
||||
*cs++ = MI_NOOP;
|
||||
*cs++ = MI_NOOP;
|
||||
count = div_u64(vma->size, block_size);
|
||||
size = (1 + 8 * count) * sizeof(u32);
|
||||
size = round_up(size, PAGE_SIZE);
|
||||
pool = intel_engine_pool_get(&ce->engine->pool, size);
|
||||
if (IS_ERR(pool)) {
|
||||
err = PTR_ERR(pool);
|
||||
goto out_pm;
|
||||
}
|
||||
|
||||
intel_ring_advance(rq, cs);
|
||||
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
return 0;
|
||||
rem = vma->size;
|
||||
offset = vma->node.start;
|
||||
|
||||
do {
|
||||
u32 size = min_t(u64, rem, block_size);
|
||||
|
||||
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
|
||||
|
||||
if (INTEL_GEN(i915) >= 8) {
|
||||
*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
|
||||
*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
|
||||
*cmd++ = 0;
|
||||
*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cmd++ = lower_32_bits(offset);
|
||||
*cmd++ = upper_32_bits(offset);
|
||||
*cmd++ = value;
|
||||
} else {
|
||||
*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
|
||||
*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
|
||||
*cmd++ = 0;
|
||||
*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = value;
|
||||
}
|
||||
|
||||
/* Allow ourselves to be preempted in between blocks. */
|
||||
*cmd++ = MI_ARB_CHECK;
|
||||
|
||||
offset += size;
|
||||
rem -= size;
|
||||
} while (rem);
|
||||
|
||||
*cmd = MI_BATCH_BUFFER_END;
|
||||
intel_gt_chipset_flush(ce->vm->gt);
|
||||
|
||||
i915_gem_object_unpin_map(pool->obj);
|
||||
|
||||
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(batch, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_put;
|
||||
|
||||
batch->private = pool;
|
||||
return batch;
|
||||
|
||||
out_put:
|
||||
intel_engine_pool_put(pool);
|
||||
out_pm:
|
||||
intel_engine_pm_put(ce->engine);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
|
||||
{
|
||||
int err;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
i915_vma_unlock(vma);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
return intel_engine_pool_mark_active(vma->private, rq);
|
||||
}
|
||||
|
||||
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
|
||||
{
|
||||
i915_vma_unpin(vma);
|
||||
intel_engine_pool_put(vma->private);
|
||||
intel_engine_pm_put(ce->engine);
|
||||
}
|
||||
|
||||
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
struct intel_context *ce,
|
||||
u32 value)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_gem_context *ctx = ce->gem_context;
|
||||
struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *batch;
|
||||
struct i915_vma *vma;
|
||||
int err;
|
||||
|
||||
/* XXX: ce->vm please */
|
||||
vma = i915_vma_instance(obj, vm, NULL);
|
||||
vma = i915_vma_instance(obj, ce->vm, NULL);
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
@@ -69,12 +146,22 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
i915_gem_object_unlock(obj);
|
||||
}
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
batch = intel_emit_vma_fill_blt(ce, vma, value);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_batch;
|
||||
}
|
||||
|
||||
err = intel_emit_vma_mark_active(batch, rq);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
err = i915_request_await_object(rq, obj, true);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
@@ -86,22 +173,229 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
err = intel_emit_vma_fill_blt(rq, vma, value);
|
||||
err = ce->engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
0);
|
||||
out_request:
|
||||
if (unlikely(err))
|
||||
i915_request_skip(rq, err);
|
||||
|
||||
i915_request_add(rq);
|
||||
out_batch:
|
||||
intel_emit_vma_release(ce, batch);
|
||||
out_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
|
||||
struct i915_vma *src,
|
||||
struct i915_vma *dst)
|
||||
{
|
||||
struct drm_i915_private *i915 = ce->vm->i915;
|
||||
const u32 block_size = S16_MAX * PAGE_SIZE;
|
||||
struct intel_engine_pool_node *pool;
|
||||
struct i915_vma *batch;
|
||||
u64 src_offset, dst_offset;
|
||||
u64 count, rem;
|
||||
u32 size, *cmd;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(src->size != dst->size);
|
||||
|
||||
GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
|
||||
intel_engine_pm_get(ce->engine);
|
||||
|
||||
count = div_u64(dst->size, block_size);
|
||||
size = (1 + 11 * count) * sizeof(u32);
|
||||
size = round_up(size, PAGE_SIZE);
|
||||
pool = intel_engine_pool_get(&ce->engine->pool, size);
|
||||
if (IS_ERR(pool)) {
|
||||
err = PTR_ERR(pool);
|
||||
goto out_pm;
|
||||
}
|
||||
|
||||
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
rem = src->size;
|
||||
src_offset = src->node.start;
|
||||
dst_offset = dst->node.start;
|
||||
|
||||
do {
|
||||
size = min_t(u64, rem, block_size);
|
||||
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
|
||||
|
||||
if (INTEL_GEN(i915) >= 9) {
|
||||
*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
|
||||
*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
|
||||
*cmd++ = 0;
|
||||
*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cmd++ = lower_32_bits(dst_offset);
|
||||
*cmd++ = upper_32_bits(dst_offset);
|
||||
*cmd++ = 0;
|
||||
*cmd++ = PAGE_SIZE;
|
||||
*cmd++ = lower_32_bits(src_offset);
|
||||
*cmd++ = upper_32_bits(src_offset);
|
||||
} else if (INTEL_GEN(i915) >= 8) {
|
||||
*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
|
||||
*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
|
||||
*cmd++ = 0;
|
||||
*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
|
||||
*cmd++ = lower_32_bits(dst_offset);
|
||||
*cmd++ = upper_32_bits(dst_offset);
|
||||
*cmd++ = 0;
|
||||
*cmd++ = PAGE_SIZE;
|
||||
*cmd++ = lower_32_bits(src_offset);
|
||||
*cmd++ = upper_32_bits(src_offset);
|
||||
} else {
|
||||
*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
|
||||
*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
|
||||
*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
|
||||
*cmd++ = dst_offset;
|
||||
*cmd++ = PAGE_SIZE;
|
||||
*cmd++ = src_offset;
|
||||
}
|
||||
|
||||
/* Allow ourselves to be preempted in between blocks. */
|
||||
*cmd++ = MI_ARB_CHECK;
|
||||
|
||||
src_offset += size;
|
||||
dst_offset += size;
|
||||
rem -= size;
|
||||
} while (rem);
|
||||
|
||||
*cmd = MI_BATCH_BUFFER_END;
|
||||
intel_gt_chipset_flush(ce->vm->gt);
|
||||
|
||||
i915_gem_object_unpin_map(pool->obj);
|
||||
|
||||
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(batch, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_put;
|
||||
|
||||
batch->private = pool;
|
||||
return batch;
|
||||
|
||||
out_put:
|
||||
intel_engine_pool_put(pool);
|
||||
out_pm:
|
||||
intel_engine_pm_put(ce->engine);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
|
||||
if (obj->cache_dirty & ~obj->cache_coherent)
|
||||
i915_gem_clflush_object(obj, 0);
|
||||
|
||||
return i915_request_await_object(rq, obj, write);
|
||||
}
|
||||
|
||||
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
|
||||
struct drm_i915_gem_object *dst,
|
||||
struct intel_context *ce)
|
||||
{
|
||||
struct drm_gem_object *objs[] = { &src->base, &dst->base };
|
||||
struct i915_address_space *vm = ce->vm;
|
||||
struct i915_vma *vma[2], *batch;
|
||||
struct ww_acquire_ctx acquire;
|
||||
struct i915_request *rq;
|
||||
int err, i;
|
||||
|
||||
vma[0] = i915_vma_instance(src, vm, NULL);
|
||||
if (IS_ERR(vma[0]))
|
||||
return PTR_ERR(vma[0]);
|
||||
|
||||
err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
vma[1] = i915_vma_instance(dst, vm, NULL);
|
||||
if (IS_ERR(vma[1]))
|
||||
goto out_unpin_src;
|
||||
|
||||
err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_unpin_src;
|
||||
|
||||
batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_unpin_dst;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_batch;
|
||||
}
|
||||
|
||||
err = intel_emit_vma_mark_active(batch, rq);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vma); i++) {
|
||||
err = move_to_gpu(vma[i], rq, i);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vma); i++) {
|
||||
unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;
|
||||
|
||||
err = i915_vma_move_to_active(vma[i], rq, flags);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (rq->engine->emit_init_breadcrumb) {
|
||||
err = rq->engine->emit_init_breadcrumb(rq);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = rq->engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
0);
|
||||
out_unlock:
|
||||
drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
|
||||
out_request:
|
||||
if (unlikely(err))
|
||||
i915_request_skip(rq, err);
|
||||
|
||||
i915_request_add(rq);
|
||||
out_batch:
|
||||
intel_emit_vma_release(ce, batch);
|
||||
out_unpin_dst:
|
||||
i915_vma_unpin(vma[1]);
|
||||
out_unpin_src:
|
||||
i915_vma_unpin(vma[0]);
|
||||
return err;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
#include "selftests/i915_gem_object_blt.c"
|
||||
#endif
|
||||
|
@@ -8,17 +8,30 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct drm_i915_gem_object;
|
||||
struct intel_context;
|
||||
struct i915_request;
|
||||
struct i915_vma;
|
||||
#include "gt/intel_context.h"
|
||||
#include "gt/intel_engine_pm.h"
|
||||
#include "gt/intel_engine_pool.h"
|
||||
#include "i915_vma.h"
|
||||
|
||||
int intel_emit_vma_fill_blt(struct i915_request *rq,
|
||||
struct i915_vma *vma,
|
||||
u32 value);
|
||||
struct drm_i915_gem_object;
|
||||
|
||||
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
|
||||
struct i915_vma *vma,
|
||||
u32 value);
|
||||
|
||||
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
|
||||
struct i915_vma *src,
|
||||
struct i915_vma *dst);
|
||||
|
||||
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq);
|
||||
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma);
|
||||
|
||||
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
struct intel_context *ce,
|
||||
u32 value);
|
||||
|
||||
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
|
||||
struct drm_i915_gem_object *dst,
|
||||
struct intel_context *ce);
|
||||
|
||||
#endif
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#include "i915_selftest.h"
|
||||
|
||||
struct drm_i915_gem_object;
|
||||
struct intel_fronbuffer;
|
||||
|
||||
/*
|
||||
* struct i915_lut_handle tracks the fast lookups from handle to vma used
|
||||
@@ -114,7 +115,6 @@ struct drm_i915_gem_object {
|
||||
unsigned int userfault_count;
|
||||
struct list_head userfault_link;
|
||||
|
||||
struct list_head batch_pool_link;
|
||||
I915_SELFTEST_DECLARE(struct list_head st_link);
|
||||
|
||||
/*
|
||||
@@ -142,9 +142,7 @@ struct drm_i915_gem_object {
|
||||
*/
|
||||
u16 write_domain;
|
||||
|
||||
atomic_t frontbuffer_bits;
|
||||
unsigned int frontbuffer_ggtt_origin; /* write once */
|
||||
struct i915_active_request frontbuffer_write;
|
||||
struct intel_frontbuffer *frontbuffer;
|
||||
|
||||
/** Current tiling stride for the object, if it's tiled. */
|
||||
unsigned int tiling_and_stride;
|
||||
@@ -154,7 +152,6 @@ struct drm_i915_gem_object {
|
||||
|
||||
/** Count of VMA actually bound by this object */
|
||||
atomic_t bind_count;
|
||||
unsigned int active_count;
|
||||
/** Count of how many global VMA are currently pinned for use by HW */
|
||||
unsigned int pin_global;
|
||||
|
||||
@@ -226,9 +223,6 @@ struct drm_i915_gem_object {
|
||||
bool quirked:1;
|
||||
} mm;
|
||||
|
||||
/** References from framebuffers, locks out tiling changes. */
|
||||
unsigned int framebuffer_references;
|
||||
|
||||
/** Record of address bit 17 of each page at last unbind. */
|
||||
unsigned long *bit_17;
|
||||
|
||||
|
@@ -153,24 +153,13 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
|
||||
struct sg_table *
|
||||
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct sg_table *pages;
|
||||
|
||||
pages = fetch_and_zero(&obj->mm.pages);
|
||||
if (IS_ERR_OR_NULL(pages))
|
||||
return pages;
|
||||
|
||||
if (i915_gem_object_is_shrinkable(obj)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i915->mm.obj_lock, flags);
|
||||
|
||||
list_del(&obj->mm.link);
|
||||
i915->mm.shrink_count--;
|
||||
i915->mm.shrink_memory -= obj->base.size;
|
||||
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
}
|
||||
i915_gem_object_make_unshrinkable(obj);
|
||||
|
||||
if (obj->mm.mapping) {
|
||||
void *ptr;
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#include <drm/drm_legacy.h> /* for drm_pci.h! */
|
||||
#include <drm/drm_pci.h>
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_object.h"
|
||||
#include "i915_scatterlist.h"
|
||||
@@ -60,7 +61,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
|
||||
vaddr += PAGE_SIZE;
|
||||
}
|
||||
|
||||
i915_gem_chipset_flush(to_i915(obj->base.dev));
|
||||
intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
|
||||
|
||||
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
||||
if (!st) {
|
||||
@@ -132,16 +133,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
|
||||
drm_pci_free(obj->base.dev, obj->phys_handle);
|
||||
}
|
||||
|
||||
static void
|
||||
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
|
||||
static void phys_release(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
fput(obj->base.filp);
|
||||
}
|
||||
|
||||
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
|
||||
.get_pages = i915_gem_object_get_pages_phys,
|
||||
.put_pages = i915_gem_object_put_pages_phys,
|
||||
.release = i915_gem_object_release_phys,
|
||||
|
||||
.release = phys_release,
|
||||
};
|
||||
|
||||
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
|
||||
@@ -158,7 +159,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
|
||||
if (obj->ops != &i915_gem_shmem_ops)
|
||||
return -EINVAL;
|
||||
|
||||
err = i915_gem_object_unbind(obj);
|
||||
err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
|
||||
#include "gem/i915_gem_pm.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_pm.h"
|
||||
|
||||
#include "i915_drv.h"
|
||||
@@ -33,12 +34,9 @@ static void i915_gem_park(struct drm_i915_private *i915)
|
||||
|
||||
lockdep_assert_held(&i915->drm.struct_mutex);
|
||||
|
||||
for_each_engine(engine, i915, id) {
|
||||
for_each_engine(engine, i915, id)
|
||||
call_idle_barriers(engine); /* cleanup after wedging */
|
||||
i915_gem_batch_pool_fini(&engine->batch_pool);
|
||||
}
|
||||
|
||||
i915_timelines_park(i915);
|
||||
i915_vma_parked(i915);
|
||||
|
||||
i915_globals_park();
|
||||
@@ -54,7 +52,8 @@ static void idle_work_handler(struct work_struct *work)
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
|
||||
intel_wakeref_lock(&i915->gt.wakeref);
|
||||
park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work);
|
||||
park = (!intel_wakeref_is_active(&i915->gt.wakeref) &&
|
||||
!work_pending(work));
|
||||
intel_wakeref_unlock(&i915->gt.wakeref);
|
||||
if (park)
|
||||
i915_gem_park(i915);
|
||||
@@ -105,18 +104,18 @@ static int pm_notifier(struct notifier_block *nb,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
|
||||
static bool switch_to_kernel_context_sync(struct intel_gt *gt)
|
||||
{
|
||||
bool result = !i915_terminally_wedged(i915);
|
||||
bool result = !intel_gt_is_wedged(gt);
|
||||
|
||||
do {
|
||||
if (i915_gem_wait_for_idle(i915,
|
||||
if (i915_gem_wait_for_idle(gt->i915,
|
||||
I915_WAIT_LOCKED |
|
||||
I915_WAIT_FOR_IDLE_BOOST,
|
||||
I915_GEM_IDLE_TIMEOUT) == -ETIME) {
|
||||
/* XXX hide warning from gem_eio */
|
||||
if (i915_modparams.reset) {
|
||||
dev_err(i915->drm.dev,
|
||||
dev_err(gt->i915->drm.dev,
|
||||
"Failed to idle engines, declaring wedged!\n");
|
||||
GEM_TRACE_DUMP();
|
||||
}
|
||||
@@ -125,18 +124,20 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
|
||||
* Forcibly cancel outstanding work and leave
|
||||
* the gpu quiet.
|
||||
*/
|
||||
i915_gem_set_wedged(i915);
|
||||
intel_gt_set_wedged(gt);
|
||||
result = false;
|
||||
}
|
||||
} while (i915_retire_requests(i915) && result);
|
||||
} while (i915_retire_requests(gt->i915) && result);
|
||||
|
||||
if (intel_gt_pm_wait_for_idle(gt))
|
||||
result = false;
|
||||
|
||||
GEM_BUG_ON(i915->gt.awake);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool i915_gem_load_power_context(struct drm_i915_private *i915)
|
||||
{
|
||||
return switch_to_kernel_context_sync(i915);
|
||||
return switch_to_kernel_context_sync(&i915->gt);
|
||||
}
|
||||
|
||||
void i915_gem_suspend(struct drm_i915_private *i915)
|
||||
@@ -157,22 +158,15 @@ void i915_gem_suspend(struct drm_i915_private *i915)
|
||||
* state. Fortunately, the kernel_context is disposable and we do
|
||||
* not rely on its state.
|
||||
*/
|
||||
switch_to_kernel_context_sync(i915);
|
||||
switch_to_kernel_context_sync(&i915->gt);
|
||||
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
|
||||
/*
|
||||
* Assert that we successfully flushed all the work and
|
||||
* reset the GPU back to its idle, low power state.
|
||||
*/
|
||||
GEM_BUG_ON(i915->gt.awake);
|
||||
flush_work(&i915->gem.idle_work);
|
||||
|
||||
cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
|
||||
cancel_delayed_work_sync(&i915->gt.hangcheck.work);
|
||||
|
||||
i915_gem_drain_freed_objects(i915);
|
||||
|
||||
intel_uc_suspend(i915);
|
||||
intel_uc_suspend(&i915->gt.uc);
|
||||
}
|
||||
|
||||
static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
|
||||
@@ -237,7 +231,6 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
|
||||
}
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
|
||||
intel_uc_sanitize(i915);
|
||||
i915_gem_sanitize(i915);
|
||||
}
|
||||
|
||||
@@ -245,8 +238,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
|
||||
{
|
||||
GEM_TRACE("\n");
|
||||
|
||||
WARN_ON(i915->gt.awake);
|
||||
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
|
||||
|
||||
@@ -261,10 +252,10 @@ void i915_gem_resume(struct drm_i915_private *i915)
|
||||
* guarantee that the context image is complete. So let's just reset
|
||||
* it and start again.
|
||||
*/
|
||||
if (intel_gt_resume(i915))
|
||||
if (intel_gt_resume(&i915->gt))
|
||||
goto err_wedged;
|
||||
|
||||
intel_uc_resume(i915);
|
||||
intel_uc_resume(&i915->gt.uc);
|
||||
|
||||
/* Always reload a context for powersaving. */
|
||||
if (!i915_gem_load_power_context(i915))
|
||||
@@ -276,10 +267,10 @@ out_unlock:
|
||||
return;
|
||||
|
||||
err_wedged:
|
||||
if (!i915_reset_failed(i915)) {
|
||||
if (!intel_gt_is_wedged(&i915->gt)) {
|
||||
dev_err(i915->drm.dev,
|
||||
"Failed to re-initialize GPU, declaring it wedged!\n");
|
||||
i915_gem_set_wedged(i915);
|
||||
intel_gt_set_wedged(&i915->gt);
|
||||
}
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@@ -10,6 +10,7 @@
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_object.h"
|
||||
#include "i915_scatterlist.h"
|
||||
#include "i915_trace.h"
|
||||
|
||||
/*
|
||||
* Move pages to appropriate lru and release the pagevec, decrementing the
|
||||
@@ -414,6 +415,11 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void shmem_release(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
fput(obj->base.filp);
|
||||
}
|
||||
|
||||
const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
|
||||
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
|
||||
I915_GEM_OBJECT_IS_SHRINKABLE,
|
||||
@@ -424,6 +430,8 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
|
||||
.writeback = shmem_writeback,
|
||||
|
||||
.pwrite = shmem_pwrite,
|
||||
|
||||
.release = shmem_release,
|
||||
};
|
||||
|
||||
static int create_shmem(struct drm_i915_private *i915,
|
||||
|
@@ -88,10 +88,18 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
|
||||
return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
|
||||
}
|
||||
|
||||
static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
|
||||
static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
|
||||
unsigned long shrink)
|
||||
{
|
||||
if (i915_gem_object_unbind(obj) == 0)
|
||||
unsigned long flags;
|
||||
|
||||
flags = 0;
|
||||
if (shrink & I915_SHRINK_ACTIVE)
|
||||
flags = I915_GEM_OBJECT_UNBIND_ACTIVE;
|
||||
|
||||
if (i915_gem_object_unbind(obj, flags) == 0)
|
||||
__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
|
||||
|
||||
return !i915_gem_object_has_pages(obj);
|
||||
}
|
||||
|
||||
@@ -169,7 +177,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
|
||||
*/
|
||||
|
||||
trace_i915_gem_shrink(i915, target, shrink);
|
||||
i915_retire_requests(i915);
|
||||
|
||||
/*
|
||||
* Unbinding of objects will require HW access; Let us not wake the
|
||||
@@ -230,8 +237,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
|
||||
continue;
|
||||
|
||||
if (!(shrink & I915_SHRINK_ACTIVE) &&
|
||||
(i915_gem_object_is_active(obj) ||
|
||||
i915_gem_object_is_framebuffer(obj)))
|
||||
i915_gem_object_is_framebuffer(obj))
|
||||
continue;
|
||||
|
||||
if (!(shrink & I915_SHRINK_BOUND) &&
|
||||
@@ -246,7 +252,7 @@ i915_gem_shrink(struct drm_i915_private *i915,
|
||||
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
|
||||
if (unsafe_drop_pages(obj)) {
|
||||
if (unsafe_drop_pages(obj, shrink)) {
|
||||
/* May arrive from get_pages on another bo */
|
||||
mutex_lock_nested(&obj->mm.lock,
|
||||
I915_MM_SHRINKER);
|
||||
@@ -269,8 +275,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
|
||||
if (shrink & I915_SHRINK_BOUND)
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
|
||||
i915_retire_requests(i915);
|
||||
|
||||
shrinker_unlock(i915, unlock);
|
||||
|
||||
if (nr_scanned)
|
||||
@@ -427,12 +431,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
|
||||
if (!shrinker_lock(i915, 0, &unlock))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/* Force everything onto the inactive lists */
|
||||
if (i915_gem_wait_for_idle(i915,
|
||||
I915_WAIT_LOCKED,
|
||||
MAX_SCHEDULE_TIMEOUT))
|
||||
goto out;
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
||||
freed_pages += i915_gem_shrink(i915, -1UL, NULL,
|
||||
I915_SHRINK_BOUND |
|
||||
@@ -455,20 +453,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
|
||||
}
|
||||
mutex_unlock(&i915->ggtt.vm.mutex);
|
||||
|
||||
out:
|
||||
shrinker_unlock(i915, unlock);
|
||||
|
||||
*(unsigned long *)ptr += freed_pages;
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_shrinker_register - Register the i915 shrinker
|
||||
* @i915: i915 device
|
||||
*
|
||||
* This function registers and sets up the i915 shrinker and OOM handler.
|
||||
*/
|
||||
void i915_gem_shrinker_register(struct drm_i915_private *i915)
|
||||
void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
|
||||
{
|
||||
i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
|
||||
i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
|
||||
@@ -483,13 +474,7 @@ void i915_gem_shrinker_register(struct drm_i915_private *i915)
|
||||
WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_shrinker_unregister - Unregisters the i915 shrinker
|
||||
* @i915: i915 device
|
||||
*
|
||||
* This function unregisters the i915 shrinker and OOM handler.
|
||||
*/
|
||||
void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
|
||||
void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
|
||||
{
|
||||
WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
|
||||
WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
|
||||
@@ -533,3 +518,61 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
|
||||
if (unlock)
|
||||
mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
|
||||
}
|
||||
|
||||
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
|
||||
|
||||
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
/*
|
||||
* We can only be called while the pages are pinned or when
|
||||
* the pages are released. If pinned, we should only be called
|
||||
* from a single caller under controlled conditions; and on release
|
||||
* only one caller may release us. Neither the two may cross.
|
||||
*/
|
||||
if (!list_empty(&obj->mm.link)) { /* pinned by caller */
|
||||
struct drm_i915_private *i915 = obj_to_i915(obj);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i915->mm.obj_lock, flags);
|
||||
GEM_BUG_ON(list_empty(&obj->mm.link));
|
||||
|
||||
list_del_init(&obj->mm.link);
|
||||
i915->mm.shrink_count--;
|
||||
i915->mm.shrink_memory -= obj->base.size;
|
||||
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
|
||||
struct list_head *head)
|
||||
{
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
||||
GEM_BUG_ON(!list_empty(&obj->mm.link));
|
||||
|
||||
if (i915_gem_object_is_shrinkable(obj)) {
|
||||
struct drm_i915_private *i915 = obj_to_i915(obj);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i915->mm.obj_lock, flags);
|
||||
GEM_BUG_ON(!kref_read(&obj->base.refcount));
|
||||
|
||||
list_add_tail(&obj->mm.link, head);
|
||||
i915->mm.shrink_count++;
|
||||
i915->mm.shrink_memory += obj->base.size;
|
||||
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
__i915_gem_object_make_shrinkable(obj,
|
||||
&obj_to_i915(obj)->mm.shrink_list);
|
||||
}
|
||||
|
||||
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
__i915_gem_object_make_shrinkable(obj,
|
||||
&obj_to_i915(obj)->mm.purge_list);
|
||||
}
|
||||
|
drivers/gpu/drm/i915/gem/i915_gem_shrinker.h (new file, 31 lines)
@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2019 Intel Corporation
*/

#ifndef __I915_GEM_SHRINKER_H__
#define __I915_GEM_SHRINKER_H__

#include <linux/bits.h>

struct drm_i915_private;
struct mutex;

/* i915_gem_shrinker.c */
unsigned long i915_gem_shrink(struct drm_i915_private *i915,
unsigned long target,
unsigned long *nr_scanned,
unsigned flags);
#define I915_SHRINK_UNBOUND BIT(0)
#define I915_SHRINK_BOUND BIT(1)
#define I915_SHRINK_ACTIVE BIT(2)
#define I915_SHRINK_VMAPS BIT(3)
#define I915_SHRINK_WRITEBACK BIT(4)

unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
void i915_gem_driver_register__shrinker(struct drm_i915_private *i915);
void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915);
void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
struct mutex *mutex);

#endif /* __I915_GEM_SHRINKER_H__ */
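
/*
 * Illustrative use of the shrink flags above (a sketch, not part of the
 * header itself): reclaiming as much as possible combines bound and
 * unbound objects and includes active ones, as the vmap notifier in
 * this series does:
 *
 *	i915_gem_shrink(i915, -1UL, NULL,
 *			I915_SHRINK_BOUND |
 *			I915_SHRINK_UNBOUND |
 *			I915_SHRINK_ACTIVE);
 */
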
@@ -11,6 +11,7 @@
|
||||
#include <drm/i915_drm.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_stolen.h"
|
||||
|
||||
/*
|
||||
* The BIOS typically reserves some of the system's memory for the exclusive
|
||||
@@ -362,12 +363,16 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
|
||||
mutex_init(&dev_priv->mm.stolen_lock);
|
||||
|
||||
if (intel_vgpu_active(dev_priv)) {
|
||||
DRM_INFO("iGVT-g active, disabling use of stolen memory\n");
|
||||
dev_notice(dev_priv->drm.dev,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"iGVT-g active");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
|
||||
DRM_INFO("DMAR active, disabling use of stolen memory\n");
|
||||
dev_notice(dev_priv->drm.dev,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"DMAR active");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -529,8 +534,6 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
|
||||
|
||||
GEM_BUG_ON(!stolen);
|
||||
|
||||
__i915_gem_object_unpin_pages(obj);
|
||||
|
||||
i915_gem_stolen_remove_node(dev_priv, stolen);
|
||||
kfree(stolen);
|
||||
}
|
||||
|
drivers/gpu/drm/i915/gem/i915_gem_stolen.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2019 Intel Corporation
*/

#ifndef __I915_GEM_STOLEN_H__
#define __I915_GEM_STOLEN_H__

#include <linux/types.h>

struct drm_i915_private;
struct drm_mm_node;
struct drm_i915_gem_object;

int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
struct drm_mm_node *node, u64 size,
unsigned alignment);
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
struct drm_mm_node *node, u64 size,
unsigned alignment, u64 start,
u64 end);
void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
struct drm_mm_node *node);
int i915_gem_init_stolen(struct drm_i915_private *dev_priv);
void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv);
struct drm_i915_gem_object *
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
resource_size_t size);
struct drm_i915_gem_object *
i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
resource_size_t stolen_offset,
resource_size_t gtt_offset,
resource_size_t size);

#endif /* __I915_GEM_STOLEN_H__ */
@@ -41,7 +41,7 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
|
||||
long ret;
|
||||
|
||||
/* ABI: return -EIO if already wedged */
|
||||
ret = i915_terminally_wedged(to_i915(dev));
|
||||
ret = intel_gt_terminally_wedged(&to_i915(dev)->gt);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@@ -12,11 +12,10 @@
|
||||
|
||||
#include <drm/i915_drm.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_gem_object.h"
|
||||
#include "i915_scatterlist.h"
|
||||
#include "i915_trace.h"
|
||||
#include "intel_drv.h"
|
||||
|
||||
struct i915_mm_struct {
|
||||
struct mm_struct *mm;
|
||||
@@ -150,7 +149,8 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
|
||||
}
|
||||
}
|
||||
|
||||
ret = i915_gem_object_unbind(obj);
|
||||
ret = i915_gem_object_unbind(obj,
|
||||
I915_GEM_OBJECT_UNBIND_ACTIVE);
|
||||
if (ret == 0)
|
||||
ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
|
||||
i915_gem_object_put(obj);
|
||||
@@ -662,6 +662,14 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
|
||||
__i915_gem_object_release_shmem(obj, pages, true);
|
||||
i915_gem_gtt_finish_pages(obj, pages);
|
||||
|
||||
/*
|
||||
* We always mark objects as dirty when they are used by the GPU,
|
||||
* just in case. However, if we set the vma as being read-only we know
|
||||
* that the object will never have been written to.
|
||||
*/
|
||||
if (i915_gem_object_is_readonly(obj))
|
||||
obj->mm.dirty = false;
|
||||
|
||||
for_each_sgt_page(page, sgt_iter, pages) {
|
||||
if (obj->mm.dirty)
|
||||
set_page_dirty(page);
|
||||
|
@@ -31,11 +31,10 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
|
||||
}
|
||||
|
||||
static long
|
||||
i915_gem_object_wait_reservation(struct reservation_object *resv,
|
||||
i915_gem_object_wait_reservation(struct dma_resv *resv,
|
||||
unsigned int flags,
|
||||
long timeout)
|
||||
{
|
||||
unsigned int seq = __read_seqcount_begin(&resv->seq);
|
||||
struct dma_fence *excl;
|
||||
bool prune_fences = false;
|
||||
|
||||
@@ -44,7 +43,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
|
||||
unsigned int count, i;
|
||||
int ret;
|
||||
|
||||
ret = reservation_object_get_fences_rcu(resv,
|
||||
ret = dma_resv_get_fences_rcu(resv,
|
||||
&excl, &count, &shared);
|
||||
if (ret)
|
||||
return ret;
|
||||
@@ -73,7 +72,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
|
||||
*/
|
||||
prune_fences = count && timeout >= 0;
|
||||
} else {
|
||||
excl = reservation_object_get_excl_rcu(resv);
|
||||
excl = dma_resv_get_excl_rcu(resv);
|
||||
}
|
||||
|
||||
if (excl && timeout >= 0)
|
||||
@@ -83,15 +82,12 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
|
||||
|
||||
/*
|
||||
* Opportunistically prune the fences iff we know they have *all* been
|
||||
* signaled and that the reservation object has not been changed (i.e.
|
||||
* no new fences have been added).
|
||||
* signaled.
|
||||
*/
|
||||
if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
|
||||
if (reservation_object_trylock(resv)) {
|
||||
if (!__read_seqcount_retry(&resv->seq, seq))
|
||||
reservation_object_add_excl_fence(resv, NULL);
|
||||
reservation_object_unlock(resv);
|
||||
}
|
||||
if (prune_fences && dma_resv_trylock(resv)) {
|
||||
if (dma_resv_test_signaled_rcu(resv, true))
|
||||
dma_resv_add_excl_fence(resv, NULL);
|
||||
dma_resv_unlock(resv);
|
||||
}
|
||||
|
||||
return timeout;
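
Taken on its own, the replacement path drops the hand-rolled seqcount re-check in favour of the dma-resv helpers; as a standalone sketch (5.4-era dma-resv API, helper name invented for illustration, not part of the patch):

    #include <linux/dma-resv.h>

    /*
     * Sketch only: opportunistically collapse an all-signaled fence set
     * into a single NULL exclusive fence, as the hunk above now does.
     */
    static void prune_signaled_fences(struct dma_resv *resv)
    {
            if (!dma_resv_trylock(resv))
                    return;

            if (dma_resv_test_signaled_rcu(resv, true))
                    dma_resv_add_excl_fence(resv, NULL);

            dma_resv_unlock(resv);
    }
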
@@ -144,7 +140,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
|
||||
unsigned int count, i;
|
||||
int ret;
|
||||
|
||||
ret = reservation_object_get_fences_rcu(obj->base.resv,
|
||||
ret = dma_resv_get_fences_rcu(obj->base.resv,
|
||||
&excl, &count, &shared);
|
||||
if (ret)
|
||||
return ret;
|
||||
@@ -156,7 +152,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
|
||||
|
||||
kfree(shared);
|
||||
} else {
|
||||
excl = reservation_object_get_excl_rcu(obj->base.resv);
|
||||
excl = dma_resv_get_excl_rcu(obj->base.resv);
|
||||
}
|
||||
|
||||
if (excl) {
|
||||
|
@@ -20,32 +20,19 @@ int i915_gemfs_init(struct drm_i915_private *i915)
|
||||
if (!type)
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* By creating our own shmemfs mountpoint, we can pass in
|
||||
* mount flags that better match our usecase.
|
||||
*
|
||||
* One example, although it is probably better with a per-file
|
||||
* control, is selecting huge page allocations ("huge=within_size").
|
||||
* Currently unused due to bandwidth issues (slow reads) on Broadwell+.
|
||||
*/
|
||||
|
||||
gemfs = kern_mount(type);
|
||||
if (IS_ERR(gemfs))
|
||||
return PTR_ERR(gemfs);
|
||||
|
||||
/*
|
||||
* Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most
|
||||
* likely 2M. Note that within_size may overallocate huge-pages, if say
|
||||
* we allocate an object of size 2M + 4K, we may get 2M + 2M, but under
|
||||
* memory pressure shmem should split any huge-pages which can be
|
||||
* shrunk.
|
||||
*/
|
||||
|
||||
if (has_transparent_hugepage()) {
|
||||
struct super_block *sb = gemfs->mnt_sb;
|
||||
/* FIXME: Disabled until we get W/A for read BW issue. */
|
||||
char options[] = "huge=never";
|
||||
int flags = 0;
|
||||
int err;
|
||||
|
||||
err = sb->s_op->remount_fs(sb, &flags, options);
|
||||
if (err) {
|
||||
kern_unmount(gemfs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
i915->mm.gemfs = gemfs;
|
||||
|
||||
return 0;
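
The mount option the comment alludes to would plug into the same remount hook; a purely illustrative sketch, not part of the patch (which deliberately sticks with "huge=never" until the read-bandwidth workaround exists), with the helper name invented:

    #include <linux/fs.h>
    #include <linux/mount.h>

    /* Illustrative only: opt gemfs back into transparent huge pages. */
    static int gemfs_enable_huge(struct vfsmount *gemfs)
    {
            struct super_block *sb = gemfs->mnt_sb;
            char options[] = "huge=within_size";
            int flags = 0;

            return sb->s_op->remount_fs(sb, &flags, options);
    }
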
@@ -10,6 +10,8 @@
|
||||
|
||||
#include "gem/i915_gem_pm.h"
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
|
||||
#include "igt_gem_utils.h"
|
||||
#include "mock_context.h"
|
||||
|
||||
@@ -877,126 +879,22 @@ out_object_put:
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct i915_vma *
|
||||
gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
|
||||
{
|
||||
struct drm_i915_private *i915 = vma->vm->i915;
|
||||
const int gen = INTEL_GEN(i915);
|
||||
unsigned int count = vma->size >> PAGE_SHIFT;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct i915_vma *batch;
|
||||
unsigned int size;
|
||||
u32 *cmd;
|
||||
int n;
|
||||
int err;
|
||||
|
||||
size = (1 + 4 * count) * sizeof(u32);
|
||||
size = round_up(size, PAGE_SIZE);
|
||||
obj = i915_gem_object_create_internal(i915, size);
|
||||
if (IS_ERR(obj))
|
||||
return ERR_CAST(obj);
|
||||
|
||||
cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto err;
|
||||
}
|
||||
|
||||
offset += vma->node.start;
|
||||
|
||||
for (n = 0; n < count; n++) {
|
||||
if (gen >= 8) {
|
||||
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
|
||||
*cmd++ = lower_32_bits(offset);
|
||||
*cmd++ = upper_32_bits(offset);
|
||||
*cmd++ = val;
|
||||
} else if (gen >= 4) {
|
||||
*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
|
||||
(gen < 6 ? MI_USE_GGTT : 0);
|
||||
*cmd++ = 0;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = val;
|
||||
} else {
|
||||
*cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = val;
|
||||
}
|
||||
|
||||
offset += PAGE_SIZE;
|
||||
}
|
||||
|
||||
*cmd = MI_BATCH_BUFFER_END;
|
||||
i915_gem_chipset_flush(i915);
|
||||
|
||||
i915_gem_object_unpin_map(obj);
|
||||
|
||||
batch = i915_vma_instance(obj, vma->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(batch, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
return batch;
|
||||
|
||||
err:
|
||||
i915_gem_object_put(obj);
|
||||
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int gpu_write(struct i915_vma *vma,
|
||||
struct i915_gem_context *ctx,
|
||||
struct intel_engine_cs *engine,
|
||||
u32 dword,
|
||||
u32 value)
|
||||
u32 dw,
|
||||
u32 val)
|
||||
{
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *batch;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(!intel_engine_can_store_dword(engine));
|
||||
|
||||
batch = gpu_write_dw(vma, dword * sizeof(u32), value);
|
||||
if (IS_ERR(batch))
|
||||
return PTR_ERR(batch);
|
||||
|
||||
rq = igt_request_alloc(ctx, engine);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto err_batch;
|
||||
}
|
||||
|
||||
i915_vma_lock(batch);
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
i915_vma_unlock(batch);
|
||||
i915_gem_object_lock(vma->obj);
|
||||
err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
|
||||
i915_gem_object_unlock(vma->obj);
|
||||
if (err)
|
||||
goto err_request;
|
||||
return err;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto err_request;
|
||||
|
||||
err = engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
0);
|
||||
err_request:
|
||||
if (err)
|
||||
i915_request_skip(rq, err);
|
||||
i915_request_add(rq);
|
||||
err_batch:
|
||||
i915_vma_unpin(batch);
|
||||
i915_vma_close(batch);
|
||||
i915_vma_put(batch);
|
||||
|
||||
return err;
|
||||
return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32),
|
||||
vma->size >> PAGE_SHIFT, val);
|
||||
}
|
||||
|
||||
static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
|
||||
@@ -1037,8 +935,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
|
||||
u64 size, u64 offset,
|
||||
u32 dword, u32 val)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
|
||||
struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm;
|
||||
unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
|
||||
struct i915_vma *vma;
|
||||
int err;
|
||||
@@ -1421,6 +1318,9 @@ static int igt_ppgtt_pin_update(void *arg)
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct i915_vma *vma;
|
||||
unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
unsigned int n;
|
||||
int first, last;
|
||||
int err;
|
||||
|
||||
@@ -1518,11 +1418,20 @@ static int igt_ppgtt_pin_update(void *arg)
|
||||
* land in the now stale 2M page.
|
||||
*/
|
||||
|
||||
err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
n = 0;
|
||||
for_each_engine(engine, dev_priv, id) {
|
||||
if (!intel_engine_can_store_dword(engine))
|
||||
continue;
|
||||
|
||||
err = cpu_check(obj, 0, 0xdeadbeaf);
|
||||
err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
}
|
||||
while (n--) {
|
||||
err = cpu_check(obj, n, 0xdeadbeaf);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
out_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
@@ -1598,8 +1507,11 @@ static int igt_shrink_thp(void *arg)
|
||||
struct drm_i915_private *i915 = ctx->i915;
|
||||
struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
struct i915_vma *vma;
|
||||
unsigned int flags = PIN_USER;
|
||||
unsigned int n;
|
||||
int err;
|
||||
|
||||
/*
|
||||
@@ -1635,9 +1547,15 @@ static int igt_shrink_thp(void *arg)
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
|
||||
err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
n = 0;
|
||||
for_each_engine(engine, i915, id) {
|
||||
if (!intel_engine_can_store_dword(engine))
|
||||
continue;
|
||||
|
||||
err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
i915_vma_unpin(vma);
|
||||
|
||||
@@ -1662,7 +1580,12 @@ static int igt_shrink_thp(void *arg)
|
||||
if (err)
|
||||
goto out_close;
|
||||
|
||||
err = cpu_check(obj, 0, 0xdeadbeaf);
|
||||
while (n--) {
|
||||
err = cpu_check(obj, n, 0xdeadbeaf);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
|
||||
out_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
@@ -1726,7 +1649,7 @@ out_unlock:
|
||||
return err;
|
||||
}
|
||||
|
||||
int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
|
||||
int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
|
||||
{
|
||||
static const struct i915_subtest tests[] = {
|
||||
SUBTEST(igt_shrink_thp),
|
||||
@@ -1741,22 +1664,22 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
|
||||
intel_wakeref_t wakeref;
|
||||
int err;
|
||||
|
||||
if (!HAS_PPGTT(dev_priv)) {
|
||||
if (!HAS_PPGTT(i915)) {
|
||||
pr_info("PPGTT not supported, skipping live-selftests\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (i915_terminally_wedged(dev_priv))
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
return 0;
|
||||
|
||||
file = mock_file(dev_priv);
|
||||
file = mock_file(i915);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
mutex_lock(&dev_priv->drm.struct_mutex);
|
||||
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
|
||||
|
||||
ctx = live_context(dev_priv, file);
|
||||
ctx = live_context(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
err = PTR_ERR(ctx);
|
||||
goto out_unlock;
|
||||
@@ -1768,10 +1691,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
|
||||
err = i915_subtests(tests, ctx);
|
||||
|
||||
out_unlock:
|
||||
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
|
||||
mutex_unlock(&dev_priv->drm.struct_mutex);
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
|
||||
mock_file_free(dev_priv, file);
|
||||
mock_file_free(i915, file);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@@ -5,14 +5,17 @@
|
||||
|
||||
#include "i915_selftest.h"
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
|
||||
#include "selftests/igt_flush_test.h"
|
||||
#include "selftests/mock_drm.h"
|
||||
#include "huge_gem_object.h"
|
||||
#include "mock_context.h"
|
||||
|
||||
static int igt_client_fill(void *arg)
|
||||
{
|
||||
struct intel_context *ce = arg;
|
||||
struct drm_i915_private *i915 = ce->gem_context->i915;
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_context *ce = i915->engine[BCS0]->kernel_context;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct rnd_state prng;
|
||||
IGT_TIMEOUT(end);
|
||||
@@ -22,15 +25,19 @@ static int igt_client_fill(void *arg)
|
||||
prandom_seed_state(&prng, i915_selftest.random_seed);
|
||||
|
||||
do {
|
||||
u32 sz = prandom_u32_state(&prng) % SZ_32M;
|
||||
const u32 max_block_size = S16_MAX * PAGE_SIZE;
|
||||
u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
|
||||
u32 phys_sz = sz % (max_block_size + 1);
|
||||
u32 val = prandom_u32_state(&prng);
|
||||
u32 i;
|
||||
|
||||
sz = round_up(sz, PAGE_SIZE);
|
||||
phys_sz = round_up(phys_sz, PAGE_SIZE);
|
||||
|
||||
pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
|
||||
pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
|
||||
phys_sz, sz, val);
|
||||
|
||||
obj = i915_gem_object_create_internal(i915, sz);
|
||||
obj = huge_gem_object(i915, phys_sz, sz);
|
||||
if (IS_ERR(obj)) {
|
||||
err = PTR_ERR(obj);
|
||||
goto err_flush;
|
||||
@@ -52,7 +59,8 @@ static int igt_client_fill(void *arg)
|
||||
* values after we do the set_to_cpu_domain and pick it up as a
|
||||
* test failure.
|
||||
*/
|
||||
memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
|
||||
memset32(vaddr, val ^ 0xdeadbeaf,
|
||||
huge_gem_object_phys_size(obj) / sizeof(u32));
|
||||
|
||||
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
|
||||
obj->cache_dirty = true;
|
||||
@@ -63,24 +71,13 @@ static int igt_client_fill(void *arg)
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
/*
|
||||
* XXX: For now do the wait without the object resv lock to
|
||||
* ensure we don't deadlock.
|
||||
*/
|
||||
err = i915_gem_object_wait(obj,
|
||||
I915_WAIT_INTERRUPTIBLE |
|
||||
I915_WAIT_ALL,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
err = i915_gem_object_set_to_cpu_domain(obj, false);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
|
||||
for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
|
||||
if (vaddr[i] != val) {
|
||||
pr_err("vaddr[%u]=%x, expected=%x\n", i,
|
||||
vaddr[i], val);
|
||||
@@ -100,11 +97,6 @@ err_unpin:
|
||||
err_put:
|
||||
i915_gem_object_put(obj);
|
||||
err_flush:
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
if (igt_flush_test(i915, I915_WAIT_LOCKED))
|
||||
err = -EIO;
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
|
||||
if (err == -ENOMEM)
|
||||
err = 0;
|
||||
|
||||
@@ -117,11 +109,11 @@ int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
|
||||
SUBTEST(igt_client_fill),
|
||||
};
|
||||
|
||||
if (i915_terminally_wedged(i915))
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
return 0;
|
||||
|
||||
if (!HAS_ENGINE(i915, BCS0))
|
||||
return 0;
|
||||
|
||||
return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
|
||||
return i915_live_subtests(tests, i915);
|
||||
}
|
||||
|
@@ -6,6 +6,8 @@
|
||||
|
||||
#include <linux/prime_numbers.h>
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
|
||||
#include "i915_selftest.h"
|
||||
#include "selftests/i915_random.h"
|
||||
|
||||
@@ -226,7 +228,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
|
||||
intel_ring_advance(rq, cs);
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
i915_vma_unpin(vma);
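
This await-then-activate ordering recurs in nearly every selftest touched here; condensed into one place it reads roughly as below (a sketch against the 5.4-era i915 internals, wrapper name invented for illustration):

    #include "i915_drv.h"

    /*
     * Sketch of the pattern above: serialise the request against the
     * object's existing fences first, then publish the vma as active.
     */
    static int move_to_active_checked(struct i915_vma *vma,
                                      struct i915_request *rq,
                                      unsigned int flags)
    {
            int err;

            i915_vma_lock(vma);
            err = i915_request_await_object(rq, vma->obj,
                                            flags & EXEC_OBJECT_WRITE);
            if (err == 0)
                    err = i915_vma_move_to_active(vma, rq, flags);
            i915_vma_unlock(vma);

            return err;
    }
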
@@ -242,12 +246,15 @@ static bool always_valid(struct drm_i915_private *i915)
|
||||
|
||||
static bool needs_fence_registers(struct drm_i915_private *i915)
|
||||
{
|
||||
return !i915_terminally_wedged(i915);
|
||||
return !intel_gt_is_wedged(&i915->gt);
|
||||
}
|
||||
|
||||
static bool needs_mi_store_dword(struct drm_i915_private *i915)
|
||||
{
|
||||
if (i915_terminally_wedged(i915))
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
return false;
|
||||
|
||||
if (!HAS_ENGINE(i915, RCS0))
|
||||
return false;
|
||||
|
||||
return intel_engine_can_store_dword(i915->engine[RCS0]);
|
||||
|
@@ -7,6 +7,7 @@
|
||||
#include <linux/prime_numbers.h>
|
||||
|
||||
#include "gem/i915_gem_pm.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_reset.h"
|
||||
#include "i915_selftest.h"
|
||||
|
||||
@@ -31,7 +32,6 @@ static int live_nop_switch(void *arg)
|
||||
struct intel_engine_cs *engine;
|
||||
struct i915_gem_context **ctx;
|
||||
enum intel_engine_id id;
|
||||
intel_wakeref_t wakeref;
|
||||
struct igt_live_test t;
|
||||
struct drm_file *file;
|
||||
unsigned long n;
|
||||
@@ -53,7 +53,6 @@ static int live_nop_switch(void *arg)
|
||||
return PTR_ERR(file);
|
||||
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
|
||||
|
||||
ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
|
||||
if (!ctx) {
|
||||
@@ -85,7 +84,7 @@ static int live_nop_switch(void *arg)
|
||||
}
|
||||
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
|
||||
pr_err("Failed to populated %d contexts\n", nctx);
|
||||
i915_gem_set_wedged(i915);
|
||||
intel_gt_set_wedged(&i915->gt);
|
||||
err = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
@@ -129,7 +128,7 @@ static int live_nop_switch(void *arg)
|
||||
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
|
||||
pr_err("Switching between %ld contexts timed out\n",
|
||||
prime);
|
||||
i915_gem_set_wedged(i915);
|
||||
intel_gt_set_wedged(&i915->gt);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -152,76 +151,11 @@ static int live_nop_switch(void *arg)
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
mock_file_free(i915, file);
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct i915_vma *
|
||||
gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
|
||||
{
|
||||
struct drm_i915_gem_object *obj;
|
||||
const int gen = INTEL_GEN(vma->vm->i915);
|
||||
unsigned long n, size;
|
||||
u32 *cmd;
|
||||
int err;
|
||||
|
||||
size = (4 * count + 1) * sizeof(u32);
|
||||
size = round_up(size, PAGE_SIZE);
|
||||
obj = i915_gem_object_create_internal(vma->vm->i915, size);
|
||||
if (IS_ERR(obj))
|
||||
return ERR_CAST(obj);
|
||||
|
||||
cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto err;
|
||||
}
|
||||
|
||||
GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
|
||||
offset += vma->node.start;
|
||||
|
||||
for (n = 0; n < count; n++) {
|
||||
if (gen >= 8) {
|
||||
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
|
||||
*cmd++ = lower_32_bits(offset);
|
||||
*cmd++ = upper_32_bits(offset);
|
||||
*cmd++ = value;
|
||||
} else if (gen >= 4) {
|
||||
*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
|
||||
(gen < 6 ? MI_USE_GGTT : 0);
|
||||
*cmd++ = 0;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = value;
|
||||
} else {
|
||||
*cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = value;
|
||||
}
|
||||
offset += PAGE_SIZE;
|
||||
}
|
||||
*cmd = MI_BATCH_BUFFER_END;
|
||||
i915_gem_object_flush_map(obj);
|
||||
i915_gem_object_unpin_map(obj);
|
||||
|
||||
vma = i915_vma_instance(obj, vma->vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
return vma;
|
||||
|
||||
err:
|
||||
i915_gem_object_put(obj);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
|
||||
@@ -237,12 +171,8 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
|
||||
struct intel_engine_cs *engine,
|
||||
unsigned int dw)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
|
||||
struct i915_request *rq;
|
||||
struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm;
|
||||
struct i915_vma *vma;
|
||||
struct i915_vma *batch;
|
||||
unsigned int flags;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(obj->base.size > vm->total);
|
||||
@@ -253,7 +183,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
|
||||
return PTR_ERR(vma);
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, false);
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, true);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -262,70 +192,23 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* Within the GTT the huge objects maps every page onto
|
||||
/*
|
||||
* Within the GTT the huge objects maps every page onto
|
||||
* its 1024 real pages (using phys_pfn = dma_pfn % 1024).
|
||||
* We set the nth dword within the page using the nth
|
||||
* mapping via the GTT - this should exercise the GTT mapping
|
||||
* whilst checking that each context provides a unique view
|
||||
* into the object.
|
||||
*/
|
||||
batch = gpu_fill_dw(vma,
|
||||
(dw * real_page_count(obj)) << PAGE_SHIFT |
|
||||
(dw * sizeof(u32)),
|
||||
real_page_count(obj),
|
||||
dw);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto err_vma;
|
||||
}
|
||||
|
||||
rq = igt_request_alloc(ctx, engine);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto err_batch;
|
||||
}
|
||||
|
||||
flags = 0;
|
||||
if (INTEL_GEN(vm->i915) <= 5)
|
||||
flags |= I915_DISPATCH_SECURE;
|
||||
|
||||
err = engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
flags);
|
||||
if (err)
|
||||
goto err_request;
|
||||
|
||||
i915_vma_lock(batch);
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
i915_vma_unlock(batch);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_request_add(rq);
|
||||
|
||||
i915_vma_unpin(batch);
|
||||
i915_vma_close(batch);
|
||||
i915_vma_put(batch);
|
||||
|
||||
err = igt_gpu_fill_dw(vma,
|
||||
ctx,
|
||||
engine,
|
||||
(dw * real_page_count(obj)) << PAGE_SHIFT |
|
||||
(dw * sizeof(u32)),
|
||||
real_page_count(obj),
|
||||
dw);
|
||||
i915_vma_unpin(vma);
|
||||
|
||||
return 0;
|
||||
|
||||
skip_request:
|
||||
i915_request_skip(rq, err);
|
||||
err_request:
|
||||
i915_request_add(rq);
|
||||
err_batch:
|
||||
i915_vma_unpin(batch);
|
||||
i915_vma_put(batch);
|
||||
err_vma:
|
||||
i915_vma_unpin(vma);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -431,6 +314,9 @@ create_test_object(struct i915_gem_context *ctx,
|
||||
u64 size;
|
||||
int err;
|
||||
|
||||
/* Keep in GEM's good graces */
|
||||
i915_retire_requests(ctx->i915);
|
||||
|
||||
size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
|
||||
size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
|
||||
|
||||
@@ -507,7 +393,6 @@ static int igt_ctx_exec(void *arg)
|
||||
dw = 0;
|
||||
while (!time_after(jiffies, end_time)) {
|
||||
struct i915_gem_context *ctx;
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
ctx = live_context(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
@@ -523,8 +408,7 @@ static int igt_ctx_exec(void *arg)
|
||||
}
|
||||
}
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
||||
err = gpu_fill(obj, ctx, engine, dw);
|
||||
err = gpu_fill(obj, ctx, engine, dw);
|
||||
if (err) {
|
||||
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
|
||||
ndwords, dw, max_dwords(obj),
|
||||
@@ -565,6 +449,8 @@ out_unlock:
|
||||
mock_file_free(i915, file);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
i915_gem_drain_freed_objects(i915);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -623,7 +509,6 @@ static int igt_shared_ctx_exec(void *arg)
|
||||
ncontexts = 0;
|
||||
while (!time_after(jiffies, end_time)) {
|
||||
struct i915_gem_context *ctx;
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
ctx = kernel_context(i915);
|
||||
if (IS_ERR(ctx)) {
|
||||
@@ -642,9 +527,7 @@ static int igt_shared_ctx_exec(void *arg)
|
||||
}
|
||||
}
|
||||
|
||||
err = 0;
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
||||
err = gpu_fill(obj, ctx, engine, dw);
|
||||
err = gpu_fill(obj, ctx, engine, dw);
|
||||
if (err) {
|
||||
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
|
||||
ndwords, dw, max_dwords(obj),
|
||||
@@ -678,6 +561,10 @@ static int igt_shared_ctx_exec(void *arg)
|
||||
|
||||
dw += rem;
|
||||
}
|
||||
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
i915_gem_drain_freed_objects(i915);
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
}
|
||||
out_test:
|
||||
if (igt_live_test_end(&t))
|
||||
@@ -746,7 +633,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
|
||||
|
||||
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
|
||||
|
||||
vma = i915_vma_instance(obj, ce->gem_context->vm, NULL);
|
||||
vma = i915_vma_instance(obj, ce->vm, NULL);
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
@@ -779,13 +666,17 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
|
||||
goto err_request;
|
||||
|
||||
i915_vma_lock(batch);
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
err = i915_request_await_object(rq, batch->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
i915_vma_unlock(batch);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
@@ -820,8 +711,7 @@ err_vma:
|
||||
#define TEST_RESET BIT(2)
|
||||
|
||||
static int
|
||||
__sseu_prepare(struct drm_i915_private *i915,
|
||||
const char *name,
|
||||
__sseu_prepare(const char *name,
|
||||
unsigned int flags,
|
||||
struct intel_context *ce,
|
||||
struct igt_spinner **spin)
|
||||
@@ -837,14 +727,11 @@ __sseu_prepare(struct drm_i915_private *i915,
|
||||
if (!*spin)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = igt_spinner_init(*spin, i915);
|
||||
ret = igt_spinner_init(*spin, ce->engine->gt);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
rq = igt_spinner_create_request(*spin,
|
||||
ce->gem_context,
|
||||
ce->engine,
|
||||
MI_NOOP);
|
||||
rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
|
||||
if (IS_ERR(rq)) {
|
||||
ret = PTR_ERR(rq);
|
||||
goto err_fini;
|
||||
@@ -870,8 +757,7 @@ err_free:
|
||||
}
|
||||
|
||||
static int
|
||||
__read_slice_count(struct drm_i915_private *i915,
|
||||
struct intel_context *ce,
|
||||
__read_slice_count(struct intel_context *ce,
|
||||
struct drm_i915_gem_object *obj,
|
||||
struct igt_spinner *spin,
|
||||
u32 *rpcs)
|
||||
@@ -900,7 +786,7 @@ __read_slice_count(struct drm_i915_private *i915,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (INTEL_GEN(i915) >= 11) {
|
||||
if (INTEL_GEN(ce->engine->i915) >= 11) {
|
||||
s_mask = GEN11_RPCS_S_CNT_MASK;
|
||||
s_shift = GEN11_RPCS_S_CNT_SHIFT;
|
||||
} else {
|
||||
@@ -943,8 +829,7 @@ __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
|
||||
}
|
||||
|
||||
static int
|
||||
__sseu_finish(struct drm_i915_private *i915,
|
||||
const char *name,
|
||||
__sseu_finish(const char *name,
|
||||
unsigned int flags,
|
||||
struct intel_context *ce,
|
||||
struct drm_i915_gem_object *obj,
|
||||
@@ -956,19 +841,18 @@ __sseu_finish(struct drm_i915_private *i915,
|
||||
int ret = 0;
|
||||
|
||||
if (flags & TEST_RESET) {
|
||||
ret = i915_reset_engine(ce->engine, "sseu");
|
||||
ret = intel_engine_reset(ce->engine, "sseu");
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = __read_slice_count(i915, ce, obj,
|
||||
ret = __read_slice_count(ce, obj,
|
||||
flags & TEST_RESET ? NULL : spin, &rpcs);
|
||||
ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = __read_slice_count(i915, ce->engine->kernel_context, obj,
|
||||
NULL, &rpcs);
|
||||
ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
|
||||
ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
|
||||
|
||||
out:
|
||||
@@ -976,11 +860,12 @@ out:
|
||||
igt_spinner_end(spin);
|
||||
|
||||
if ((flags & TEST_IDLE) && ret == 0) {
|
||||
ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT);
|
||||
ret = i915_gem_wait_for_idle(ce->engine->i915,
|
||||
0, MAX_SCHEDULE_TIMEOUT);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = __read_slice_count(i915, ce, obj, NULL, &rpcs);
|
||||
ret = __read_slice_count(ce, obj, NULL, &rpcs);
|
||||
ret = __check_rpcs(name, rpcs, ret, expected,
|
||||
"Context", " after idle!");
|
||||
}
|
||||
@@ -989,8 +874,7 @@ out:
|
||||
}
|
||||
|
||||
static int
|
||||
__sseu_test(struct drm_i915_private *i915,
|
||||
const char *name,
|
||||
__sseu_test(const char *name,
|
||||
unsigned int flags,
|
||||
struct intel_context *ce,
|
||||
struct drm_i915_gem_object *obj,
|
||||
@@ -999,7 +883,7 @@ __sseu_test(struct drm_i915_private *i915,
|
||||
struct igt_spinner *spin = NULL;
|
||||
int ret;
|
||||
|
||||
ret = __sseu_prepare(i915, name, flags, ce, &spin);
|
||||
ret = __sseu_prepare(name, flags, ce, &spin);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -1007,7 +891,7 @@ __sseu_test(struct drm_i915_private *i915,
|
||||
if (ret)
|
||||
goto out_spin;
|
||||
|
||||
ret = __sseu_finish(i915, name, flags, ce, obj,
|
||||
ret = __sseu_finish(name, flags, ce, obj,
|
||||
hweight32(sseu.slice_mask), spin);
|
||||
|
||||
out_spin:
|
||||
@@ -1025,35 +909,33 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct intel_engine_cs *engine = i915->engine[RCS0];
|
||||
struct intel_sseu default_sseu = engine->sseu;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct i915_gem_context *ctx;
|
||||
struct intel_context *ce;
|
||||
struct intel_sseu pg_sseu;
|
||||
intel_wakeref_t wakeref;
|
||||
struct drm_file *file;
|
||||
int ret;
|
||||
|
||||
if (INTEL_GEN(i915) < 9)
|
||||
if (INTEL_GEN(i915) < 9 || !engine)
|
||||
return 0;
|
||||
|
||||
if (!RUNTIME_INFO(i915)->sseu.has_slice_pg)
|
||||
return 0;
|
||||
|
||||
if (hweight32(default_sseu.slice_mask) < 2)
|
||||
if (hweight32(engine->sseu.slice_mask) < 2)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Gen11 VME friendly power-gated configuration with half enabled
|
||||
* sub-slices.
|
||||
*/
|
||||
pg_sseu = default_sseu;
|
||||
pg_sseu = engine->sseu;
|
||||
pg_sseu.slice_mask = 1;
|
||||
pg_sseu.subslice_mask =
|
||||
~(~0 << (hweight32(default_sseu.subslice_mask) / 2));
|
||||
~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
|
||||
|
||||
pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
|
||||
name, flags, hweight32(default_sseu.slice_mask),
|
||||
name, flags, hweight32(engine->sseu.slice_mask),
|
||||
hweight32(pg_sseu.slice_mask));
|
||||
|
||||
file = mock_file(i915);
|
||||
@@ -1061,7 +943,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
|
||||
return PTR_ERR(file);
|
||||
|
||||
if (flags & TEST_RESET)
|
||||
igt_global_reset_lock(i915);
|
||||
igt_global_reset_lock(&i915->gt);
|
||||
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
|
||||
@@ -1078,12 +960,10 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
|
||||
|
||||
ce = i915_gem_context_get_engine(ctx, RCS0);
|
||||
if (IS_ERR(ce)) {
|
||||
ret = PTR_ERR(ce);
|
||||
goto out_rpm;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
ret = intel_context_pin(ce);
|
||||
@@ -1091,22 +971,22 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
|
||||
goto out_context;
|
||||
|
||||
/* First set the default mask. */
|
||||
ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
|
||||
ret = __sseu_test(name, flags, ce, obj, engine->sseu);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
|
||||
/* Then set a power-gated configuration. */
|
||||
ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
|
||||
ret = __sseu_test(name, flags, ce, obj, pg_sseu);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
|
||||
/* Back to defaults. */
|
||||
ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
|
||||
ret = __sseu_test(name, flags, ce, obj, engine->sseu);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
|
||||
/* One last power-gated configuration for the road. */
|
||||
ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
|
||||
ret = __sseu_test(name, flags, ce, obj, pg_sseu);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
|
||||
@@ -1117,15 +997,14 @@ out_fail:
|
||||
intel_context_unpin(ce);
|
||||
out_context:
|
||||
intel_context_put(ce);
|
||||
out_rpm:
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
out_put:
|
||||
i915_gem_object_put(obj);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
|
||||
if (flags & TEST_RESET)
|
||||
igt_global_reset_unlock(i915);
|
||||
igt_global_reset_unlock(&i915->gt);
|
||||
|
||||
mock_file_free(i915, file);
|
||||
|
||||
@@ -1194,7 +1073,7 @@ static int igt_ctx_readonly(void *arg)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm;
|
||||
vm = ctx->vm ?: &i915->ggtt.alias->vm;
|
||||
if (!vm || !vm->has_read_only) {
|
||||
err = 0;
|
||||
goto out_unlock;
|
||||
@@ -1207,8 +1086,6 @@ static int igt_ctx_readonly(void *arg)
|
||||
unsigned int id;
|
||||
|
||||
for_each_engine(engine, i915, id) {
|
||||
intel_wakeref_t wakeref;
|
||||
|
||||
if (!intel_engine_can_store_dword(engine))
|
||||
continue;
|
||||
|
||||
@@ -1223,9 +1100,7 @@ static int igt_ctx_readonly(void *arg)
|
||||
i915_gem_object_set_readonly(obj);
|
||||
}
|
||||
|
||||
err = 0;
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
||||
err = gpu_fill(obj, ctx, engine, dw);
|
||||
err = gpu_fill(obj, ctx, engine, dw);
|
||||
if (err) {
|
||||
pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
|
||||
ndwords, dw, max_dwords(obj),
|
||||
@@ -1347,7 +1222,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
|
||||
goto err_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
err = i915_request_await_object(rq, vma->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
@@ -1444,7 +1321,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
|
||||
goto err_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
@@ -1488,7 +1367,6 @@ static int igt_vm_isolation(void *arg)
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct i915_gem_context *ctx_a, *ctx_b;
|
||||
struct intel_engine_cs *engine;
|
||||
intel_wakeref_t wakeref;
|
||||
struct igt_live_test t;
|
||||
struct drm_file *file;
|
||||
I915_RND_STATE(prng);
|
||||
@@ -1535,8 +1413,6 @@ static int igt_vm_isolation(void *arg)
|
||||
GEM_BUG_ON(ctx_b->vm->total != vm_total);
|
||||
vm_total -= I915_GTT_PAGE_SIZE;
|
||||
|
||||
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
|
||||
|
||||
count = 0;
|
||||
for_each_engine(engine, i915, id) {
|
||||
IGT_TIMEOUT(end_time);
|
||||
@@ -1551,7 +1427,7 @@ static int igt_vm_isolation(void *arg)
|
||||
|
||||
div64_u64_rem(i915_prandom_u64_state(&prng),
|
||||
vm_total, &offset);
|
||||
offset &= -sizeof(u32);
|
||||
offset = round_down(offset, alignof_dword);
|
||||
offset += I915_GTT_PAGE_SIZE;
|
||||
|
||||
err = write_to_scratch(ctx_a, engine,
|
||||
@@ -1560,7 +1436,7 @@ static int igt_vm_isolation(void *arg)
|
||||
err = read_from_scratch(ctx_b, engine,
|
||||
offset, &value);
|
||||
if (err)
|
||||
goto out_rpm;
|
||||
goto out_unlock;
|
||||
|
||||
if (value) {
|
||||
pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
|
||||
@@ -1569,7 +1445,7 @@ static int igt_vm_isolation(void *arg)
|
||||
lower_32_bits(offset),
|
||||
this);
|
||||
err = -EINVAL;
|
||||
goto out_rpm;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
this++;
|
||||
@@ -1579,8 +1455,6 @@ static int igt_vm_isolation(void *arg)
|
||||
pr_info("Checked %lu scratch offsets across %d engines\n",
|
||||
count, RUNTIME_INFO(i915)->num_engines);
|
||||
|
||||
out_rpm:
|
||||
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
|
||||
out_unlock:
|
||||
if (igt_live_test_end(&t))
|
||||
err = -EIO;
|
||||
@@ -1736,7 +1610,7 @@ int i915_gem_context_mock_selftests(void)
|
||||
return err;
|
||||
}
|
||||
|
||||
int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
|
||||
int i915_gem_context_live_selftests(struct drm_i915_private *i915)
|
||||
{
|
||||
static const struct i915_subtest tests[] = {
|
||||
SUBTEST(live_nop_switch),
|
||||
@@ -1747,8 +1621,8 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
|
||||
SUBTEST(igt_vm_isolation),
|
||||
};
|
||||
|
||||
if (i915_terminally_wedged(dev_priv))
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
return 0;
|
||||
|
||||
return i915_subtests(tests, dev_priv);
|
||||
return i915_live_subtests(tests, i915);
|
||||
}
|
||||
|
@@ -20,7 +20,7 @@ static int igt_dmabuf_export(void *arg)
|
||||
if (IS_ERR(obj))
|
||||
return PTR_ERR(obj);
|
||||
|
||||
dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
|
||||
dmabuf = i915_gem_prime_export(&obj->base, 0);
|
||||
i915_gem_object_put(obj);
|
||||
if (IS_ERR(dmabuf)) {
|
||||
pr_err("i915_gem_prime_export failed with err=%d\n",
|
||||
@@ -44,7 +44,7 @@ static int igt_dmabuf_import_self(void *arg)
|
||||
if (IS_ERR(obj))
|
||||
return PTR_ERR(obj);
|
||||
|
||||
dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
|
||||
dmabuf = i915_gem_prime_export(&obj->base, 0);
|
||||
if (IS_ERR(dmabuf)) {
|
||||
pr_err("i915_gem_prime_export failed with err=%d\n",
|
||||
(int)PTR_ERR(dmabuf));
|
||||
@@ -219,7 +219,7 @@ static int igt_dmabuf_export_vmap(void *arg)
|
||||
if (IS_ERR(obj))
|
||||
return PTR_ERR(obj);
|
||||
|
||||
dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
|
||||
dmabuf = i915_gem_prime_export(&obj->base, 0);
|
||||
if (IS_ERR(dmabuf)) {
|
||||
pr_err("i915_gem_prime_export failed with err=%d\n",
|
||||
(int)PTR_ERR(dmabuf));
|
||||
@@ -266,7 +266,7 @@ static int igt_dmabuf_export_kmap(void *arg)
|
||||
if (IS_ERR(obj))
|
||||
return PTR_ERR(obj);
|
||||
|
||||
dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
|
||||
dmabuf = i915_gem_prime_export(&obj->base, 0);
|
||||
i915_gem_object_put(obj);
|
||||
if (IS_ERR(dmabuf)) {
|
||||
err = PTR_ERR(dmabuf);
|
||||
|
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <linux/prime_numbers.h>
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_gt_pm.h"
|
||||
#include "huge_gem_object.h"
|
||||
#include "i915_selftest.h"
|
||||
@@ -143,7 +144,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
|
||||
if (offset >= obj->base.size)
|
||||
continue;
|
||||
|
||||
i915_gem_flush_ggtt_writes(to_i915(obj->base.dev));
|
||||
intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);
|
||||
|
||||
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
|
||||
cpu = kmap(p) + offset_in_page(offset);
|
||||
@@ -327,7 +328,8 @@ out:
|
||||
static int make_obj_busy(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_request *rq;
|
||||
struct intel_engine_cs *engine;
|
||||
enum intel_engine_id id;
|
||||
struct i915_vma *vma;
|
||||
int err;
|
||||
|
||||
@@ -339,18 +341,25 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
rq = i915_request_create(i915->engine[RCS0]->kernel_context);
|
||||
if (IS_ERR(rq)) {
|
||||
i915_vma_unpin(vma);
|
||||
return PTR_ERR(rq);
|
||||
for_each_engine(engine, i915, id) {
|
||||
struct i915_request *rq;
|
||||
|
||||
rq = i915_request_create(engine->kernel_context);
|
||||
if (IS_ERR(rq)) {
|
||||
i915_vma_unpin(vma);
|
||||
return PTR_ERR(rq);
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq,
|
||||
EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
|
||||
i915_request_add(rq);
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
|
||||
i915_request_add(rq);
|
||||
|
||||
i915_vma_unpin(vma);
|
||||
i915_gem_object_put(obj); /* leave it only alive via its active ref */
|
||||
|
||||
@@ -376,9 +385,9 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
|
||||
|
||||
static void disable_retire_worker(struct drm_i915_private *i915)
|
||||
{
|
||||
i915_gem_shrinker_unregister(i915);
|
||||
i915_gem_driver_unregister__shrinker(i915);
|
||||
|
||||
intel_gt_pm_get(i915);
|
||||
intel_gt_pm_get(&i915->gt);
|
||||
|
||||
cancel_delayed_work_sync(&i915->gem.retire_work);
|
||||
flush_work(&i915->gem.idle_work);
|
||||
@@ -386,13 +395,25 @@ static void disable_retire_worker(struct drm_i915_private *i915)
|
||||
|
||||
static void restore_retire_worker(struct drm_i915_private *i915)
|
||||
{
|
||||
intel_gt_pm_put(i915);
|
||||
intel_gt_pm_put(&i915->gt);
|
||||
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
igt_flush_test(i915, I915_WAIT_LOCKED);
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
|
||||
i915_gem_shrinker_register(i915);
|
||||
i915_gem_driver_register__shrinker(i915);
|
||||
}
|
||||
|
||||
static void mmap_offset_lock(struct drm_i915_private *i915)
|
||||
__acquires(&i915->drm.vma_offset_manager->vm_lock)
|
||||
{
|
||||
write_lock(&i915->drm.vma_offset_manager->vm_lock);
|
||||
}
|
||||
|
||||
static void mmap_offset_unlock(struct drm_i915_private *i915)
|
||||
__releases(&i915->drm.vma_offset_manager->vm_lock)
|
||||
{
|
||||
write_unlock(&i915->drm.vma_offset_manager->vm_lock);
|
||||
}
|
||||
|
||||
static int igt_mmap_offset_exhaustion(void *arg)
|
||||
@@ -413,7 +434,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
|
||||
drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
|
||||
resv.start = hole_start;
|
||||
resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
|
||||
mmap_offset_lock(i915);
|
||||
err = drm_mm_reserve_node(mm, &resv);
|
||||
mmap_offset_unlock(i915);
|
||||
if (err) {
|
||||
pr_err("Failed to trim VMA manager, err=%d\n", err);
|
||||
goto out_park;
|
||||
@@ -458,7 +481,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
|
||||
|
||||
/* Now fill with busy dead objects that we expect to reap */
|
||||
for (loop = 0; loop < 3; loop++) {
|
||||
if (i915_terminally_wedged(i915))
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
break;
|
||||
|
||||
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
|
||||
@@ -474,19 +497,12 @@ static int igt_mmap_offset_exhaustion(void *arg)
|
||||
pr_err("[loop %d] Failed to busy the object\n", loop);
|
||||
goto err_obj;
|
||||
}
|
||||
|
||||
/* NB we rely on the _active_ reference to access obj now */
|
||||
GEM_BUG_ON(!i915_gem_object_is_active(obj));
|
||||
err = create_mmap_offset(obj);
|
||||
if (err) {
|
||||
pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
|
||||
loop, err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
mmap_offset_lock(i915);
|
||||
drm_mm_remove_node(&resv);
|
||||
mmap_offset_unlock(i915);
|
||||
out_park:
|
||||
restore_retire_worker(i915);
|
||||
return err;
|
||||
|
@@ -3,16 +3,19 @@
|
||||
* Copyright © 2019 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "gt/intel_gt.h"
|
||||
|
||||
#include "i915_selftest.h"
|
||||
|
||||
#include "selftests/igt_flush_test.h"
|
||||
#include "selftests/mock_drm.h"
|
||||
#include "huge_gem_object.h"
|
||||
#include "mock_context.h"
|
||||
|
||||
static int igt_fill_blt(void *arg)
|
||||
{
|
||||
struct intel_context *ce = arg;
|
||||
struct drm_i915_private *i915 = ce->gem_context->i915;
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_context *ce = i915->engine[BCS0]->kernel_context;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct rnd_state prng;
|
||||
IGT_TIMEOUT(end);
|
||||
@@ -21,16 +24,26 @@ static int igt_fill_blt(void *arg)
|
||||
|
||||
prandom_seed_state(&prng, i915_selftest.random_seed);
|
||||
|
||||
/*
|
||||
* XXX: needs some threads to scale all these tests, also maybe throw
|
||||
* in submission from higher priority context to see if we are
|
||||
* preempted for very large objects...
|
||||
*/
|
||||
|
||||
do {
|
||||
u32 sz = prandom_u32_state(&prng) % SZ_32M;
|
||||
const u32 max_block_size = S16_MAX * PAGE_SIZE;
|
||||
u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
|
||||
u32 phys_sz = sz % (max_block_size + 1);
|
||||
u32 val = prandom_u32_state(&prng);
|
||||
u32 i;
|
||||
|
||||
sz = round_up(sz, PAGE_SIZE);
|
||||
phys_sz = round_up(phys_sz, PAGE_SIZE);
|
||||
|
||||
pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
|
||||
pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
|
||||
phys_sz, sz, val);
|
||||
|
||||
obj = i915_gem_object_create_internal(i915, sz);
|
||||
obj = huge_gem_object(i915, phys_sz, sz);
|
||||
if (IS_ERR(obj)) {
|
||||
err = PTR_ERR(obj);
|
||||
goto err_flush;
|
||||
@@ -46,7 +59,8 @@ static int igt_fill_blt(void *arg)
|
||||
* Make sure the potentially async clflush does its job, if
|
||||
* required.
|
||||
*/
|
||||
memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
|
||||
memset32(vaddr, val ^ 0xdeadbeaf,
|
||||
huge_gem_object_phys_size(obj) / sizeof(u32));
|
||||
|
||||
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
|
||||
obj->cache_dirty = true;
|
||||
@@ -63,7 +77,7 @@ static int igt_fill_blt(void *arg)
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
|
||||
for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
|
||||
if (vaddr[i] != val) {
|
||||
pr_err("vaddr[%u]=%x, expected=%x\n", i,
|
||||
vaddr[i], val);
|
||||
@@ -83,11 +97,111 @@ err_unpin:
|
||||
err_put:
|
||||
i915_gem_object_put(obj);
|
||||
err_flush:
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
if (igt_flush_test(i915, I915_WAIT_LOCKED))
|
||||
err = -EIO;
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
if (err == -ENOMEM)
|
||||
err = 0;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int igt_copy_blt(void *arg)
|
||||
{
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct intel_context *ce = i915->engine[BCS0]->kernel_context;
|
||||
struct drm_i915_gem_object *src, *dst;
|
||||
struct rnd_state prng;
|
||||
IGT_TIMEOUT(end);
|
||||
u32 *vaddr;
|
||||
int err = 0;
|
||||
|
||||
prandom_seed_state(&prng, i915_selftest.random_seed);
|
||||
|
||||
do {
|
||||
const u32 max_block_size = S16_MAX * PAGE_SIZE;
|
||||
u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
|
||||
u32 phys_sz = sz % (max_block_size + 1);
|
||||
u32 val = prandom_u32_state(&prng);
|
||||
u32 i;
|
||||
|
||||
sz = round_up(sz, PAGE_SIZE);
|
||||
phys_sz = round_up(phys_sz, PAGE_SIZE);
|
||||
|
||||
pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
|
||||
phys_sz, sz, val);
|
||||
|
||||
src = huge_gem_object(i915, phys_sz, sz);
|
||||
if (IS_ERR(src)) {
|
||||
err = PTR_ERR(src);
|
||||
goto err_flush;
|
||||
}
|
||||
|
||||
vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
|
||||
if (IS_ERR(vaddr)) {
|
||||
err = PTR_ERR(vaddr);
|
||||
goto err_put_src;
|
||||
}
|
||||
|
||||
memset32(vaddr, val,
|
||||
huge_gem_object_phys_size(src) / sizeof(u32));
|
||||
|
||||
i915_gem_object_unpin_map(src);
|
||||
|
||||
if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
|
||||
src->cache_dirty = true;
|
||||
|
||||
dst = huge_gem_object(i915, phys_sz, sz);
|
||||
if (IS_ERR(dst)) {
|
||||
err = PTR_ERR(dst);
|
||||
goto err_put_src;
|
||||
}
|
||||
|
||||
vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
|
||||
if (IS_ERR(vaddr)) {
|
||||
err = PTR_ERR(vaddr);
|
||||
goto err_put_dst;
|
||||
}
|
||||
|
||||
memset32(vaddr, val ^ 0xdeadbeaf,
|
||||
huge_gem_object_phys_size(dst) / sizeof(u32));
|
||||
|
||||
if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
|
||||
dst->cache_dirty = true;
|
||||
|
||||
mutex_lock(&i915->drm.struct_mutex);
|
||||
err = i915_gem_object_copy_blt(src, dst, ce);
|
||||
mutex_unlock(&i915->drm.struct_mutex);
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
i915_gem_object_lock(dst);
|
||||
err = i915_gem_object_set_to_cpu_domain(dst, false);
|
||||
i915_gem_object_unlock(dst);
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
|
||||
if (vaddr[i] != val) {
|
||||
pr_err("vaddr[%u]=%x, expected=%x\n", i,
|
||||
vaddr[i], val);
|
||||
err = -EINVAL;
|
||||
goto err_unpin;
|
||||
}
|
||||
}
|
||||
|
||||
i915_gem_object_unpin_map(dst);
|
||||
|
||||
i915_gem_object_put(src);
|
||||
i915_gem_object_put(dst);
|
||||
} while (!time_after(jiffies, end));
|
||||
|
||||
goto err_flush;
|
||||
|
||||
err_unpin:
|
||||
i915_gem_object_unpin_map(dst);
|
||||
err_put_dst:
|
||||
i915_gem_object_put(dst);
|
||||
err_put_src:
|
||||
i915_gem_object_put(src);
|
||||
err_flush:
|
||||
if (err == -ENOMEM)
|
||||
err = 0;
|
||||
|
||||
@@ -98,13 +212,14 @@ int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
|
||||
{
|
||||
static const struct i915_subtest tests[] = {
|
||||
SUBTEST(igt_fill_blt),
|
||||
SUBTEST(igt_copy_blt),
|
||||
};
|
||||
|
||||
if (i915_terminally_wedged(i915))
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
return 0;
|
||||
|
||||
if (!HAS_ENGINE(i915, BCS0))
|
||||
return 0;
|
||||
|
||||
return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
|
||||
return i915_live_subtests(tests, i915);
|
||||
}
|
||||
|
@@ -9,6 +9,8 @@
|
||||
#include "gem/i915_gem_context.h"
|
||||
#include "gem/i915_gem_pm.h"
|
||||
#include "gt/intel_context.h"
|
||||
#include "i915_vma.h"
|
||||
#include "i915_drv.h"
|
||||
|
||||
#include "i915_request.h"
|
||||
|
||||
@@ -23,7 +25,7 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
|
||||
* GGTT space, so do this first before we reserve a seqno for
|
||||
* ourselves.
|
||||
*/
|
||||
ce = i915_gem_context_get_engine(ctx, engine->id);
|
||||
ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
|
||||
if (IS_ERR(ce))
|
||||
return ERR_CAST(ce);
|
||||
|
||||
@@ -32,3 +34,140 @@ igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
struct i915_vma *
|
||||
igt_emit_store_dw(struct i915_vma *vma,
|
||||
u64 offset,
|
||||
unsigned long count,
|
||||
u32 val)
|
||||
{
|
||||
struct drm_i915_gem_object *obj;
|
||||
const int gen = INTEL_GEN(vma->vm->i915);
|
||||
unsigned long n, size;
|
||||
u32 *cmd;
|
||||
int err;
|
||||
|
||||
size = (4 * count + 1) * sizeof(u32);
|
||||
size = round_up(size, PAGE_SIZE);
|
||||
obj = i915_gem_object_create_internal(vma->vm->i915, size);
|
||||
if (IS_ERR(obj))
|
||||
return ERR_CAST(obj);
|
||||
|
||||
cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto err;
|
||||
}
|
||||
|
||||
GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
|
||||
offset += vma->node.start;
|
||||
|
||||
for (n = 0; n < count; n++) {
|
||||
if (gen >= 8) {
|
||||
*cmd++ = MI_STORE_DWORD_IMM_GEN4;
|
||||
*cmd++ = lower_32_bits(offset);
|
||||
*cmd++ = upper_32_bits(offset);
|
||||
*cmd++ = val;
|
||||
} else if (gen >= 4) {
|
||||
*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
|
||||
(gen < 6 ? MI_USE_GGTT : 0);
|
||||
*cmd++ = 0;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = val;
|
||||
} else {
|
||||
*cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
|
||||
*cmd++ = offset;
|
||||
*cmd++ = val;
|
||||
}
|
||||
offset += PAGE_SIZE;
|
||||
}
|
||||
*cmd = MI_BATCH_BUFFER_END;
|
||||
i915_gem_object_unpin_map(obj);
|
||||
|
||||
vma = i915_vma_instance(obj, vma->vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
return vma;
|
||||
|
||||
err:
|
||||
i915_gem_object_put(obj);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
int igt_gpu_fill_dw(struct i915_vma *vma,
|
||||
struct i915_gem_context *ctx,
|
||||
struct intel_engine_cs *engine,
|
||||
u64 offset,
|
||||
unsigned long count,
|
||||
u32 val)
|
||||
{
|
||||
struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *batch;
|
||||
unsigned int flags;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(vma->size > vm->total);
|
||||
GEM_BUG_ON(!intel_engine_can_store_dword(engine));
|
||||
GEM_BUG_ON(!i915_vma_is_pinned(vma));
|
||||
|
||||
batch = igt_emit_store_dw(vma, offset, count, val);
|
||||
if (IS_ERR(batch))
|
||||
return PTR_ERR(batch);
|
||||
|
||||
rq = igt_request_alloc(ctx, engine);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto err_batch;
|
||||
}
|
||||
|
||||
flags = 0;
|
||||
if (INTEL_GEN(vm->i915) <= 5)
|
||||
flags |= I915_DISPATCH_SECURE;
|
||||
|
||||
err = engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
flags);
|
||||
if (err)
|
||||
goto err_request;
|
||||
|
||||
i915_vma_lock(batch);
|
||||
err = i915_request_await_object(rq, batch->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
i915_vma_unlock(batch);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_request_add(rq);
|
||||
|
||||
i915_vma_unpin(batch);
|
||||
i915_vma_close(batch);
|
||||
i915_vma_put(batch);
|
||||
|
||||
return 0;
|
||||
|
||||
skip_request:
|
||||
i915_request_skip(rq, err);
|
||||
err_request:
|
||||
i915_request_add(rq);
|
||||
err_batch:
|
||||
i915_vma_unpin(batch);
|
||||
i915_vma_put(batch);
|
||||
return err;
|
||||
}
|
||||
|
@@ -7,11 +7,27 @@
|
||||
#ifndef __IGT_GEM_UTILS_H__
|
||||
#define __IGT_GEM_UTILS_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct i915_request;
|
||||
struct i915_gem_context;
|
||||
struct intel_engine_cs;
|
||||
struct i915_vma;
|
||||
|
||||
struct i915_request *
|
||||
igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
|
||||
|
||||
struct i915_vma *
|
||||
igt_emit_store_dw(struct i915_vma *vma,
|
||||
u64 offset,
|
||||
unsigned long count,
|
||||
u32 val);
|
||||
|
||||
int igt_gpu_fill_dw(struct i915_vma *vma,
|
||||
struct i915_gem_context *ctx,
|
||||
struct intel_engine_cs *engine,
|
||||
u64 offset,
|
||||
unsigned long count,
|
||||
u32 val);
|
||||
|
||||
#endif /* __IGT_GEM_UTILS_H__ */
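
For reference, the gpu_write() conversion earlier in the series shows how callers collapse onto this helper; a usage sketch along those lines (names taken from the header above, the vma assumed to be pinned by the caller as igt_gpu_fill_dw() asserts):

    /* Fill one dword per page of an already-pinned vma via the GPU. */
    err = igt_gpu_fill_dw(vma, ctx, engine,
                          dw * sizeof(u32),        /* offset of the dword */
                          vma->size >> PAGE_SHIFT, /* one store per page */
                          val);
    if (err)
            return err;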