Merge tag 'drm-intel-next-2019-04-04' into gvt-next

Merge back drm-intel-next for engine name definition refinement and 54939ea0bd ("drm/i915: Switch to use HWS indices rather than addresses") that would need gvt fixes to depend on. Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
2019-04-16 16:50:34 +08:00
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,10 +61,12 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 	}

 	mutex_lock(&dev_priv->drm.struct_mutex);
+	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
+	mmio_hw_access_post(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
@@ -178,7 +180,7 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu)
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);

-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }

 static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
@@ -204,7 +206,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 	_clear_vgpu_fence(vgpu);

 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return 0;
 out_free_fence:
 	gvt_vgpu_err("Failed to alloc fences\n");
@@ -217,7 +219,7 @@ out_free_fence:
 		vgpu->fence.regs[i] = NULL;
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	return -ENOSPC;
 }

@@ -315,7 +317,7 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)

 	intel_runtime_pm_get(dev_priv);
 	_clear_vgpu_fence(vgpu);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }

 /**
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -391,12 +391,12 @@ struct cmd_info {
 #define F_POST_HANDLE	(1<<2)
 	u32 flag;

-#define R_RCS	(1 << RCS)
-#define R_VCS1  (1 << VCS)
-#define R_VCS2  (1 << VCS2)
+#define R_RCS	BIT(RCS0)
+#define R_VCS1  BIT(VCS0)
+#define R_VCS2  BIT(VCS1)
 #define R_VCS	(R_VCS1 | R_VCS2)
-#define R_BCS	(1 << BCS)
-#define R_VECS	(1 << VECS)
+#define R_BCS	BIT(BCS0)
+#define R_VECS	BIT(VECS0)
 #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS)
 	/* rings that support this cmd: BLT/RCS/VCS/VECS */
 	u16 rings;
@@ -558,7 +558,7 @@ static const struct decode_info decode_info_vebox = {
 };

 static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
-	[RCS] = {
+	[RCS0] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -569,7 +569,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
 		NULL,
 	},

-	[VCS] = {
+	[VCS0] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -580,7 +580,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
 		NULL,
 	},

-	[BCS] = {
+	[BCS0] = {
 		&decode_info_mi,
 		NULL,
 		&decode_info_2d,
@@ -591,7 +591,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
 		NULL,
 	},

-	[VECS] = {
+	[VECS0] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -602,7 +602,7 @@ static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
 		NULL,
 	},

-	[VCS2] = {
+	[VCS1] = {
 		&decode_info_mi,
 		NULL,
 		NULL,
@@ -631,8 +631,7 @@ static inline const struct cmd_info *find_cmd_entry(struct intel_gvt *gvt,
 	struct cmd_entry *e;

 	hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) {
-		if ((opcode == e->info->opcode) &&
-				(e->info->rings & (1 << ring_id)))
+		if (opcode == e->info->opcode && e->info->rings & BIT(ring_id))
 			return e->info;
 	}
 	return NULL;
@@ -943,15 +942,12 @@ static int cmd_handler_lri(struct parser_exec_state *s)
 	struct intel_gvt *gvt = s->vgpu->gvt;

 	for (i = 1; i < cmd_len; i += 2) {
-		if (IS_BROADWELL(gvt->dev_priv) &&
-				(s->ring_id != RCS)) {
-			if (s->ring_id == BCS &&
-					cmd_reg(s, i) ==
-					i915_mmio_reg_offset(DERRMR))
+		if (IS_BROADWELL(gvt->dev_priv) && s->ring_id != RCS0) {
+			if (s->ring_id == BCS0 &&
+			    cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR))
 				ret |= 0;
 			else
-				ret |= (cmd_reg_inhibit(s, i)) ?
-					-EBADRQC : 0;
+				ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0;
 		}
 		if (ret)
 			break;
@@ -1047,27 +1043,27 @@ struct cmd_interrupt_event {
 };

 static struct cmd_interrupt_event cmd_interrupt_events[] = {
-	[RCS] = {
+	[RCS0] = {
 		.pipe_control_notify = RCS_PIPE_CONTROL,
 		.mi_flush_dw = INTEL_GVT_EVENT_RESERVED,
 		.mi_user_interrupt = RCS_MI_USER_INTERRUPT,
 	},
-	[BCS] = {
+	[BCS0] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = BCS_MI_FLUSH_DW,
 		.mi_user_interrupt = BCS_MI_USER_INTERRUPT,
 	},
-	[VCS] = {
+	[VCS0] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = VCS_MI_FLUSH_DW,
 		.mi_user_interrupt = VCS_MI_USER_INTERRUPT,
 	},
-	[VCS2] = {
+	[VCS1] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = VCS2_MI_FLUSH_DW,
 		.mi_user_interrupt = VCS2_MI_USER_INTERRUPT,
 	},
-	[VECS] = {
+	[VECS0] = {
 		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
 		.mi_flush_dw = VECS_MI_FLUSH_DW,
 		.mi_user_interrupt = VECS_MI_USER_INTERRUPT,
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -153,7 +153,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;

-	obj = i915_gem_object_alloc(dev_priv);
+	obj = i915_gem_object_alloc();
 	if (obj == NULL)
 		return NULL;

--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -47,17 +47,16 @@
 		((a)->lrca == (b)->lrca))

 static int context_switch_events[] = {
-	[RCS] = RCS_AS_CONTEXT_SWITCH,
-	[BCS] = BCS_AS_CONTEXT_SWITCH,
-	[VCS] = VCS_AS_CONTEXT_SWITCH,
-	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
-	[VECS] = VECS_AS_CONTEXT_SWITCH,
+	[RCS0]  = RCS_AS_CONTEXT_SWITCH,
+	[BCS0]  = BCS_AS_CONTEXT_SWITCH,
+	[VCS0]  = VCS_AS_CONTEXT_SWITCH,
+	[VCS1]  = VCS2_AS_CONTEXT_SWITCH,
+	[VECS0] = VECS_AS_CONTEXT_SWITCH,
 };

-static int ring_id_to_context_switch_event(int ring_id)
+static int ring_id_to_context_switch_event(unsigned int ring_id)
 {
-	if (WARN_ON(ring_id < RCS ||
-		    ring_id >= ARRAY_SIZE(context_switch_events)))
+	if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events)))
 		return -EINVAL;

 	return context_switch_events[ring_id];
@@ -411,7 +410,7 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload)
 	gvt_dbg_el("complete workload %p status %d\n", workload,
 			workload->status);

-	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
+	if (workload->status || (vgpu->resetting_eng & BIT(ring_id)))
 		goto out;

 	if (!list_empty(workload_q_head(vgpu, ring_id))) {
@@ -527,12 +526,13 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }

-static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask)
+static void clean_execlist(struct intel_vgpu *vgpu,
+			   intel_engine_mask_t engine_mask)
 {
-	unsigned int tmp;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine;
 	struct intel_vgpu_submission *s = &vgpu->submission;
+	intel_engine_mask_t tmp;

 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
 		kfree(s->ring_scan_buffer[engine->id]);
@@ -542,18 +542,18 @@ static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask)
 }

 static void reset_execlist(struct intel_vgpu *vgpu,
-		unsigned long engine_mask)
+			   intel_engine_mask_t engine_mask)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine;
-	unsigned int tmp;
+	intel_engine_mask_t tmp;

 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
 		init_vgpu_execlist(vgpu, engine->id);
 }

 static int init_execlist(struct intel_vgpu *vgpu,
-			 unsigned long engine_mask)
+			 intel_engine_mask_t engine_mask)
 {
 	reset_execlist(vgpu, engine_mask);
 	return 0;
--- a/drivers/gpu/drm/i915/gvt/execlist.h
+++ b/drivers/gpu/drm/i915/gvt/execlist.h
@@ -180,6 +180,6 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu);
 int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id);

 void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
-		unsigned long engine_mask);
+			       intel_engine_mask_t engine_mask);

 #endif /*_GVT_EXECLIST_H_*/
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.c
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -231,7 +231,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
 		plane->bpp = skl_pixel_formats[fmt].bpp;
 		plane->drm_format = skl_pixel_formats[fmt].drm_format;
 	} else {
-		plane->tiled = !!(val & DISPPLANE_TILED);
+		plane->tiled = val & DISPPLANE_TILED;
 		fmt = bdw_format_to_drm(val & DISPPLANE_PIXFORMAT_MASK);
 		plane->bpp = bdw_pixel_formats[fmt].bpp;
 		plane->drm_format = bdw_pixel_formats[fmt].drm_format;
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2447,10 +2447,11 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)

 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
 {
-	struct intel_gvt_partial_pte *pos;
+	struct intel_gvt_partial_pte *pos, *next;

-	list_for_each_entry(pos,
-			&vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) {
+	list_for_each_entry_safe(pos, next,
+				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
+				 list) {
 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
 			pos->offset, pos->data);
 		kfree(pos);
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -141,9 +141,9 @@ enum {

 struct intel_vgpu_submission_ops {
 	const char *name;
-	int (*init)(struct intel_vgpu *vgpu, unsigned long engine_mask);
-	void (*clean)(struct intel_vgpu *vgpu, unsigned long engine_mask);
-	void (*reset)(struct intel_vgpu *vgpu, unsigned long engine_mask);
+	int (*init)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask);
+	void (*clean)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask);
+	void (*reset)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask);
 };

 struct intel_vgpu_submission {
@@ -481,7 +481,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_release_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
-				 unsigned int engine_mask);
+				 intel_engine_mask_t engine_mask);
 void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu);
@@ -589,7 +589,7 @@ static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)

 static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 }

 /**
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -311,7 +311,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu,
 static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 			    void *p_data, unsigned int bytes)
 {
-	unsigned int engine_mask = 0;
+	intel_engine_mask_t engine_mask = 0;
 	u32 data;

 	write_vreg(vgpu, offset, p_data, bytes);
@@ -323,25 +323,25 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	} else {
 		if (data & GEN6_GRDOM_RENDER) {
 			gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id);
-			engine_mask |= (1 << RCS);
+			engine_mask |= BIT(RCS0);
 		}
 		if (data & GEN6_GRDOM_MEDIA) {
 			gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id);
-			engine_mask |= (1 << VCS);
+			engine_mask |= BIT(VCS0);
 		}
 		if (data & GEN6_GRDOM_BLT) {
 			gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id);
-			engine_mask |= (1 << BCS);
+			engine_mask |= BIT(BCS0);
 		}
 		if (data & GEN6_GRDOM_VECS) {
 			gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id);
-			engine_mask |= (1 << VECS);
+			engine_mask |= BIT(VECS0);
 		}
 		if (data & GEN8_GRDOM_MEDIA2) {
 			gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id);
-			if (HAS_BSD2(vgpu->gvt->dev_priv))
-				engine_mask |= (1 << VCS2);
+			engine_mask |= BIT(VCS1);
 		}
+		engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask;
 	}

 	/* vgpu_lock already hold by emulate mmio r/w */
@@ -1729,7 +1729,7 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 			return 0;

 		ret = intel_vgpu_select_submission_ops(vgpu,
-			       ENGINE_MASK(ring_id),
+			       BIT(ring_id),
 			       INTEL_VGPU_EXECLIST_SUBMISSION);
 		if (ret)
 			return ret;
@@ -1749,19 +1749,19 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu,

 	switch (offset) {
 	case 0x4260:
-		id = RCS;
+		id = RCS0;
 		break;
 	case 0x4264:
-		id = VCS;
+		id = VCS0;
 		break;
 	case 0x4268:
-		id = VCS2;
+		id = VCS1;
 		break;
 	case 0x426c:
-		id = BCS;
+		id = BCS0;
 		break;
 	case 0x4270:
-		id = VECS;
+		id = VECS0;
 		break;
 	default:
 		return -EINVAL;
@@ -1818,7 +1818,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
 	MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \
 	MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \
 	MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \
-	if (HAS_BSD2(dev_priv)) \
+	if (HAS_ENGINE(dev_priv, VCS1)) \
 		MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \
 } while (0)

@@ -1873,7 +1873,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL);

 	MMIO_GM_RDR(_MMIO(0x2148), D_ALL, NULL, NULL);
-	MMIO_GM_RDR(CCID, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(CCID(RENDER_RING_BASE), D_ALL, NULL, NULL);
 	MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL);
 	MMIO_D(GEN7_CXT_SIZE, D_ALL);

@@ -2836,6 +2836,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x21f0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	return 0;
 }

--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -47,7 +47,7 @@ struct intel_gvt_mpt {
 	int (*host_init)(struct device *dev, void *gvt, const void *ops);
 	void (*host_exit)(struct device *dev);
 	int (*attach_vgpu)(void *vgpu, unsigned long *handle);
-	void (*detach_vgpu)(unsigned long handle);
+	void (*detach_vgpu)(void *vgpu);
 	int (*inject_msi)(unsigned long handle, u32 addr, u16 data);
 	unsigned long (*from_virt_to_mfn)(void *p);
 	int (*enable_page_track)(unsigned long handle, u64 gfn);
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -536,7 +536,7 @@ static void gen8_init_irq(
 	SET_BIT_INFO(irq, 4, VCS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT1);
 	SET_BIT_INFO(irq, 8, VCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT1);

-	if (HAS_BSD2(gvt->dev_priv)) {
+	if (HAS_ENGINE(gvt->dev_priv, VCS1)) {
 		SET_BIT_INFO(irq, 16, VCS2_MI_USER_INTERRUPT,
 			INTEL_GVT_IRQ_INFO_GT1);
 		SET_BIT_INFO(irq, 20, VCS2_MI_FLUSH_DW,
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1147,7 +1147,7 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
 {
 	unsigned int index;
 	u64 virtaddr;
-	unsigned long req_size, pgoff = 0;
+	unsigned long req_size, pgoff, req_start;
 	pgprot_t pg_prot;
 	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

@@ -1165,7 +1165,17 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
 	pg_prot = vma->vm_page_prot;
 	virtaddr = vma->vm_start;
 	req_size = vma->vm_end - vma->vm_start;
-	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	req_start = pgoff << PAGE_SHIFT;
+
+	if (!intel_vgpu_in_aperture(vgpu, req_start))
+		return -EINVAL;
+	if (req_start + req_size >
+	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
+		return -EINVAL;
+
+	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;

 	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
 }
@@ -1813,9 +1823,21 @@ static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
 	return 0;
 }

-static void kvmgt_detach_vgpu(unsigned long handle)
+static void kvmgt_detach_vgpu(void *p_vgpu)
 {
-	/* nothing to do here */
+	int i;
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
+
+	if (!vgpu->vdev.region)
+		return;
+
+	for (i = 0; i < vgpu->vdev.num_regions; i++)
+		if (vgpu->vdev.region[i].ops->release)
+			vgpu->vdev.region[i].ops->release(vgpu,
+					&vgpu->vdev.region[i]);
+	vgpu->vdev.num_regions = 0;
+	kfree(vgpu->vdev.region);
+	vgpu->vdev.region = NULL;
 }

 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -41,102 +41,102 @@

 /* Raw offset is appened to each line for convenience. */
 static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = {
-	{RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */
-	{RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
-	{RCS, HWSTAM, 0x0, false}, /* 0x2098 */
-	{RCS, INSTPM, 0xffff, true}, /* 0x20c0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */
-	{RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */
-	{RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */
-	{RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */
-	{RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */
-	{RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */
-	{RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */
+	{RCS0, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */
+	{RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
+	{RCS0, HWSTAM, 0x0, false}, /* 0x2098 */
+	{RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */
+	{RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */
+	{RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */
+	{RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */
+	{RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */
+	{RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */
+	{RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */

-	{BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
-	{BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
-	{BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */
-	{BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */
-	{BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */
-	{RCS, INVALID_MMIO_REG, 0, false } /* Terminated */
+	{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
+	{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
+	{BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */
+	{BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */
+	{BCS0, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */
+	{RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */
 };

 static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
-	{RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */
-	{RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
-	{RCS, HWSTAM, 0x0, false}, /* 0x2098 */
-	{RCS, INSTPM, 0xffff, true}, /* 0x20c0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */
-	{RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */
-	{RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */
-	{RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */
-	{RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */
-	{RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */
-	{RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */
-	{RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */
+	{RCS0, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */
+	{RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
+	{RCS0, HWSTAM, 0x0, false}, /* 0x2098 */
+	{RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */
+	{RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */
+	{RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */
+	{RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */
+	{RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */
+	{RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */
+	{RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */

-	{RCS, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */
-	{RCS, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */
-	{RCS, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
-	{RCS, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
-	{RCS, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
-	{RCS, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
-	{RCS, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
-	{RCS, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
-	{RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
-	{RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
-	{RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
-	{RCS, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
-	{RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
-	{RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
-	{RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */
-	{RCS, TRINVTILEDETCT, 0, false}, /* 0x4dec */
-	{RCS, TRVADR, 0, false}, /* 0x4df0 */
-	{RCS, TRTTE, 0, false}, /* 0x4df4 */
+	{RCS0, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */
+	{RCS0, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */
+	{RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
+	{RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
+	{RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
+	{RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
+	{RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
+	{RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
+	{RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
+	{RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
+	{RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
+	{RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
+	{RCS0, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */
+	{RCS0, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */
+	{RCS0, TRNULLDETCT, 0, false}, /* 0x4de8 */
+	{RCS0, TRINVTILEDETCT, 0, false}, /* 0x4dec */
+	{RCS0, TRVADR, 0, false}, /* 0x4df0 */
+	{RCS0, TRTTE, 0, false}, /* 0x4df4 */

-	{BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
-	{BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
-	{BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */
-	{BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */
-	{BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */
+	{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
+	{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
+	{BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */
+	{BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */
+	{BCS0, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */

-	{VCS2, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */
+	{VCS1, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */

-	{VECS, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */
+	{VECS0, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */

-	{RCS, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */
-	{RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
-	{RCS, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */
-	{RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */
+	{RCS0, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */
+	{RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
+	{RCS0, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */
+	{RCS0, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */

-	{RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
-	{RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+	{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
+	{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */

-	{RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
-	{RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
-	{RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */
-	{RCS, INVALID_MMIO_REG, 0, false } /* Terminated */
+	{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
+	{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
+	{RCS0, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */
+	{RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */
 };

 static struct {
@@ -149,15 +149,17 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
 {
 	i915_reg_t offset;
 	u32 regs[] = {
-		[RCS] = 0xc800,
-		[VCS] = 0xc900,
-		[VCS2] = 0xca00,
-		[BCS] = 0xcc00,
-		[VECS] = 0xcb00,
+		[RCS0]  = 0xc800,
+		[VCS0]  = 0xc900,
+		[VCS1]  = 0xca00,
+		[BCS0]  = 0xcc00,
+		[VECS0] = 0xcb00,
 	};
 	int ring_id, i;

 	for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) {
+		if (!HAS_ENGINE(dev_priv, ring_id))
+			continue;
 		offset.reg = regs[ring_id];
 		for (i = 0; i < GEN9_MOCS_SIZE; i++) {
 			gen9_render_mocs.control_table[ring_id][i] =
@@ -299,7 +301,7 @@ int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
 		goto out;

 	/* no MOCS register in context except render engine */
-	if (req->engine->id != RCS)
+	if (req->engine->id != RCS0)
 		goto out;

 	ret = restore_render_mocs_control_for_inhibit(vgpu, req);
@@ -325,15 +327,16 @@ out:
 static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	enum forcewake_domains fw;
 	i915_reg_t reg;
 	u32 regs[] = {
-		[RCS] = 0x4260,
-		[VCS] = 0x4264,
-		[VCS2] = 0x4268,
-		[BCS] = 0x426c,
-		[VECS] = 0x4270,
+		[RCS0]  = 0x4260,
+		[VCS0]  = 0x4264,
+		[VCS1]  = 0x4268,
+		[BCS0]  = 0x426c,
+		[VECS0] = 0x4270,
 	};

 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
@@ -349,21 +352,21 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 	 * otherwise device can go to RC6 state and interrupt invalidation
 	 * process
 	 */
-	fw = intel_uncore_forcewake_for_reg(dev_priv, reg,
+	fw = intel_uncore_forcewake_for_reg(uncore, reg,
 					    FW_REG_READ | FW_REG_WRITE);
-	if (ring_id == RCS && (INTEL_GEN(dev_priv) >= 9))
+	if (ring_id == RCS0 && INTEL_GEN(dev_priv) >= 9)
 		fw |= FORCEWAKE_RENDER;

-	intel_uncore_forcewake_get(dev_priv, fw);
+	intel_uncore_forcewake_get(uncore, fw);

-	I915_WRITE_FW(reg, 0x1);
+	intel_uncore_write_fw(uncore, reg, 0x1);

-	if (wait_for_atomic((I915_READ_FW(reg) == 0), 50))
+	if (wait_for_atomic((intel_uncore_read_fw(uncore, reg) == 0), 50))
 		gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id);
 	else
 		vgpu_vreg_t(vgpu, reg) = 0;

-	intel_uncore_forcewake_put(dev_priv, fw);
+	intel_uncore_forcewake_put(uncore, fw);

 	gvt_dbg_core("invalidate TLB for ring %d\n", ring_id);
 }
@@ -376,11 +379,11 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
 	u32 old_v, new_v;

 	u32 regs[] = {
-		[RCS] = 0xc800,
-		[VCS] = 0xc900,
-		[VCS2] = 0xca00,
-		[BCS] = 0xcc00,
-		[VECS] = 0xcb00,
+		[RCS0]  = 0xc800,
+		[VCS0]  = 0xc900,
+		[VCS1]  = 0xca00,
+		[BCS0]  = 0xcc00,
+		[VECS0] = 0xcb00,
 	};
 	int i;

@@ -388,8 +391,10 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;

-	if ((IS_KABYLAKE(dev_priv)  || IS_BROXTON(dev_priv)
-		|| IS_COFFEELAKE(dev_priv)) && ring_id == RCS)
+	if (ring_id == RCS0 &&
+	    (IS_KABYLAKE(dev_priv) ||
+	     IS_BROXTON(dev_priv) ||
+	     IS_COFFEELAKE(dev_priv)))
 		return;

 	if (!pre && !gen9_render_mocs.initialized)
@@ -412,7 +417,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
 		offset.reg += 4;
 	}

-	if (ring_id == RCS) {
+	if (ring_id == RCS0) {
 		l3_offset.reg = 0xb020;
 		for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
 			if (pre)
@@ -490,7 +495,8 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(&s->shadow_ctx->__engine[ring_id]))
+			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
+								     dev_priv->engine[ring_id])))
 				continue;

 			if (mmio->mask)
@@ -547,9 +553,9 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * performace for batch mmio read/write, so we need
 	 * handle forcewake mannually.
 	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
 	switch_mmio(pre, next, ring_id);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 }

 /**
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -99,7 +99,7 @@ static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu)
 	if (!intel_gvt_host.mpt->detach_vgpu)
 		return;

-	intel_gvt_host.mpt->detach_vgpu(vgpu->handle);
+	intel_gvt_host.mpt->detach_vgpu(vgpu);
 }

 #define MSI_CAP_CONTROL(offset) (offset + 2)
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		}
 	}
 	spin_unlock_bh(&scheduler->mmio_context_lock);
-	intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put_unchecked(dev_priv);
 	mutex_unlock(&vgpu->gvt->sched_lock);
 }
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -93,7 +93,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload,
 		i915_mmio_reg_offset(EU_PERF_CNTL6),
 	};

-	if (workload->ring_id != RCS)
+	if (workload->ring_id != RCS0)
 		return;

 	if (save) {
@@ -149,7 +149,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	COPY_REG_MASKED(ctx_ctrl);
 	COPY_REG(ctx_timestamp);

-	if (ring_id == RCS) {
+	if (ring_id == RCS0) {
 		COPY_REG(bb_per_ctx_ptr);
 		COPY_REG(rcs_indirect_ctx);
 		COPY_REG(rcs_indirect_ctx_offset);
@@ -177,7 +177,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)

 	context_page_num = context_page_num >> PAGE_SHIFT;

-	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
+	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS0)
 		context_page_num = 19;

 	i = 2;
@@ -333,6 +333,9 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)

 	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
 	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+
+	wa_ctx->indirect_ctx.obj = NULL;
+	wa_ctx->indirect_ctx.shadow_va = NULL;
 }

 static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
@@ -357,6 +360,33 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 	return 0;
 }

+static int
+intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
+	struct i915_request *rq;
+	int ret = 0;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (workload->req)
+		goto out;
+
+	rq = i915_request_alloc(engine, shadow_ctx);
+	if (IS_ERR(rq)) {
+		gvt_vgpu_err("fail to allocate gem request\n");
+		ret = PTR_ERR(rq);
+		goto out;
+	}
+	workload->req = i915_request_get(rq);
+out:
+	return ret;
+}
+
 /**
 * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
 * shadow it as well, include ringbuffer,wa_ctx and ctx.
@@ -373,12 +403,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
 	struct intel_context *ce;
-	struct i915_request *rq;
 	int ret;

 	lockdep_assert_held(&dev_priv->drm.struct_mutex);

-	if (workload->req)
+	if (workload->shadow)
 		return 0;

 	ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
@@ -411,29 +440,14 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	if (ret)
 		goto err_unpin;

-	if ((workload->ring_id == RCS) &&
-	    (workload->wa_ctx.indirect_ctx.size != 0)) {
+	if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
 		if (ret)
 			goto err_shadow;
 	}

-	rq = i915_request_alloc(engine, shadow_ctx);
-	if (IS_ERR(rq)) {
-		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto err_shadow;
-	}
-	workload->req = i915_request_get(rq);
-
-	ret = populate_shadow_context(workload);
-	if (ret)
-		goto err_req;
-
+	workload->shadow = true;
 	return 0;
-err_req:
-	rq = fetch_and_zero(&workload->req);
-	i915_request_put(rq);
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
 err_unpin:
@@ -672,23 +686,31 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	mutex_lock(&vgpu->vgpu_lock);
 	mutex_lock(&dev_priv->drm.struct_mutex);

+	ret = intel_gvt_workload_req_alloc(workload);
+	if (ret)
+		goto err_req;
+
 	ret = intel_gvt_scan_and_shadow_workload(workload);
 	if (ret)
 		goto out;

+	ret = populate_shadow_context(workload);
+	if (ret) {
+		release_shadow_wa_ctx(&workload->wa_ctx);
+		goto out;
+	}
+
 	ret = prepare_workload(workload);
-
 out:
-	if (ret)
-		workload->status = ret;
-
 	if (!IS_ERR_OR_NULL(workload->req)) {
 		gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
 				ring_id, workload->req);
 		i915_request_add(workload->req);
 		workload->dispatched = true;
 	}
-
+err_req:
+	if (ret)
+		workload->status = ret;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	mutex_unlock(&vgpu->vgpu_lock);
 	return ret;
@@ -768,7 +790,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	context_page_num = rq->engine->context_size;
 	context_page_num = context_page_num >> PAGE_SHIFT;

-	if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS)
+	if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS0)
 		context_page_num = 19;

 	i = 2;
@@ -816,13 +838,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 }

 void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
-				unsigned long engine_mask)
+				intel_engine_mask_t engine_mask)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine;
 	struct intel_vgpu_workload *pos, *n;
-	unsigned int tmp;
+	intel_engine_mask_t tmp;

 	/* free the unsubmited workloads in the queues. */
 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
@@ -868,8 +890,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 				workload->status = 0;
 		}

-		if (!workload->status && !(vgpu->resetting_eng &
-					   ENGINE_MASK(ring_id))) {
+		if (!workload->status &&
+		    !(vgpu->resetting_eng & BIT(ring_id))) {
 			update_guest_context(workload);

 			for_each_set_bit(event, workload->pending_events,
@@ -892,12 +914,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)

 	list_del_init(&workload->list);

-	if (!workload->status) {
-		release_shadow_batch_buffer(workload);
-		release_shadow_wa_ctx(&workload->wa_ctx);
-	}
-
-	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
+	if (workload->status || vgpu->resetting_eng & BIT(ring_id)) {
 		/* if workload->status is not successful means HW GPU
 		 * has occurred GPU hang or something wrong with i915/GVT,
 		 * and GVT won't inject context switch interrupt to guest.
@@ -911,7 +928,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 		 * cleaned up during the resetting process later, so doing
 		 * the workload clean up here doesn't have any impact.
 		 **/
-		intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id));
+		intel_vgpu_clean_workloads(vgpu, BIT(ring_id));
 	}

 	workload->complete(workload);
@@ -971,7 +988,7 @@ static int workload_thread(void *priv)
 				workload->ring_id, workload);

 		if (need_force_wake)
-			intel_uncore_forcewake_get(gvt->dev_priv,
+			intel_uncore_forcewake_get(&gvt->dev_priv->uncore,
 					FORCEWAKE_ALL);

 		ret = dispatch_workload(workload);
@@ -993,10 +1010,10 @@ complete:
 		complete_current_workload(gvt, ring_id);

 		if (need_force_wake)
-			intel_uncore_forcewake_put(gvt->dev_priv,
+			intel_uncore_forcewake_put(&gvt->dev_priv->uncore,
 					FORCEWAKE_ALL);

-		intel_runtime_pm_put(gvt->dev_priv);
+		intel_runtime_pm_put_unchecked(gvt->dev_priv);
 		if (ret && (vgpu_is_vm_unhealthy(ret)))
 			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 	}
@@ -1084,9 +1101,9 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
 	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;

-	if (i915_vm_is_48bit(&i915_ppgtt->vm))
+	if (i915_vm_is_4lvl(&i915_ppgtt->vm)) {
 		px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
-	else {
+	} else {
 		for (i = 0; i < GEN8_3LVL_PDPES; i++)
 			px_dma(i915_ppgtt->pdp.page_directory[i]) =
 						s->i915_context_pdps[i];
@@ -1120,7 +1137,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 *
 */
 void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
-		unsigned long engine_mask)
+				 intel_engine_mask_t engine_mask)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;

@@ -1137,7 +1154,7 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
 	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;

-	if (i915_vm_is_48bit(&i915_ppgtt->vm))
+	if (i915_vm_is_4lvl(&i915_ppgtt->vm))
 		s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
 	else {
 		for (i = 0; i < GEN8_3LVL_PDPES; i++)
@@ -1210,7 +1227,7 @@ out_shadow_ctx:
 *
 */
 int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
-				     unsigned long engine_mask,
+				     intel_engine_mask_t engine_mask,
 				     unsigned int interface)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
@@ -1262,6 +1279,9 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu_submission *s = &workload->vgpu->submission;

+	release_shadow_batch_buffer(workload);
+	release_shadow_wa_ctx(&workload->wa_ctx);
+
 	if (workload->shadow_mm)
 		intel_vgpu_mm_put(workload->shadow_mm);

@@ -1417,7 +1437,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 	workload->rb_start = start;
 	workload->rb_ctl = ctl;

-	if (ring_id == RCS) {
+	if (ring_id == RCS0) {
 		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
 			RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
 		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
@@ -1450,7 +1470,7 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 		mutex_lock(&dev_priv->drm.struct_mutex);
 		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
-		intel_runtime_pm_put(dev_priv);
+		intel_runtime_pm_put_unchecked(dev_priv);
 	}

 	if (ret && (vgpu_is_vm_unhealthy(ret))) {
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -83,6 +83,7 @@ struct intel_vgpu_workload {
 	struct i915_request *req;
 	/* if this workload has been dispatched to i915? */
 	bool dispatched;
+	bool shadow;      /* if workload has done shadow of guest request */
 	int status;

 	struct intel_vgpu_mm *shadow_mm;
@@ -141,12 +142,12 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu);
 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu);

 void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
-				 unsigned long engine_mask);
+				 intel_engine_mask_t engine_mask);

 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu);

 int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
-				     unsigned long engine_mask,
+				     intel_engine_mask_t engine_mask,
 				     unsigned int interface);

 extern const struct intel_vgpu_submission_ops
@@ -159,6 +160,6 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload);

 void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
-				unsigned long engine_mask);
+				intel_engine_mask_t engine_mask);

 #endif
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -44,7 +44,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 	vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0;
 	vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id;

-	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_PPGTT;
 	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
 	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HUGE_GTT;

@@ -148,10 +148,10 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 		gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
 						   high_avail / vgpu_types[i].high_mm);

-		if (IS_GEN8(gvt->dev_priv))
+		if (IS_GEN(gvt->dev_priv, 8))
 			sprintf(gvt->types[i].name, "GVTg_V4_%s",
 						vgpu_types[i].name);
-		else if (IS_GEN9(gvt->dev_priv))
+		else if (IS_GEN(gvt->dev_priv, 9))
 			sprintf(gvt->types[i].name, "GVTg_V5_%s",
 						vgpu_types[i].name);

@@ -526,11 +526,11 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 * GPU engines. For FLR, engine_mask is ignored.
 */
 void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
-				 unsigned int engine_mask)
+				 intel_engine_mask_t engine_mask)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-	unsigned int resetting_eng = dmlr ? ALL_ENGINES : engine_mask;
+	intel_engine_mask_t resetting_eng = dmlr ? ALL_ENGINES : engine_mask;

 	gvt_dbg_core("------------------------------------------\n");
 	gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n",