Merge tag 'drm-for-v4.10' of git://people.freedesktop.org/~airlied/linux

Pull drm updates from Dave Airlie: "This is the main pull request for drm for 4.10 kernel. New drivers: - ZTE VOU display driver (zxdrm) - Amlogic Meson Graphic Controller GXBB/GXL/GXM SoCs (meson) - MXSFB support (mxsfb) Core: - Format handling has been reworked - Better atomic state debugging - drm_mm leak debugging - Atomic explicit fencing support - fbdev helper ops - Documentation updates - MST fbcon fixes Bridge: - Silicon Image SiI8620 driver Panel: - Add support for new simple panels i915: - GVT Device model - Better HDMI2.0 support on skylake - More watermark fixes - GPU idling rework for suspend/resume - DP Audio workarounds - Scheduler prep-work - Opregion CADL handling - GPU scheduler and priority boosting amdgfx/radeon: - Support for virtual devices - New VM manager for non-contig VRAM buffers - UVD powergating - SI register header cleanup - Cursor fixes - Powermanagement fixes nouveau: - Powermangement reworks for better voltage/clock changes - Atomic modesetting support - Displayport Multistream (MST) support. - GP102/104 hang and cursor fixes - GP106 support hisilicon: - hibmc support (BMC chip for aarch64 servers) armada: - add tracing support for overlay change - refactor plane support - de-midlayer the driver omapdrm: - Timing code cleanups rcar-du: - R8A7792/R8A7796 support - Misc fixes. sunxi: - A31 SoC display engine support imx-drm: - YUV format support - Cleanup plane atomic update mali-dp: - Misc fixes dw-hdmi: - Add support for HDMI i2c master controller tegra: - IOMMU support fixes - Error handling fixes tda998x: - Fix connector registration - Improved robustness - Fix infoframe/audio compliance virtio: - fix busid issues - allocate more vbufs qxl: - misc fixes and cleanups. vc4: - Fragment shader threading - ETC1 support - VEC (tv-out) support msm: - A5XX GPU support - Lots of atomic changes tilcdc: - Misc fixes and cleanups. etnaviv: - Fix dma-buf export path - DRAW_INSTANCED support - fix driver on i.MX6SX exynos: - HDMI refactoring fsl-dcu: - fbdev changes" * tag 'drm-for-v4.10' of git://people.freedesktop.org/~airlied/linux: (1343 commits) drm/nouveau/kms/nv50: fix atomic regression on original G80 drm/nouveau/bl: Do not register interface if Apple GMUX detected drm/nouveau/bl: Assign different names to interfaces drm/nouveau/bios/dp: fix handling of LevelEntryTableIndex on DP table 4.2 drm/nouveau/ltc: protect clearing of comptags with mutex drm/nouveau/gr/gf100-: handle GPC/TPC/MPC trap drm/nouveau/core: recognise GP106 chipset drm/nouveau/ttm: wait for bo fence to signal before unmapping vmas drm/nouveau/gr/gf100-: FECS intr handling is not relevant on proprietary ucode drm/nouveau/gr/gf100-: properly ack all FECS error interrupts drm/nouveau/fifo/gf100-: recover from host mmu faults drm: Add fake controlD* symlinks for backwards compat drm/vc4: Don't use drm_put_dev drm/vc4: Document VEC DT binding drm/vc4: Add support for the VEC (Video Encoder) IP drm: Add TV connector states to drm_connector_state drm: Turn DRM_MODE_SUBCONNECTOR_xx definitions into an enum drm/vc4: Fix ->clock_select setting for the VEC encoder drm/amdgpu/dce6: Set MASTER_UPDATE_MODE to 0 in resume_mc_access as well drm/amdgpu: use pin rather than pin_restricted in a few cases ...
2016-12-13 09:35:09 -08:00
parent 7079efc9d3 2cf026ae85
commit 9439b3710d
1010 changed files with 129375 additions and 27317 deletions
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -12,6 +12,7 @@ menuconfig DRM
 	select I2C
 	select I2C_ALGOBIT
 	select DMA_SHARED_BUFFER
+	select SYNC_FILE
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
@@ -33,6 +34,20 @@ config DRM_DP_AUX_CHARDEV
 	  read and write values to arbitrary DPCD registers on the DP aux
 	  channel.

+config DRM_DEBUG_MM
+	bool "Insert extra checks and debug info into the DRM range managers"
+	default n
+	depends on DRM=y
+	depends on STACKTRACE_SUPPORT
+	select STACKDEPOT
+	help
+	  Enable allocation tracking of memory manager and leak detection on
+	  shutdown.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_KMS_HELPER
 	tristate
 	depends on DRM
@@ -223,6 +238,12 @@ source "drivers/gpu/drm/hisilicon/Kconfig"

 source "drivers/gpu/drm/mediatek/Kconfig"

+source "drivers/gpu/drm/zte/Kconfig"
+
+source "drivers/gpu/drm/mxsfb/Kconfig"
+
+source "drivers/gpu/drm/meson/Kconfig"
+
 # Keep legacy drivers last

 menuconfig DRM_LEGACY
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -9,13 +9,14 @@ drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_scatter.o drm_pci.o \
 		drm_platform.o drm_sysfs.o drm_hashtab.o drm_mm.o \
 		drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o \
-		drm_info.o drm_debugfs.o drm_encoder_slave.o \
+		drm_info.o drm_encoder_slave.o \
 		drm_trace_points.o drm_global.o drm_prime.o \
 		drm_rect.o drm_vma_manager.o drm_flip_work.o \
 		drm_modeset_lock.o drm_atomic.o drm_bridge.o \
 		drm_framebuffer.o drm_connector.o drm_blend.o \
 		drm_encoder.o drm_mode_object.o drm_property.o \
-		drm_plane.o drm_color_mgmt.o
+		drm_plane.o drm_color_mgmt.o drm_print.o \
+		drm_dumb_buffers.o drm_mode_config.o

 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
@@ -23,6 +24,7 @@ drm-$(CONFIG_PCI) += ati_pcigart.o
 drm-$(CONFIG_DRM_PANEL) += drm_panel.o
 drm-$(CONFIG_OF) += drm_of.o
 drm-$(CONFIG_AGP) += drm_agpsupport.o
+drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o

 drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
 		drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
@@ -79,6 +81,7 @@ obj-$(CONFIG_DRM_TEGRA) += tegra/
 obj-$(CONFIG_DRM_STI) += sti/
 obj-$(CONFIG_DRM_IMX) += imx/
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
+obj-$(CONFIG_DRM_MESON)	+= meson/
 obj-y			+= i2c/
 obj-y			+= panel/
 obj-y			+= bridge/
@@ -86,3 +89,5 @@ obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/
 obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/
 obj-$(CONFIG_DRM_ARCPGU)+= arc/
 obj-y			+= hisilicon/
+obj-$(CONFIG_DRM_ZTE)	+= zte/
+obj-$(CONFIG_DRM_MXSFB)	+= mxsfb/
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o

 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -90,7 +90,6 @@
 #define ENCODER_OBJECT_ID_INTERNAL_VCE            0x24
 #define ENCODER_OBJECT_ID_INTERNAL_UNIPHY3        0x25
 #define ENCODER_OBJECT_ID_INTERNAL_AMCLK          0x27
-#define ENCODER_OBJECT_ID_VIRTUAL                 0x28

 #define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO    0xFF

@@ -120,7 +119,6 @@
 #define CONNECTOR_OBJECT_ID_eDP                   0x14
 #define CONNECTOR_OBJECT_ID_MXM                   0x15
 #define CONNECTOR_OBJECT_ID_LVDS_eDP              0x16
-#define CONNECTOR_OBJECT_ID_VIRTUAL               0x17

 /* deleted */

@@ -149,7 +147,6 @@
 #define GRAPH_OBJECT_ENUM_ID5                     0x05
 #define GRAPH_OBJECT_ENUM_ID6                     0x06
 #define GRAPH_OBJECT_ENUM_ID7                     0x07
-#define GRAPH_OBJECT_ENUM_VIRTUAL                 0x08

 /****************************************************/
 /* Graphics Object ID Bit definition                */
@@ -411,10 +408,6 @@
                                                  GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
                                                  ENCODER_OBJECT_ID_HDMI_ANX9805 << OBJECT_ID_SHIFT)

-#define ENCODER_VIRTUAL_ENUM_VIRTUAL            ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
-                                                  GRAPH_OBJECT_ENUM_VIRTUAL << ENUM_ID_SHIFT |\
-                                                  ENCODER_OBJECT_ID_VIRTUAL << OBJECT_ID_SHIFT)
-
 /****************************************************/
 /* Connector Object ID definition - Shared with BIOS */
 /****************************************************/
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -265,14 +265,14 @@ static int acp_hw_init(void *handle)

 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-	const struct amdgpu_ip_block_version *ip_version =
+	const struct amdgpu_ip_block *ip_block =
 		amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);

-	if (!ip_version)
+	if (!ip_block)
 		return -EINVAL;

 	r = amd_acp_hw_init(adev->acp.cgs_device,
-			    ip_version->major, ip_version->minor);
+			    ip_block->version->major, ip_block->version->minor);
 	/* -ENODEV means board uses AZ rather than ACP */
 	if (r == -ENODEV)
 		return 0;
@@ -459,7 +459,7 @@ static int acp_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs acp_ip_funcs = {
+static const struct amd_ip_funcs acp_ip_funcs = {
 	.name = "acp_ip",
 	.early_init = acp_early_init,
 	.late_init = NULL,
@@ -475,3 +475,12 @@ const struct amd_ip_funcs acp_ip_funcs = {
 	.set_clockgating_state = acp_set_clockgating_state,
 	.set_powergating_state = acp_set_powergating_state,
 };
+
+const struct amdgpu_ip_block_version acp_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_ACP,
+	.major = 2,
+	.minor = 2,
+	.rev = 0,
+	.funcs = &acp_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
@@ -37,6 +37,6 @@ struct amdgpu_acp {
 	struct acp_pm_domain *acp_genpd;
 };

-extern const struct amd_ip_funcs acp_ip_funcs;
+extern const struct amdgpu_ip_block_version acp_ip_block;

 #endif /* __AMDGPU_ACP_H__ */
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1115,49 +1115,6 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
 	return 0;
 }

-uint32_t amdgpu_atombios_get_engine_clock(struct amdgpu_device *adev)
-{
-	GET_ENGINE_CLOCK_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, GetEngineClock);
-
-	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-	return le32_to_cpu(args.ulReturnEngineClock);
-}
-
-uint32_t amdgpu_atombios_get_memory_clock(struct amdgpu_device *adev)
-{
-	GET_MEMORY_CLOCK_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, GetMemoryClock);
-
-	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-	return le32_to_cpu(args.ulReturnMemoryClock);
-}
-
-void amdgpu_atombios_set_engine_clock(struct amdgpu_device *adev,
-				      uint32_t eng_clock)
-{
-	SET_ENGINE_CLOCK_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, SetEngineClock);
-
-	args.ulTargetEngineClock = cpu_to_le32(eng_clock);	/* 10 khz */
-
-	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-}
-
-void amdgpu_atombios_set_memory_clock(struct amdgpu_device *adev,
-				      uint32_t mem_clock)
-{
-	SET_MEMORY_CLOCK_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, SetMemoryClock);
-
-	if (adev->flags & AMD_IS_APU)
-		return;
-
-	args.ulTargetMemoryClock = cpu_to_le32(mem_clock);	/* 10 khz */
-
-	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-}
-
 void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
 					     u32 eng_clock, u32 mem_clock)
 {
@@ -1256,45 +1213,6 @@ int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *
 	return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
 }

-void amdgpu_atombios_set_voltage(struct amdgpu_device *adev,
-				 u16 voltage_level,
-				 u8 voltage_type)
-{
-	union set_voltage args;
-	int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
-	u8 frev, crev, volt_index = voltage_level;
-
-	if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
-		return;
-
-	/* 0xff01 is a flag rather then an actual voltage */
-	if (voltage_level == 0xff01)
-		return;
-
-	switch (crev) {
-	case 1:
-		args.v1.ucVoltageType = voltage_type;
-		args.v1.ucVoltageMode = SET_ASIC_VOLTAGE_MODE_ALL_SOURCE;
-		args.v1.ucVoltageIndex = volt_index;
-		break;
-	case 2:
-		args.v2.ucVoltageType = voltage_type;
-		args.v2.ucVoltageMode = SET_ASIC_VOLTAGE_MODE_SET_VOLTAGE;
-		args.v2.usVoltageLevel = cpu_to_le16(voltage_level);
-		break;
-	case 3:
-		args.v3.ucVoltageType = voltage_type;
-		args.v3.ucVoltageMode = ATOM_SET_VOLTAGE;
-		args.v3.usVoltageLevel = cpu_to_le16(voltage_level);
-		break;
-	default:
-		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
-		return;
-	}
-
-	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
-}
-
 int amdgpu_atombios_get_leakage_id_from_vbios(struct amdgpu_device *adev,
 					      u16 *leakage_id)
 {
@@ -1784,6 +1702,19 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
 		WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
 }

+void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
+					      bool hung)
+{
+	u32 tmp = RREG32(mmBIOS_SCRATCH_3);
+
+	if (hung)
+		tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+	else
+		tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+
+	WREG32(mmBIOS_SCRATCH_3, tmp);
+}
+
 /* Atom needs data in little endian format
 * so swap as appropriate when copying data to
 * or from atom. Note that atom operates on
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -163,16 +163,6 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
 					    bool strobe_mode,
 					    struct atom_mpll_param *mpll_param);

-uint32_t amdgpu_atombios_get_engine_clock(struct amdgpu_device *adev);
-uint32_t amdgpu_atombios_get_memory_clock(struct amdgpu_device *adev);
-void amdgpu_atombios_set_engine_clock(struct amdgpu_device *adev,
-				      uint32_t eng_clock);
-void amdgpu_atombios_set_memory_clock(struct amdgpu_device *adev,
-				      uint32_t mem_clock);
-void amdgpu_atombios_set_voltage(struct amdgpu_device *adev,
-				 u16 voltage_level,
-				 u8 voltage_type);
-
 void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
 					     u32 eng_clock, u32 mem_clock);

@@ -206,6 +196,8 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
 void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev);
+void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
+					      bool hung);

 void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
 int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 {
 	unsigned long start_jiffies;
 	unsigned long end_jiffies;
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	int i, r;

 	start_jiffies = jiffies;
@@ -43,17 +43,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
 				       false);
 		if (r)
 			goto exit_do_move;
-		r = fence_wait(fence, false);
+		r = dma_fence_wait(fence, false);
 		if (r)
 			goto exit_do_move;
-		fence_put(fence);
+		dma_fence_put(fence);
 	}
 	end_jiffies = jiffies;
 	r = jiffies_to_msecs(end_jiffies - start_jiffies);

 exit_do_move:
 	if (fence)
-		fence_put(fence);
+		dma_fence_put(fence);
 	return r;
 }

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -70,10 +70,11 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
 		return false;
 	}
 	adev->bios = kmalloc(size, GFP_KERNEL);
-	if (adev->bios == NULL) {
+	if (!adev->bios) {
 		iounmap(bios);
 		return false;
 	}
+	adev->bios_size = size;
 	memcpy_fromio(adev->bios, bios, size);
 	iounmap(bios);
 	return true;
@@ -103,6 +104,7 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
 		pci_unmap_rom(adev->pdev, bios);
 		return false;
 	}
+	adev->bios_size = size;
 	memcpy_fromio(adev->bios, bios, size);
 	pci_unmap_rom(adev->pdev, bios);
 	return true;
@@ -135,6 +137,7 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
 		DRM_ERROR("no memory to allocate for BIOS\n");
 		return false;
 	}
+	adev->bios_size = len;

 	/* read complete BIOS */
 	return amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
@@ -159,6 +162,7 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
 	if (adev->bios == NULL) {
 		return false;
 	}
+	adev->bios_size = size;

 	return true;
 }
@@ -273,6 +277,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
 		kfree(adev->bios);
 		return false;
 	}
+	adev->bios_size = size;
 	return true;
 }
 #else
@@ -334,6 +339,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
 	}

 	adev->bios = kmemdup(&vbios->VbiosContent, vhdr->ImageLength, GFP_KERNEL);
+	adev->bios_size = vhdr->ImageLength;
 	ret = !!adev->bios;

 out_unmap:
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -146,7 +146,8 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 	switch(type) {
 	case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
 	case CGS_GPU_MEM_TYPE__VISIBLE_FB:
-		flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+		flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
 		if (max_offset > adev->mc.real_vram_size)
 			return -EINVAL;
@@ -157,7 +158,8 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
 		break;
 	case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
 	case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
-		flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+		flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
+			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
 		if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
 			place.fpfn =
@@ -240,7 +242,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h
 	r = amdgpu_bo_reserve(obj, false);
 	if (unlikely(r != 0))
 		return r;
-	r = amdgpu_bo_pin_restricted(obj, AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains,
 				     min_offset, max_offset, mcaddr);
 	amdgpu_bo_unreserve(obj);
 	return r;
@@ -624,11 +626,11 @@ static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
 	int i, r = -1;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;

-		if (adev->ip_blocks[i].type == block_type) {
-			r = adev->ip_blocks[i].funcs->set_clockgating_state(
+		if (adev->ip_blocks[i].version->type == block_type) {
+			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
 								(void *)adev,
 									state);
 			break;
@@ -645,11 +647,11 @@ static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
 	int i, r = -1;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;

-		if (adev->ip_blocks[i].type == block_type) {
-			r = adev->ip_blocks[i].funcs->set_powergating_state(
+		if (adev->ip_blocks[i].version->type == block_type) {
+			r = adev->ip_blocks[i].version->funcs->set_powergating_state(
 								(void *)adev,
 									state);
 			break;
@@ -685,15 +687,21 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 		result = AMDGPU_UCODE_ID_CP_MEC1;
 		break;
 	case CGS_UCODE_ID_CP_MEC_JT2:
-		if (adev->asic_type == CHIP_TONGA || adev->asic_type == CHIP_POLARIS11
-		  || adev->asic_type == CHIP_POLARIS10)
-			result = AMDGPU_UCODE_ID_CP_MEC2;
-		else
+		/* for VI. JT2 should be the same as JT1, because:
+			1, MEC2 and MEC1 use exactly same FW.
+			2, JT2 is not pached but JT1 is.
+		*/
+		if (adev->asic_type >= CHIP_TOPAZ)
 			result = AMDGPU_UCODE_ID_CP_MEC1;
+		else
+			result = AMDGPU_UCODE_ID_CP_MEC2;
 		break;
 	case CGS_UCODE_ID_RLC_G:
 		result = AMDGPU_UCODE_ID_RLC_G;
 		break;
+	case CGS_UCODE_ID_STORAGE:
+		result = AMDGPU_UCODE_ID_STORAGE;
+		break;
 	default:
 		DRM_ERROR("Firmware type not supported\n");
 	}
@@ -715,7 +723,7 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type)
 {
 	CGS_FUNC_ADEV;
-	uint16_t fw_version;
+	uint16_t fw_version = 0;

 	switch (type) {
 		case CGS_UCODE_ID_SDMA0:
@@ -745,9 +753,11 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
 		case CGS_UCODE_ID_RLC_G:
 			fw_version = adev->gfx.rlc_fw_version;
 			break;
+		case CGS_UCODE_ID_STORAGE:
+			break;
 		default:
 			DRM_ERROR("firmware type %d do not have version\n", type);
-			fw_version = 0;
+			break;
 	}
 	return fw_version;
 }
@@ -776,12 +786,18 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,

 		if ((type == CGS_UCODE_ID_CP_MEC_JT1) ||
 		    (type == CGS_UCODE_ID_CP_MEC_JT2)) {
-			gpu_addr += le32_to_cpu(header->jt_offset) << 2;
+			gpu_addr += ALIGN(le32_to_cpu(header->header.ucode_size_bytes), PAGE_SIZE);
 			data_size = le32_to_cpu(header->jt_size) << 2;
 		}
-		info->mc_addr = gpu_addr;
+
+		info->kptr = ucode->kaddr;
 		info->image_size = data_size;
+		info->mc_addr = gpu_addr;
 		info->version = (uint16_t)le32_to_cpu(header->header.ucode_version);
+
+		if (CGS_UCODE_ID_CP_MEC == type)
+			info->image_size = (header->jt_offset) << 2;
+
 		info->fw_version = amdgpu_get_firmware_version(cgs_device, type);
 		info->feature_version = (uint16_t)le32_to_cpu(header->ucode_feature_version);
 	} else {
@@ -860,6 +876,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 	return 0;
 }

+static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
+{
+	CGS_FUNC_ADEV;
+	return amdgpu_sriov_vf(adev);
+}
+
 static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 					struct cgs_system_info *sys_info)
 {
@@ -1213,6 +1235,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	amdgpu_cgs_notify_dpm_enabled,
 	amdgpu_cgs_call_acpi_method,
 	amdgpu_cgs_query_system_info,
+	amdgpu_cgs_is_virtualization_enabled
 };

 static const struct cgs_os_ops amdgpu_cgs_os_ops = {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1517,88 +1517,6 @@ static const struct drm_connector_funcs amdgpu_connector_edp_funcs = {
 	.force = amdgpu_connector_dvi_force,
 };

-static struct drm_encoder *
-amdgpu_connector_virtual_encoder(struct drm_connector *connector)
-{
-	int enc_id = connector->encoder_ids[0];
-	struct drm_encoder *encoder;
-	int i;
-	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
-		if (connector->encoder_ids[i] == 0)
-			break;
-
-		encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
-		if (!encoder)
-			continue;
-
-		if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
-			return encoder;
-	}
-
-	/* pick the first one */
-	if (enc_id)
-		return drm_encoder_find(connector->dev, enc_id);
-	return NULL;
-}
-
-static int amdgpu_connector_virtual_get_modes(struct drm_connector *connector)
-{
-	struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
-
-	if (encoder) {
-		amdgpu_connector_add_common_modes(encoder, connector);
-	}
-
-	return 0;
-}
-
-static int amdgpu_connector_virtual_mode_valid(struct drm_connector *connector,
-					   struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
-static int
-amdgpu_connector_virtual_dpms(struct drm_connector *connector, int mode)
-{
-	return 0;
-}
-
-static enum drm_connector_status
-
-amdgpu_connector_virtual_detect(struct drm_connector *connector, bool force)
-{
-	return connector_status_connected;
-}
-
-static int
-amdgpu_connector_virtual_set_property(struct drm_connector *connector,
-				  struct drm_property *property,
-				  uint64_t val)
-{
-	return 0;
-}
-
-static void amdgpu_connector_virtual_force(struct drm_connector *connector)
-{
-	return;
-}
-
-static const struct drm_connector_helper_funcs amdgpu_connector_virtual_helper_funcs = {
-	.get_modes = amdgpu_connector_virtual_get_modes,
-	.mode_valid = amdgpu_connector_virtual_mode_valid,
-	.best_encoder = amdgpu_connector_virtual_encoder,
-};
-
-static const struct drm_connector_funcs amdgpu_connector_virtual_funcs = {
-	.dpms = amdgpu_connector_virtual_dpms,
-	.detect = amdgpu_connector_virtual_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.set_property = amdgpu_connector_virtual_set_property,
-	.destroy = amdgpu_connector_destroy,
-	.force = amdgpu_connector_virtual_force,
-};
-
 void
 amdgpu_connector_add(struct amdgpu_device *adev,
 		      uint32_t connector_id,
@@ -1983,17 +1901,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 			connector->interlace_allowed = false;
 			connector->doublescan_allowed = false;
 			break;
-		case DRM_MODE_CONNECTOR_VIRTUAL:
-			amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
-			if (!amdgpu_dig_connector)
-				goto failed;
-			amdgpu_connector->con_priv = amdgpu_dig_connector;
-			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_virtual_funcs, connector_type);
-			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_virtual_helper_funcs);
-			subpixel_order = SubPixelHorizontalRGB;
-			connector->interlace_allowed = false;
-			connector->doublescan_allowed = false;
-			break;
 		}
 	}

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -355,6 +355,7 @@ static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
 static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	u64 initial_bytes_moved;
 	uint32_t domain;
 	int r;
@@ -372,9 +373,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,

 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
-	initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
 		initial_bytes_moved;

 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -387,9 +388,9 @@ retry:

 /* Last resort, try to evict something from the current working set */
 static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
-				struct amdgpu_bo_list_entry *lobj)
+				struct amdgpu_bo *validated)
 {
-	uint32_t domain = lobj->robj->allowed_domains;
+	uint32_t domain = validated->allowed_domains;
 	int r;

 	if (!p->evictable)
@@ -400,11 +401,12 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,

 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 		u64 initial_bytes_moved;
 		uint32_t other;

 		/* If we reached our current BO we can forget it */
-		if (candidate == lobj)
+		if (candidate->robj == validated)
 			break;

 		other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
@@ -420,9 +422,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,

 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
-		initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
 			initial_bytes_moved;

 		if (unlikely(r))
@@ -437,6 +439,23 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	return false;
 }

+static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct amdgpu_cs_parser *p = param;
+	int r;
+
+	do {
+		r = amdgpu_cs_bo_validate(p, bo);
+	} while (r == -ENOMEM && amdgpu_cs_try_evict(p, bo));
+	if (r)
+		return r;
+
+	if (bo->shadow)
+		r = amdgpu_cs_bo_validate(p, bo->shadow);
+
+	return r;
+}
+
 static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			    struct list_head *validated)
 {
@@ -464,18 +483,10 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (p->evictable == lobj)
 			p->evictable = NULL;

-		do {
-			r = amdgpu_cs_bo_validate(p, bo);
-		} while (r == -ENOMEM && amdgpu_cs_try_evict(p, lobj));
+		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;

-		if (bo->shadow) {
-			r = amdgpu_cs_bo_validate(p, bo);
-			if (r)
-				return r;
-		}
-
 		if (binding_userptr) {
 			drm_free_large(lobj->user_pages);
 			lobj->user_pages = NULL;
@@ -594,14 +605,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}

-	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);
-
 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);

+	r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
+				      amdgpu_cs_validate, p);
+	if (r) {
+		DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
+		goto error_validate;
+	}
+
 	r = amdgpu_cs_list_validate(p, &duplicates);
 	if (r) {
 		DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
@@ -720,7 +736,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
-	fence_put(parser->fence);
+	dma_fence_put(parser->fence);

 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
@@ -757,7 +773,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,

 	if (p->bo_list) {
 		for (i = 0; i < p->bo_list->num_entries; i++) {
-			struct fence *f;
+			struct dma_fence *f;

 			/* ignore duplicates */
 			bo = p->bo_list->array[i].robj;
@@ -807,13 +823,14 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,

 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
-		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
-	} else {
+	}
+
+	if (p->job->vm) {
 		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);

 		r = amdgpu_bo_vm_update_pte(p, vm);
@@ -824,16 +841,6 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	return amdgpu_cs_sync_rings(p);
 }

-static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
-{
-	if (r == -EDEADLK) {
-		r = amdgpu_gpu_reset(adev);
-		if (!r)
-			r = -EAGAIN;
-	}
-	return r;
-}
-
 static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			     struct amdgpu_cs_parser *parser)
 {
@@ -902,7 +909,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
 			kptr += chunk_ib->va_start - offset;

-			r =  amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
+			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
 			if (r) {
 				DRM_ERROR("Failed to get ib !\n");
 				return r;
@@ -917,9 +924,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return r;
 			}

-			ib->gpu_addr = chunk_ib->va_start;
 		}

+		ib->gpu_addr = chunk_ib->va_start;
 		ib->length_dw = chunk_ib->ib_bytes / 4;
 		ib->flags = chunk_ib->flags;
 		j++;
@@ -927,8 +934,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,

 	/* UVD & VCE fw doesn't support user fences */
 	if (parser->job->uf_addr && (
-	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
+	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;

 	return 0;
@@ -957,7 +964,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 		for (j = 0; j < num_deps; ++j) {
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
-			struct fence *fence;
+			struct dma_fence *fence;

 			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
 					       deps[j].ip_instance,
@@ -979,7 +986,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			} else if (fence) {
 				r = amdgpu_sync_fence(adev, &p->job->sync,
 						      fence);
-				fence_put(fence);
+				dma_fence_put(fence);
 				amdgpu_ctx_put(ctx);
 				if (r)
 					return r;
@@ -1009,7 +1016,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,

 	job->owner = p->filp;
 	job->fence_ctx = entity->fence_context;
-	p->fence = fence_get(&job->base.s_fence->finished);
+	p->fence = dma_fence_get(&job->base.s_fence->finished);
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
@@ -1037,29 +1044,29 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
-		amdgpu_cs_parser_fini(&parser, r, false);
-		r = amdgpu_cs_handle_lockup(adev, r);
-		return r;
+		goto out;
 	}
+
 	r = amdgpu_cs_parser_bos(&parser, data);
-	if (r == -ENOMEM)
-		DRM_ERROR("Not enough memory for command submission!\n");
-	else if (r && r != -ERESTARTSYS)
-		DRM_ERROR("Failed to process the buffer list %d!\n", r);
-	else if (!r) {
-		reserved_buffers = true;
-		r = amdgpu_cs_ib_fill(adev, &parser);
-	}
-
-	if (!r) {
-		r = amdgpu_cs_dependencies(adev, &parser);
-		if (r)
-			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+	if (r) {
+		if (r == -ENOMEM)
+			DRM_ERROR("Not enough memory for command submission!\n");
+		else if (r != -ERESTARTSYS)
+			DRM_ERROR("Failed to process the buffer list %d!\n", r);
+		goto out;
 	}

+	reserved_buffers = true;
+	r = amdgpu_cs_ib_fill(adev, &parser);
 	if (r)
 		goto out;

+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);

@@ -1071,7 +1078,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)

 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
-	r = amdgpu_cs_handle_lockup(adev, r);
 	return r;
 }

@@ -1092,7 +1098,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
-	struct fence *fence;
+	struct dma_fence *fence;
 	long r;

 	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
@@ -1108,8 +1114,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
-		r = fence_wait_timeout(fence, true, timeout);
-		fence_put(fence);
+		r = dma_fence_wait_timeout(fence, true, timeout);
+		dma_fence_put(fence);
 	} else
 		r = 1;

@@ -1123,6 +1129,180 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }

+/**
+ * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @user: drm_amdgpu_fence copied from user space
+ */
+static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
+					     struct drm_file *filp,
+					     struct drm_amdgpu_fence *user)
+{
+	struct amdgpu_ring *ring;
+	struct amdgpu_ctx *ctx;
+	struct dma_fence *fence;
+	int r;
+
+	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
+			       user->ring, &ring);
+	if (r)
+		return ERR_PTR(r);
+
+	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
+	if (ctx == NULL)
+		return ERR_PTR(-EINVAL);
+
+	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	amdgpu_ctx_put(ctx);
+
+	return fence;
+}
+
+/**
+ * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
+				     struct drm_file *filp,
+				     union drm_amdgpu_wait_fences *wait,
+				     struct drm_amdgpu_fence *fences)
+{
+	uint32_t fence_count = wait->in.fence_count;
+	unsigned int i;
+	long r = 1;
+
+	for (i = 0; i < fence_count; i++) {
+		struct dma_fence *fence;
+		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+
+		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+		else if (!fence)
+			continue;
+
+		r = dma_fence_wait_timeout(fence, true, timeout);
+		if (r < 0)
+			return r;
+
+		if (r == 0)
+			break;
+	}
+
+	memset(wait, 0, sizeof(*wait));
+	wait->out.status = (r > 0);
+
+	return 0;
+}
+
+/**
+ * amdgpu_cs_wait_any_fence - wait on any fence to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
+				    struct drm_file *filp,
+				    union drm_amdgpu_wait_fences *wait,
+				    struct drm_amdgpu_fence *fences)
+{
+	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+	uint32_t fence_count = wait->in.fence_count;
+	uint32_t first = ~0;
+	struct dma_fence **array;
+	unsigned int i;
+	long r;
+
+	/* Prepare the fence array */
+	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
+
+	if (array == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < fence_count; i++) {
+		struct dma_fence *fence;
+
+		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			goto err_free_fence_array;
+		} else if (fence) {
+			array[i] = fence;
+		} else { /* NULL, the fence has been already signaled */
+			r = 1;
+			goto out;
+		}
+	}
+
+	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
+				       &first);
+	if (r < 0)
+		goto err_free_fence_array;
+
+out:
+	memset(wait, 0, sizeof(*wait));
+	wait->out.status = (r > 0);
+	wait->out.first_signaled = first;
+	/* set return value 0 to indicate success */
+	r = 0;
+
+err_free_fence_array:
+	for (i = 0; i < fence_count; i++)
+		dma_fence_put(array[i]);
+	kfree(array);
+
+	return r;
+}
+
+/**
+ * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
+ *
+ * @dev: drm device
+ * @data: data from userspace
+ * @filp: file private
+ */
+int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	union drm_amdgpu_wait_fences *wait = data;
+	uint32_t fence_count = wait->in.fence_count;
+	struct drm_amdgpu_fence *fences_user;
+	struct drm_amdgpu_fence *fences;
+	int r;
+
+	/* Get the fences from userspace */
+	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
+			GFP_KERNEL);
+	if (fences == NULL)
+		return -ENOMEM;
+
+	fences_user = (void __user *)(unsigned long)(wait->in.fences);
+	if (copy_from_user(fences, fences_user,
+		sizeof(struct drm_amdgpu_fence) * fence_count)) {
+		r = -EFAULT;
+		goto err_free_fences;
+	}
+
+	if (wait->in.wait_all)
+		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
+	else
+		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
+
+err_free_fences:
+	kfree(fences);
+
+	return r;
+}
+
 /**
 * amdgpu_cs_find_bo_va - find bo_va for VM address
 *
@@ -1196,6 +1376,15 @@ int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser)
 		r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
 		if (unlikely(r))
 			return r;
+
+		if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			continue;
+
+		bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+		amdgpu_ttm_placement_from_domain(bo, bo->allowed_domains);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (unlikely(r))
+			return r;
 	}

 	return 0;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -35,7 +35,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
 	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
-			      sizeof(struct fence*), GFP_KERNEL);
+			      sizeof(struct dma_fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;

@@ -55,18 +55,18 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
 					  rq, amdgpu_sched_jobs);
 		if (r)
-			break;
+			goto failed;
 	}

-	if (i < adev->num_rings) {
-		for (j = 0; j < i; j++)
-			amd_sched_entity_fini(&adev->rings[j]->sched,
-					      &ctx->rings[j].entity);
-		kfree(ctx->fences);
-		ctx->fences = NULL;
-		return r;
-	}
 	return 0;
+
+failed:
+	for (j = 0; j < i; j++)
+		amd_sched_entity_fini(&adev->rings[j]->sched,
+				      &ctx->rings[j].entity);
+	kfree(ctx->fences);
+	ctx->fences = NULL;
+	return r;
 }

 static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
@@ -79,7 +79,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)

 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			fence_put(ctx->rings[i].fences[j]);
+			dma_fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx->fences);
 	ctx->fences = NULL;

@@ -241,39 +241,39 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 }

 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct fence *fence)
+			      struct dma_fence *fence)
 {
 	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
 	uint64_t seq = cring->sequence;
 	unsigned idx = 0;
-	struct fence *other = NULL;
+	struct dma_fence *other = NULL;

 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
 	if (other) {
 		signed long r;
-		r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
 		if (r < 0)
 			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
 	}

-	fence_get(fence);
+	dma_fence_get(fence);

 	spin_lock(&ctx->ring_lock);
 	cring->fences[idx] = fence;
 	cring->sequence++;
 	spin_unlock(&ctx->ring_lock);

-	fence_put(other);
+	dma_fence_put(other);

 	return seq;
 }

-struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				   struct amdgpu_ring *ring, uint64_t seq)
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+				       struct amdgpu_ring *ring, uint64_t seq)
 {
 	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-	struct fence *fence;
+	struct dma_fence *fence;

 	spin_lock(&ctx->ring_lock);

@@ -288,7 +288,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 		return NULL;
 	}

-	fence = fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
 	spin_unlock(&ctx->ring_lock);

 	return fence;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -264,7 +264,8 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev)
 	if (adev->vram_scratch.robj == NULL) {
 		r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE,
 				     PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 				     NULL, NULL, &adev->vram_scratch.robj);
 		if (r) {
 			return r;
@@ -442,13 +443,9 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 static void amdgpu_wb_fini(struct amdgpu_device *adev)
 {
 	if (adev->wb.wb_obj) {
-		if (!amdgpu_bo_reserve(adev->wb.wb_obj, false)) {
-			amdgpu_bo_kunmap(adev->wb.wb_obj);
-			amdgpu_bo_unpin(adev->wb.wb_obj);
-			amdgpu_bo_unreserve(adev->wb.wb_obj);
-		}
-		amdgpu_bo_unref(&adev->wb.wb_obj);
-		adev->wb.wb = NULL;
+		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
+				      &adev->wb.gpu_addr,
+				      (void **)&adev->wb.wb);
 		adev->wb.wb_obj = NULL;
 	}
 }
@@ -467,33 +464,14 @@ static int amdgpu_wb_init(struct amdgpu_device *adev)
 	int r;

 	if (adev->wb.wb_obj == NULL) {
-		r = amdgpu_bo_create(adev, AMDGPU_MAX_WB * 4, PAGE_SIZE, true,
-				     AMDGPU_GEM_DOMAIN_GTT, 0,  NULL, NULL,
-				     &adev->wb.wb_obj);
+		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * 4,
+					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
+					    (void **)&adev->wb.wb);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
 			return r;
 		}
-		r = amdgpu_bo_reserve(adev->wb.wb_obj, false);
-		if (unlikely(r != 0)) {
-			amdgpu_wb_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(adev->wb.wb_obj, AMDGPU_GEM_DOMAIN_GTT,
-				&adev->wb.gpu_addr);
-		if (r) {
-			amdgpu_bo_unreserve(adev->wb.wb_obj);
-			dev_warn(adev->dev, "(%d) pin WB bo failed\n", r);
-			amdgpu_wb_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(adev->wb.wb_obj, (void **)&adev->wb.wb);
-		amdgpu_bo_unreserve(adev->wb.wb_obj);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map WB bo failed\n", r);
-			amdgpu_wb_fini(adev);
-			return r;
-		}

 		adev->wb.num_wb = AMDGPU_MAX_WB;
 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
@@ -1038,6 +1016,13 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 			 amdgpu_vm_block_size);
 		amdgpu_vm_block_size = 9;
 	}
+
+	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
+	    !amdgpu_check_pot_argument(amdgpu_vram_page_split))) {
+		dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
+			 amdgpu_vram_page_split);
+		amdgpu_vram_page_split = 1024;
+	}
 }

 /**
@@ -1112,11 +1097,11 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].type == block_type) {
-			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
-									    state);
+		if (adev->ip_blocks[i].version->type == block_type) {
+			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+										     state);
 			if (r)
 				return r;
 			break;
@@ -1132,11 +1117,11 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].type == block_type) {
-			r = adev->ip_blocks[i].funcs->set_powergating_state((void *)adev,
-									    state);
+		if (adev->ip_blocks[i].version->type == block_type) {
+			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+										     state);
 			if (r)
 				return r;
 			break;
@@ -1151,10 +1136,10 @@ int amdgpu_wait_for_idle(struct amdgpu_device *adev,
 	int i, r;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].type == block_type) {
-			r = adev->ip_blocks[i].funcs->wait_for_idle((void *)adev);
+		if (adev->ip_blocks[i].version->type == block_type) {
+			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
 			if (r)
 				return r;
 			break;
@@ -1170,23 +1155,22 @@ bool amdgpu_is_idle(struct amdgpu_device *adev,
 	int i;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].type == block_type)
-			return adev->ip_blocks[i].funcs->is_idle((void *)adev);
+		if (adev->ip_blocks[i].version->type == block_type)
+			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
 	}
 	return true;

 }

-const struct amdgpu_ip_block_version * amdgpu_get_ip_block(
-					struct amdgpu_device *adev,
-					enum amd_ip_block_type type)
+struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev,
+					     enum amd_ip_block_type type)
 {
 	int i;

 	for (i = 0; i < adev->num_ip_blocks; i++)
-		if (adev->ip_blocks[i].type == type)
+		if (adev->ip_blocks[i].version->type == type)
 			return &adev->ip_blocks[i];

 	return NULL;
@@ -1207,38 +1191,75 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev,
 				enum amd_ip_block_type type,
 				u32 major, u32 minor)
 {
-	const struct amdgpu_ip_block_version *ip_block;
-	ip_block = amdgpu_get_ip_block(adev, type);
+	struct amdgpu_ip_block *ip_block = amdgpu_get_ip_block(adev, type);

-	if (ip_block && ((ip_block->major > major) ||
-			((ip_block->major == major) &&
-			(ip_block->minor >= minor))))
+	if (ip_block && ((ip_block->version->major > major) ||
+			((ip_block->version->major == major) &&
+			(ip_block->version->minor >= minor))))
 		return 0;

 	return 1;
 }

-static void amdgpu_whether_enable_virtual_display(struct amdgpu_device *adev)
+/**
+ * amdgpu_ip_block_add
+ *
+ * @adev: amdgpu_device pointer
+ * @ip_block_version: pointer to the IP to add
+ *
+ * Adds the IP block driver information to the collection of IPs
+ * on the asic.
+ */
+int amdgpu_ip_block_add(struct amdgpu_device *adev,
+			const struct amdgpu_ip_block_version *ip_block_version)
+{
+	if (!ip_block_version)
+		return -EINVAL;
+
+	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
+
+	return 0;
+}
+
+static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 {
 	adev->enable_virtual_display = false;

 	if (amdgpu_virtual_display) {
 		struct drm_device *ddev = adev->ddev;
 		const char *pci_address_name = pci_name(ddev->pdev);
-		char *pciaddstr, *pciaddstr_tmp, *pciaddname;
+		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

 		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
 		pciaddstr_tmp = pciaddstr;
-		while ((pciaddname = strsep(&pciaddstr_tmp, ";"))) {
+		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
+			pciaddname = strsep(&pciaddname_tmp, ",");
 			if (!strcmp(pci_address_name, pciaddname)) {
+				long num_crtc;
+				int res = -1;
+
 				adev->enable_virtual_display = true;
+
+				if (pciaddname_tmp)
+					res = kstrtol(pciaddname_tmp, 10,
+						      &num_crtc);
+
+				if (!res) {
+					if (num_crtc < 1)
+						num_crtc = 1;
+					if (num_crtc > 6)
+						num_crtc = 6;
+					adev->mode_info.num_crtc = num_crtc;
+				} else {
+					adev->mode_info.num_crtc = 1;
+				}
 				break;
 			}
 		}

-		DRM_INFO("virtual display string:%s, %s:virtual_display:%d\n",
-				 amdgpu_virtual_display, pci_address_name,
-				 adev->enable_virtual_display);
+		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
+			 amdgpu_virtual_display, pci_address_name,
+			 adev->enable_virtual_display, adev->mode_info.num_crtc);

 		kfree(pciaddstr);
 	}
@@ -1248,7 +1269,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 {
 	int i, r;

-	amdgpu_whether_enable_virtual_display(adev);
+	amdgpu_device_enable_virtual_display(adev);

 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
@@ -1300,33 +1321,24 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		return -EINVAL;
 	}

-	adev->ip_block_status = kcalloc(adev->num_ip_blocks,
-					sizeof(struct amdgpu_ip_block_status), GFP_KERNEL);
-	if (adev->ip_block_status == NULL)
-		return -ENOMEM;
-
-	if (adev->ip_blocks == NULL) {
-		DRM_ERROR("No IP blocks found!\n");
-		return r;
-	}
-
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d\n", i);
-			adev->ip_block_status[i].valid = false;
+			adev->ip_blocks[i].status.valid = false;
 		} else {
-			if (adev->ip_blocks[i].funcs->early_init) {
-				r = adev->ip_blocks[i].funcs->early_init((void *)adev);
+			if (adev->ip_blocks[i].version->funcs->early_init) {
+				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
 				if (r == -ENOENT) {
-					adev->ip_block_status[i].valid = false;
+					adev->ip_blocks[i].status.valid = false;
 				} else if (r) {
-					DRM_ERROR("early_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+					DRM_ERROR("early_init of IP block <%s> failed %d\n",
+						  adev->ip_blocks[i].version->funcs->name, r);
 					return r;
 				} else {
-					adev->ip_block_status[i].valid = true;
+					adev->ip_blocks[i].status.valid = true;
 				}
 			} else {
-				adev->ip_block_status[i].valid = true;
+				adev->ip_blocks[i].status.valid = true;
 			}
 		}
 	}
@@ -1342,22 +1354,23 @@ static int amdgpu_init(struct amdgpu_device *adev)
 	int i, r;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		r = adev->ip_blocks[i].funcs->sw_init((void *)adev);
+		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
 		if (r) {
-			DRM_ERROR("sw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
 			return r;
 		}
-		adev->ip_block_status[i].sw = true;
+		adev->ip_blocks[i].status.sw = true;
 		/* need to do gmc hw init early so we can allocate gpu mem */
-		if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			r = amdgpu_vram_scratch_init(adev);
 			if (r) {
 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
 				return r;
 			}
-			r = adev->ip_blocks[i].funcs->hw_init((void *)adev);
+			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 			if (r) {
 				DRM_ERROR("hw_init %d failed %d\n", i, r);
 				return r;
@@ -1367,22 +1380,23 @@ static int amdgpu_init(struct amdgpu_device *adev)
 				DRM_ERROR("amdgpu_wb_init failed %d\n", r);
 				return r;
 			}
-			adev->ip_block_status[i].hw = true;
+			adev->ip_blocks[i].status.hw = true;
 		}
 	}

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].sw)
+		if (!adev->ip_blocks[i].status.sw)
 			continue;
 		/* gmc hw init is done early */
-		if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC)
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC)
 			continue;
-		r = adev->ip_blocks[i].funcs->hw_init((void *)adev);
+		r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 		if (r) {
-			DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
 			return r;
 		}
-		adev->ip_block_status[i].hw = true;
+		adev->ip_blocks[i].status.hw = true;
 	}

 	return 0;
@@ -1393,25 +1407,26 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
 	int i = 0, r;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].funcs->late_init) {
-			r = adev->ip_blocks[i].funcs->late_init((void *)adev);
+		if (adev->ip_blocks[i].version->funcs->late_init) {
+			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
 			if (r) {
-				DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+				DRM_ERROR("late_init of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
 				return r;
 			}
-			adev->ip_block_status[i].late_initialized = true;
+			adev->ip_blocks[i].status.late_initialized = true;
 		}
 		/* skip CG for VCE/UVD, it's handled specially */
-		if (adev->ip_blocks[i].type != AMD_IP_BLOCK_TYPE_UVD &&
-		    adev->ip_blocks[i].type != AMD_IP_BLOCK_TYPE_VCE) {
+		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
 			/* enable clockgating to save power */
-			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
-									    AMD_CG_STATE_GATE);
+			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+										     AMD_CG_STATE_GATE);
 			if (r) {
 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].funcs->name, r);
+					  adev->ip_blocks[i].version->funcs->name, r);
 				return r;
 			}
 		}
@@ -1426,68 +1441,77 @@ static int amdgpu_fini(struct amdgpu_device *adev)

 	/* need to disable SMC first */
 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].hw)
+		if (!adev->ip_blocks[i].status.hw)
 			continue;
-		if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_SMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
-			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
-									    AMD_CG_STATE_UNGATE);
+			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+										     AMD_CG_STATE_UNGATE);
 			if (r) {
 				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].funcs->name, r);
+					  adev->ip_blocks[i].version->funcs->name, r);
 				return r;
 			}
-			r = adev->ip_blocks[i].funcs->hw_fini((void *)adev);
+			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
 			/* XXX handle errors */
 			if (r) {
 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
-					  adev->ip_blocks[i].funcs->name, r);
+					  adev->ip_blocks[i].version->funcs->name, r);
 			}
-			adev->ip_block_status[i].hw = false;
+			adev->ip_blocks[i].status.hw = false;
 			break;
 		}
 	}

 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
-		if (!adev->ip_block_status[i].hw)
+		if (!adev->ip_blocks[i].status.hw)
 			continue;
-		if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) {
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
 			amdgpu_wb_fini(adev);
 			amdgpu_vram_scratch_fini(adev);
 		}
-		/* ungate blocks before hw fini so that we can shutdown the blocks safely */
-		r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
-								    AMD_CG_STATE_UNGATE);
-		if (r) {
-			DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
-			return r;
+
+		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
+			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+										     AMD_CG_STATE_UNGATE);
+			if (r) {
+				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
+				return r;
+			}
 		}
-		r = adev->ip_blocks[i].funcs->hw_fini((void *)adev);
+
+		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
 		/* XXX handle errors */
 		if (r) {
-			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
 		}
-		adev->ip_block_status[i].hw = false;
+
+		adev->ip_blocks[i].status.hw = false;
 	}

 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
-		if (!adev->ip_block_status[i].sw)
+		if (!adev->ip_blocks[i].status.sw)
 			continue;
-		r = adev->ip_blocks[i].funcs->sw_fini((void *)adev);
+		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
 		/* XXX handle errors */
 		if (r) {
-			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
 		}
-		adev->ip_block_status[i].sw = false;
-		adev->ip_block_status[i].valid = false;
+		adev->ip_blocks[i].status.sw = false;
+		adev->ip_blocks[i].status.valid = false;
 	}

 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
-		if (!adev->ip_block_status[i].late_initialized)
+		if (!adev->ip_blocks[i].status.late_initialized)
 			continue;
-		if (adev->ip_blocks[i].funcs->late_fini)
-			adev->ip_blocks[i].funcs->late_fini((void *)adev);
-		adev->ip_block_status[i].late_initialized = false;
+		if (adev->ip_blocks[i].version->funcs->late_fini)
+			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
+		adev->ip_blocks[i].status.late_initialized = false;
 	}

 	return 0;
@@ -1505,21 +1529,23 @@ int amdgpu_suspend(struct amdgpu_device *adev)
 	}

 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* ungate blocks so that suspend can properly shut them down */
 		if (i != AMD_IP_BLOCK_TYPE_SMC) {
-			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
-									    AMD_CG_STATE_UNGATE);
+			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+										     AMD_CG_STATE_UNGATE);
 			if (r) {
-				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+				DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
+					  adev->ip_blocks[i].version->funcs->name, r);
 			}
 		}
 		/* XXX handle errors */
-		r = adev->ip_blocks[i].funcs->suspend(adev);
+		r = adev->ip_blocks[i].version->funcs->suspend(adev);
 		/* XXX handle errors */
 		if (r) {
-			DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+			DRM_ERROR("suspend of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
 		}
 	}

@@ -1531,11 +1557,12 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	int i, r;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		r = adev->ip_blocks[i].funcs->resume(adev);
+		r = adev->ip_blocks[i].version->funcs->resume(adev);
 		if (r) {
-			DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
+			DRM_ERROR("resume of IP block <%s> failed %d\n",
+				  adev->ip_blocks[i].version->funcs->name, r);
 			return r;
 		}
 	}
@@ -1586,7 +1613,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
-	adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);

 	adev->smc_rreg = &amdgpu_invalid_rreg;
 	adev->smc_wreg = &amdgpu_invalid_wreg;
@@ -1846,8 +1873,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
-	kfree(adev->ip_block_status);
-	adev->ip_block_status = NULL;
 	adev->accel_working = false;
 	/* free i2c buses */
 	amdgpu_i2c_fini(adev);
@@ -1943,7 +1968,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)

 	r = amdgpu_suspend(adev);

-	/* evict remaining vram memory */
+	/* evict remaining vram memory
+	 * This second call to evict vram is to evict the gart page table
+	 * using the CPU.
+	 */
 	amdgpu_bo_evict_vram(adev);

 	amdgpu_atombios_scratch_regs_save(adev);
@@ -2085,13 +2113,13 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
 	bool asic_hang = false;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_blocks[i].funcs->check_soft_reset)
-			adev->ip_block_status[i].hang =
-				adev->ip_blocks[i].funcs->check_soft_reset(adev);
-		if (adev->ip_block_status[i].hang) {
-			DRM_INFO("IP block:%d is hang!\n", i);
+		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
+			adev->ip_blocks[i].status.hang =
+				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
+		if (adev->ip_blocks[i].status.hang) {
+			DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
 			asic_hang = true;
 		}
 	}
@@ -2103,11 +2131,11 @@ static int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_block_status[i].hang &&
-		    adev->ip_blocks[i].funcs->pre_soft_reset) {
-			r = adev->ip_blocks[i].funcs->pre_soft_reset(adev);
+		if (adev->ip_blocks[i].status.hang &&
+		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
+			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
 			if (r)
 				return r;
 		}
@@ -2121,13 +2149,13 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev)
 	int i;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if ((adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) ||
-		    (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_SMC) ||
-		    (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_ACP) ||
-		    (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_DCE)) {
-			if (adev->ip_block_status[i].hang) {
+		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
+		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
+		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
+		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)) {
+			if (adev->ip_blocks[i].status.hang) {
 				DRM_INFO("Some block need full reset!\n");
 				return true;
 			}
@@ -2141,11 +2169,11 @@ static int amdgpu_soft_reset(struct amdgpu_device *adev)
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_block_status[i].hang &&
-		    adev->ip_blocks[i].funcs->soft_reset) {
-			r = adev->ip_blocks[i].funcs->soft_reset(adev);
+		if (adev->ip_blocks[i].status.hang &&
+		    adev->ip_blocks[i].version->funcs->soft_reset) {
+			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
 			if (r)
 				return r;
 		}
@@ -2159,11 +2187,11 @@ static int amdgpu_post_soft_reset(struct amdgpu_device *adev)
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_block_status[i].valid)
+		if (!adev->ip_blocks[i].status.valid)
 			continue;
-		if (adev->ip_block_status[i].hang &&
-		    adev->ip_blocks[i].funcs->post_soft_reset)
-			r = adev->ip_blocks[i].funcs->post_soft_reset(adev);
+		if (adev->ip_blocks[i].status.hang &&
+		    adev->ip_blocks[i].version->funcs->post_soft_reset)
+			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
 		if (r)
 			return r;
 	}
@@ -2182,7 +2210,7 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
 static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 					   struct amdgpu_ring *ring,
 					   struct amdgpu_bo *bo,
-					   struct fence **fence)
+					   struct dma_fence **fence)
 {
 	uint32_t domain;
 	int r;
@@ -2298,30 +2326,30 @@ retry:
 		if (need_full_reset && amdgpu_need_backup(adev)) {
 			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 			struct amdgpu_bo *bo, *tmp;
-			struct fence *fence = NULL, *next = NULL;
+			struct dma_fence *fence = NULL, *next = NULL;

 			DRM_INFO("recover vram bo from shadow\n");
 			mutex_lock(&adev->shadow_list_lock);
 			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
 				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
 				if (fence) {
-					r = fence_wait(fence, false);
+					r = dma_fence_wait(fence, false);
 					if (r) {
 						WARN(r, "recovery from shadow isn't comleted\n");
 						break;
 					}
 				}

-				fence_put(fence);
+				dma_fence_put(fence);
 				fence = next;
 			}
 			mutex_unlock(&adev->shadow_list_lock);
 			if (fence) {
-				r = fence_wait(fence, false);
+				r = dma_fence_wait(fence, false);
 				if (r)
 					WARN(r, "recovery from shadow isn't comleted\n");
 			}
-			fence_put(fence);
+			dma_fence_put(fence);
 		}
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
@@ -2470,9 +2498,6 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
 	adev->debugfs[adev->debugfs_count].num_files = nfiles;
 	adev->debugfs_count = i;
 #if defined(CONFIG_DEBUG_FS)
-	drm_debugfs_create_files(files, nfiles,
-				 adev->ddev->control->debugfs_root,
-				 adev->ddev->control);
 	drm_debugfs_create_files(files, nfiles,
 				 adev->ddev->primary->debugfs_root,
 				 adev->ddev->primary);
@@ -2486,9 +2511,6 @@ static void amdgpu_debugfs_remove_files(struct amdgpu_device *adev)
 	unsigned i;

 	for (i = 0; i < adev->debugfs_count; i++) {
-		drm_debugfs_remove_files(adev->debugfs[i].files,
-					 adev->debugfs[i].num_files,
-					 adev->ddev->control);
 		drm_debugfs_remove_files(adev->debugfs[i].files,
 					 adev->debugfs[i].num_files,
 					 adev->ddev->primary);
@@ -2517,6 +2539,13 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		se_bank = (*pos >> 24) & 0x3FF;
 		sh_bank = (*pos >> 34) & 0x3FF;
 		instance_bank = (*pos >> 44) & 0x3FF;
+
+		if (se_bank == 0x3FF)
+			se_bank = 0xFFFFFFFF;
+		if (sh_bank == 0x3FF)
+			sh_bank = 0xFFFFFFFF;
+		if (instance_bank == 0x3FF)
+			instance_bank = 0xFFFFFFFF;
 		use_bank = 1;
 	} else {
 		use_bank = 0;
@@ -2525,8 +2554,8 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 	*pos &= 0x3FFFF;

 	if (use_bank) {
-		if (sh_bank >= adev->gfx.config.max_sh_per_se ||
-		    se_bank >= adev->gfx.config.max_shader_engines)
+		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
+		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
 			return -EINVAL;
 		mutex_lock(&adev->grbm_idx_mutex);
 		amdgpu_gfx_select_se_sh(adev, se_bank,
@@ -2573,10 +2602,45 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
 	struct amdgpu_device *adev = f->f_inode->i_private;
 	ssize_t result = 0;
 	int r;
+	bool pm_pg_lock, use_bank;
+	unsigned instance_bank, sh_bank, se_bank;

 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;

+	/* are we reading registers for which a PG lock is necessary? */
+	pm_pg_lock = (*pos >> 23) & 1;
+
+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos >> 24) & 0x3FF;
+		sh_bank = (*pos >> 34) & 0x3FF;
+		instance_bank = (*pos >> 44) & 0x3FF;
+
+		if (se_bank == 0x3FF)
+			se_bank = 0xFFFFFFFF;
+		if (sh_bank == 0x3FF)
+			sh_bank = 0xFFFFFFFF;
+		if (instance_bank == 0x3FF)
+			instance_bank = 0xFFFFFFFF;
+		use_bank = 1;
+	} else {
+		use_bank = 0;
+	}
+
+	*pos &= 0x3FFFF;
+
+	if (use_bank) {
+		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
+		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
+	if (pm_pg_lock)
+		mutex_lock(&adev->pm.mutex);
+
 	while (size) {
 		uint32_t value;

@@ -2595,6 +2659,14 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
 		size -= 4;
 	}

+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
+	if (pm_pg_lock)
+		mutex_unlock(&adev->pm.mutex);
+
 	return result;
 }

@@ -2857,6 +2929,116 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 	return !r ? 4 : r;
 }

+static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	int r, x;
+	ssize_t result=0;
+	uint32_t offset, se, sh, cu, wave, simd, data[32];
+
+	if (size & 3 || *pos & 3)
+		return -EINVAL;
+
+	/* decode offset */
+	offset = (*pos & 0x7F);
+	se = ((*pos >> 7) & 0xFF);
+	sh = ((*pos >> 15) & 0xFF);
+	cu = ((*pos >> 23) & 0xFF);
+	wave = ((*pos >> 31) & 0xFF);
+	simd = ((*pos >> 37) & 0xFF);
+
+	/* switch to the specific se/sh/cu */
+	mutex_lock(&adev->grbm_idx_mutex);
+	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+
+	x = 0;
+	if (adev->gfx.funcs->read_wave_data)
+		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
+
+	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	if (!x)
+		return -EINVAL;
+
+	while (size && (offset < x * 4)) {
+		uint32_t value;
+
+		value = data[offset >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			return r;
+
+		result += 4;
+		buf += 4;
+		offset += 4;
+		size -= 4;
+	}
+
+	return result;
+}
+
+static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	int r;
+	ssize_t result = 0;
+	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
+
+	if (size & 3 || *pos & 3)
+		return -EINVAL;
+
+	/* decode offset */
+	offset = (*pos & 0xFFF);       /* in dwords */
+	se = ((*pos >> 12) & 0xFF);
+	sh = ((*pos >> 20) & 0xFF);
+	cu = ((*pos >> 28) & 0xFF);
+	wave = ((*pos >> 36) & 0xFF);
+	simd = ((*pos >> 44) & 0xFF);
+	thread = ((*pos >> 52) & 0xFF);
+	bank = ((*pos >> 60) & 1);
+
+	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* switch to the specific se/sh/cu */
+	mutex_lock(&adev->grbm_idx_mutex);
+	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+
+	if (bank == 0) {
+		if (adev->gfx.funcs->read_wave_vgprs)
+			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
+	} else {
+		if (adev->gfx.funcs->read_wave_sgprs)
+			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
+	}
+
+	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	while (size) {
+		uint32_t value;
+
+		value = data[offset++];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			result = r;
+			goto err;
+		}
+
+		result += 4;
+		buf += 4;
+		size -= 4;
+	}
+
+err:
+	kfree(data);
+	return result;
+}
+
 static const struct file_operations amdgpu_debugfs_regs_fops = {
 	.owner = THIS_MODULE,
 	.read = amdgpu_debugfs_regs_read,
@@ -2894,6 +3076,17 @@ static const struct file_operations amdgpu_debugfs_sensors_fops = {
 	.llseek = default_llseek
 };

+static const struct file_operations amdgpu_debugfs_wave_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_wave_read,
+	.llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_gpr_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gpr_read,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs_didt_fops,
@@ -2901,6 +3094,8 @@ static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_smc_fops,
 	&amdgpu_debugfs_gca_config_fops,
 	&amdgpu_debugfs_sensors_fops,
+	&amdgpu_debugfs_wave_fops,
+	&amdgpu_debugfs_gpr_fops,
 };

 static const char *debugfs_regs_names[] = {
@@ -2910,6 +3105,8 @@ static const char *debugfs_regs_names[] = {
 	"amdgpu_regs_smc",
 	"amdgpu_gca_config",
 	"amdgpu_sensors",
+	"amdgpu_wave",
+	"amdgpu_gpr",
 };

 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,29 +35,29 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>

-static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
+static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
 {
 	struct amdgpu_flip_work *work =
 		container_of(cb, struct amdgpu_flip_work, cb);

-	fence_put(f);
+	dma_fence_put(f);
 	schedule_work(&work->flip_work.work);
 }

 static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
-				     struct fence **f)
+				     struct dma_fence **f)
 {
-	struct fence *fence= *f;
+	struct dma_fence *fence= *f;

 	if (fence == NULL)
 		return false;

 	*f = NULL;

-	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+	if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
 		return true;

-	fence_put(fence);
+	dma_fence_put(fence);
 	return false;
 }

@@ -68,9 +68,9 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	struct amdgpu_flip_work *work =
 		container_of(delayed_work, struct amdgpu_flip_work, flip_work);
 	struct amdgpu_device *adev = work->adev;
-	struct amdgpu_crtc *amdgpuCrtc = adev->mode_info.crtcs[work->crtc_id];
+	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[work->crtc_id];

-	struct drm_crtc *crtc = &amdgpuCrtc->base;
+	struct drm_crtc *crtc = &amdgpu_crtc->base;
 	unsigned long flags;
 	unsigned i;
 	int vpos, hpos;
@@ -85,14 +85,14 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	/* Wait until we're out of the vertical blank period before the one
 	 * targeted by the flip
 	 */
-	if (amdgpuCrtc->enabled &&
+	if (amdgpu_crtc->enabled &&
 	    (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
 					&vpos, &hpos, NULL, NULL,
 					&crtc->hwmode)
 	     & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
 	    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
 	    (int)(work->target_vblank -
-		  amdgpu_get_vblank_counter_kms(adev->ddev, amdgpuCrtc->crtc_id)) > 0) {
+		  amdgpu_get_vblank_counter_kms(adev->ddev, amdgpu_crtc->crtc_id)) > 0) {
 		schedule_delayed_work(&work->flip_work, usecs_to_jiffies(1000));
 		return;
 	}
@@ -104,12 +104,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base, work->async);

 	/* Set the flip status */
-	amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
+	amdgpu_crtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);


 	DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
-					 amdgpuCrtc->crtc_id, amdgpuCrtc, work);
+					 amdgpu_crtc->crtc_id, amdgpu_crtc, work);

 }

@@ -187,7 +187,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 		goto cleanup;
 	}

-	r = amdgpu_bo_pin_restricted(new_abo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
+	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
 	if (unlikely(r != 0)) {
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new abo buffer before flip\n");
@@ -244,9 +244,9 @@ unreserve:

 cleanup:
 	amdgpu_bo_unref(&work->old_abo);
-	fence_put(work->excl);
+	dma_fence_put(work->excl);
 	for (i = 0; i < work->shared_count; ++i)
-		fence_put(work->shared[i]);
+		dma_fence_put(work->shared[i]);
 	kfree(work->shared);
 	kfree(work);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -553,9 +553,10 @@ int amdgpu_parse_extended_power_table(struct amdgpu_device *adev)
 				entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *)
 					((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record));
 			}
-			for (i = 0; i < states->numEntries; i++) {
-				if (i >= AMDGPU_MAX_VCE_LEVELS)
-					break;
+			adev->pm.dpm.num_of_vce_states =
+					states->numEntries > AMD_MAX_VCE_LEVELS ?
+					AMD_MAX_VCE_LEVELS : states->numEntries;
+			for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) {
 				vce_clk = (VCEClockInfo *)
 					((u8 *)&array->entries[0] +
 					 (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo)));
@@ -955,3 +956,12 @@ u8 amdgpu_encode_pci_lane_width(u32 lanes)

 	return encoded_lanes[lanes];
 }
+
+struct amd_vce_state*
+amdgpu_get_vce_clock_state(struct amdgpu_device *adev, unsigned idx)
+{
+	if (idx < adev->pm.dpm.num_of_vce_states)
+		return &adev->pm.dpm.vce_states[idx];
+
+	return NULL;
+}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -23,6 +23,453 @@
 #ifndef __AMDGPU_DPM_H__
 #define __AMDGPU_DPM_H__

+enum amdgpu_int_thermal_type {
+	THERMAL_TYPE_NONE,
+	THERMAL_TYPE_EXTERNAL,
+	THERMAL_TYPE_EXTERNAL_GPIO,
+	THERMAL_TYPE_RV6XX,
+	THERMAL_TYPE_RV770,
+	THERMAL_TYPE_ADT7473_WITH_INTERNAL,
+	THERMAL_TYPE_EVERGREEN,
+	THERMAL_TYPE_SUMO,
+	THERMAL_TYPE_NI,
+	THERMAL_TYPE_SI,
+	THERMAL_TYPE_EMC2103_WITH_INTERNAL,
+	THERMAL_TYPE_CI,
+	THERMAL_TYPE_KV,
+};
+
+enum amdgpu_dpm_auto_throttle_src {
+	AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL,
+	AMDGPU_DPM_AUTO_THROTTLE_SRC_EXTERNAL
+};
+
+enum amdgpu_dpm_event_src {
+	AMDGPU_DPM_EVENT_SRC_ANALOG = 0,
+	AMDGPU_DPM_EVENT_SRC_EXTERNAL = 1,
+	AMDGPU_DPM_EVENT_SRC_DIGITAL = 2,
+	AMDGPU_DPM_EVENT_SRC_ANALOG_OR_EXTERNAL = 3,
+	AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
+};
+
+#define SCLK_DEEP_SLEEP_MASK 0x8
+
+struct amdgpu_ps {
+	u32 caps; /* vbios flags */
+	u32 class; /* vbios flags */
+	u32 class2; /* vbios flags */
+	/* UVD clocks */
+	u32 vclk;
+	u32 dclk;
+	/* VCE clocks */
+	u32 evclk;
+	u32 ecclk;
+	bool vce_active;
+	enum amd_vce_level vce_level;
+	/* asic priv */
+	void *ps_priv;
+};
+
+struct amdgpu_dpm_thermal {
+	/* thermal interrupt work */
+	struct work_struct work;
+	/* low temperature threshold */
+	int                min_temp;
+	/* high temperature threshold */
+	int                max_temp;
+	/* was last interrupt low to high or high to low */
+	bool               high_to_low;
+	/* interrupt source */
+	struct amdgpu_irq_src	irq;
+};
+
+enum amdgpu_clk_action
+{
+	AMDGPU_SCLK_UP = 1,
+	AMDGPU_SCLK_DOWN
+};
+
+struct amdgpu_blacklist_clocks
+{
+	u32 sclk;
+	u32 mclk;
+	enum amdgpu_clk_action action;
+};
+
+struct amdgpu_clock_and_voltage_limits {
+	u32 sclk;
+	u32 mclk;
+	u16 vddc;
+	u16 vddci;
+};
+
+struct amdgpu_clock_array {
+	u32 count;
+	u32 *values;
+};
+
+struct amdgpu_clock_voltage_dependency_entry {
+	u32 clk;
+	u16 v;
+};
+
+struct amdgpu_clock_voltage_dependency_table {
+	u32 count;
+	struct amdgpu_clock_voltage_dependency_entry *entries;
+};
+
+union amdgpu_cac_leakage_entry {
+	struct {
+		u16 vddc;
+		u32 leakage;
+	};
+	struct {
+		u16 vddc1;
+		u16 vddc2;
+		u16 vddc3;
+	};
+};
+
+struct amdgpu_cac_leakage_table {
+	u32 count;
+	union amdgpu_cac_leakage_entry *entries;
+};
+
+struct amdgpu_phase_shedding_limits_entry {
+	u16 voltage;
+	u32 sclk;
+	u32 mclk;
+};
+
+struct amdgpu_phase_shedding_limits_table {
+	u32 count;
+	struct amdgpu_phase_shedding_limits_entry *entries;
+};
+
+struct amdgpu_uvd_clock_voltage_dependency_entry {
+	u32 vclk;
+	u32 dclk;
+	u16 v;
+};
+
+struct amdgpu_uvd_clock_voltage_dependency_table {
+	u8 count;
+	struct amdgpu_uvd_clock_voltage_dependency_entry *entries;
+};
+
+struct amdgpu_vce_clock_voltage_dependency_entry {
+	u32 ecclk;
+	u32 evclk;
+	u16 v;
+};
+
+struct amdgpu_vce_clock_voltage_dependency_table {
+	u8 count;
+	struct amdgpu_vce_clock_voltage_dependency_entry *entries;
+};
+
+struct amdgpu_ppm_table {
+	u8 ppm_design;
+	u16 cpu_core_number;
+	u32 platform_tdp;
+	u32 small_ac_platform_tdp;
+	u32 platform_tdc;
+	u32 small_ac_platform_tdc;
+	u32 apu_tdp;
+	u32 dgpu_tdp;
+	u32 dgpu_ulv_power;
+	u32 tj_max;
+};
+
+struct amdgpu_cac_tdp_table {
+	u16 tdp;
+	u16 configurable_tdp;
+	u16 tdc;
+	u16 battery_power_limit;
+	u16 small_power_limit;
+	u16 low_cac_leakage;
+	u16 high_cac_leakage;
+	u16 maximum_power_delivery_limit;
+};
+
+struct amdgpu_dpm_dynamic_state {
+	struct amdgpu_clock_voltage_dependency_table vddc_dependency_on_sclk;
+	struct amdgpu_clock_voltage_dependency_table vddci_dependency_on_mclk;
+	struct amdgpu_clock_voltage_dependency_table vddc_dependency_on_mclk;
+	struct amdgpu_clock_voltage_dependency_table mvdd_dependency_on_mclk;
+	struct amdgpu_clock_voltage_dependency_table vddc_dependency_on_dispclk;
+	struct amdgpu_uvd_clock_voltage_dependency_table uvd_clock_voltage_dependency_table;
+	struct amdgpu_vce_clock_voltage_dependency_table vce_clock_voltage_dependency_table;
+	struct amdgpu_clock_voltage_dependency_table samu_clock_voltage_dependency_table;
+	struct amdgpu_clock_voltage_dependency_table acp_clock_voltage_dependency_table;
+	struct amdgpu_clock_voltage_dependency_table vddgfx_dependency_on_sclk;
+	struct amdgpu_clock_array valid_sclk_values;
+	struct amdgpu_clock_array valid_mclk_values;
+	struct amdgpu_clock_and_voltage_limits max_clock_voltage_on_dc;
+	struct amdgpu_clock_and_voltage_limits max_clock_voltage_on_ac;
+	u32 mclk_sclk_ratio;
+	u32 sclk_mclk_delta;
+	u16 vddc_vddci_delta;
+	u16 min_vddc_for_pcie_gen2;
+	struct amdgpu_cac_leakage_table cac_leakage_table;
+	struct amdgpu_phase_shedding_limits_table phase_shedding_limits_table;
+	struct amdgpu_ppm_table *ppm_table;
+	struct amdgpu_cac_tdp_table *cac_tdp_table;
+};
+
+struct amdgpu_dpm_fan {
+	u16 t_min;
+	u16 t_med;
+	u16 t_high;
+	u16 pwm_min;
+	u16 pwm_med;
+	u16 pwm_high;
+	u8 t_hyst;
+	u32 cycle_delay;
+	u16 t_max;
+	u8 control_mode;
+	u16 default_max_fan_pwm;
+	u16 default_fan_output_sensitivity;
+	u16 fan_output_sensitivity;
+	bool ucode_fan_control;
+};
+
+enum amdgpu_pcie_gen {
+	AMDGPU_PCIE_GEN1 = 0,
+	AMDGPU_PCIE_GEN2 = 1,
+	AMDGPU_PCIE_GEN3 = 2,
+	AMDGPU_PCIE_GEN_INVALID = 0xffff
+};
+
+enum amdgpu_dpm_forced_level {
+	AMDGPU_DPM_FORCED_LEVEL_AUTO = 0,
+	AMDGPU_DPM_FORCED_LEVEL_LOW = 1,
+	AMDGPU_DPM_FORCED_LEVEL_HIGH = 2,
+	AMDGPU_DPM_FORCED_LEVEL_MANUAL = 3,
+};
+
+struct amdgpu_dpm_funcs {
+	int (*get_temperature)(struct amdgpu_device *adev);
+	int (*pre_set_power_state)(struct amdgpu_device *adev);
+	int (*set_power_state)(struct amdgpu_device *adev);
+	void (*post_set_power_state)(struct amdgpu_device *adev);
+	void (*display_configuration_changed)(struct amdgpu_device *adev);
+	u32 (*get_sclk)(struct amdgpu_device *adev, bool low);
+	u32 (*get_mclk)(struct amdgpu_device *adev, bool low);
+	void (*print_power_state)(struct amdgpu_device *adev, struct amdgpu_ps *ps);
+	void (*debugfs_print_current_performance_level)(struct amdgpu_device *adev, struct seq_file *m);
+	int (*force_performance_level)(struct amdgpu_device *adev, enum amdgpu_dpm_forced_level level);
+	bool (*vblank_too_short)(struct amdgpu_device *adev);
+	void (*powergate_uvd)(struct amdgpu_device *adev, bool gate);
+	void (*powergate_vce)(struct amdgpu_device *adev, bool gate);
+	void (*enable_bapm)(struct amdgpu_device *adev, bool enable);
+	void (*set_fan_control_mode)(struct amdgpu_device *adev, u32 mode);
+	u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
+	int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
+	int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
+	int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
+	int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(struct amdgpu_device *adev);
+	int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*get_mclk_od)(struct amdgpu_device *adev);
+	int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*check_state_equal)(struct amdgpu_device *adev,
+				struct amdgpu_ps *cps,
+				struct amdgpu_ps *rps,
+				bool *equal);
+
+	struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device *adev, unsigned idx);
+};
+
+#define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
+#define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev))
+#define amdgpu_dpm_post_set_power_state(adev) (adev)->pm.funcs->post_set_power_state((adev))
+#define amdgpu_dpm_display_configuration_changed(adev) (adev)->pm.funcs->display_configuration_changed((adev))
+#define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps))
+#define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
+#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
+
+#define amdgpu_dpm_read_sensor(adev, idx, value) \
+	((adev)->pp_enabled ? \
+		(adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \
+		-EINVAL)
+
+#define amdgpu_dpm_get_temperature(adev) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
+	      (adev)->pm.funcs->get_temperature((adev)))
+
+#define amdgpu_dpm_set_fan_control_mode(adev, m) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
+	      (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
+
+#define amdgpu_dpm_get_fan_control_mode(adev) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
+	      (adev)->pm.funcs->get_fan_control_mode((adev)))
+
+#define amdgpu_dpm_set_fan_speed_percent(adev, s) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
+	      (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
+
+#define amdgpu_dpm_get_fan_speed_percent(adev, s) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
+	      (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
+
+#define amdgpu_dpm_get_fan_speed_rpm(adev, s) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->get_fan_speed_rpm((adev)->powerplay.pp_handle, (s)) : \
+	      -EINVAL)
+
+#define amdgpu_dpm_get_sclk(adev, l) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
+		(adev)->pm.funcs->get_sclk((adev), (l)))
+
+#define amdgpu_dpm_get_mclk(adev, l)  \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
+	      (adev)->pm.funcs->get_mclk((adev), (l)))
+
+
+#define amdgpu_dpm_force_performance_level(adev, l) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
+	      (adev)->pm.funcs->force_performance_level((adev), (l)))
+
+#define amdgpu_dpm_powergate_uvd(adev, g) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
+	      (adev)->pm.funcs->powergate_uvd((adev), (g)))
+
+#define amdgpu_dpm_powergate_vce(adev, g) \
+	((adev)->pp_enabled ?						\
+	      (adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
+	      (adev)->pm.funcs->powergate_vce((adev), (g)))
+
+#define amdgpu_dpm_get_current_power_state(adev) \
+	(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_get_performance_level(adev) \
+	(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_get_pp_num_states(adev, data) \
+	(adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)
+
+#define amdgpu_dpm_get_pp_table(adev, table) \
+	(adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table)
+
+#define amdgpu_dpm_set_pp_table(adev, buf, size) \
+	(adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size)
+
+#define amdgpu_dpm_print_clock_levels(adev, type, buf) \
+	(adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf)
+
+#define amdgpu_dpm_force_clock_level(adev, type, level) \
+		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
+
+#define amdgpu_dpm_get_sclk_od(adev) \
+	(adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_set_sclk_od(adev, value) \
+	(adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
+
+#define amdgpu_dpm_get_mclk_od(adev) \
+	((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
+
+#define amdgpu_dpm_set_mclk_od(adev, value) \
+	((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
+
+#define amdgpu_dpm_dispatch_task(adev, event_id, input, output)		\
+	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
+
+#define amgdpu_dpm_check_state_equal(adev, cps, rps, equal) (adev)->pm.funcs->check_state_equal((adev), (cps),(rps),(equal))
+
+#define amdgpu_dpm_get_vce_clock_state(adev, i)				\
+	((adev)->pp_enabled ?						\
+	 (adev)->powerplay.pp_funcs->get_vce_clock_state((adev)->powerplay.pp_handle, (i)) : \
+	 (adev)->pm.funcs->get_vce_clock_state((adev), (i)))
+
+struct amdgpu_dpm {
+	struct amdgpu_ps        *ps;
+	/* number of valid power states */
+	int                     num_ps;
+	/* current power state that is active */
+	struct amdgpu_ps        *current_ps;
+	/* requested power state */
+	struct amdgpu_ps        *requested_ps;
+	/* boot up power state */
+	struct amdgpu_ps        *boot_ps;
+	/* default uvd power state */
+	struct amdgpu_ps        *uvd_ps;
+	/* vce requirements */
+	u32                  num_of_vce_states;
+	struct amd_vce_state vce_states[AMD_MAX_VCE_LEVELS];
+	enum amd_vce_level vce_level;
+	enum amd_pm_state_type state;
+	enum amd_pm_state_type user_state;
+	enum amd_pm_state_type last_state;
+	enum amd_pm_state_type last_user_state;
+	u32                     platform_caps;
+	u32                     voltage_response_time;
+	u32                     backbias_response_time;
+	void                    *priv;
+	u32			new_active_crtcs;
+	int			new_active_crtc_count;
+	u32			current_active_crtcs;
+	int			current_active_crtc_count;
+	struct amdgpu_dpm_dynamic_state dyn_state;
+	struct amdgpu_dpm_fan fan;
+	u32 tdp_limit;
+	u32 near_tdp_limit;
+	u32 near_tdp_limit_adjusted;
+	u32 sq_ramping_threshold;
+	u32 cac_leakage;
+	u16 tdp_od_limit;
+	u32 tdp_adjustment;
+	u16 load_line_slope;
+	bool power_control;
+	bool ac_power;
+	/* special states active */
+	bool                    thermal_active;
+	bool                    uvd_active;
+	bool                    vce_active;
+	/* thermal handling */
+	struct amdgpu_dpm_thermal thermal;
+	/* forced levels */
+	enum amdgpu_dpm_forced_level forced_level;
+};
+
+struct amdgpu_pm {
+	struct mutex		mutex;
+	u32                     current_sclk;
+	u32                     current_mclk;
+	u32                     default_sclk;
+	u32                     default_mclk;
+	struct amdgpu_i2c_chan *i2c_bus;
+	/* internal thermal controller on rv6xx+ */
+	enum amdgpu_int_thermal_type int_thermal_type;
+	struct device	        *int_hwmon_dev;
+	/* fan control parameters */
+	bool                    no_fan;
+	u8                      fan_pulses_per_revolution;
+	u8                      fan_min_rpm;
+	u8                      fan_max_rpm;
+	/* dpm */
+	bool                    dpm_enabled;
+	bool                    sysfs_initialized;
+	struct amdgpu_dpm       dpm;
+	const struct firmware	*fw;	/* SMC firmware */
+	uint32_t                fw_version;
+	const struct amdgpu_dpm_funcs *funcs;
+	uint32_t                pcie_gen_mask;
+	uint32_t                pcie_mlw_mask;
+	struct amd_pp_display_configuration pm_display_cfg;/* set by DAL */
+};
+
 #define R600_SSTU_DFLT                               0
 #define R600_SST_DFLT                                0x00C8

@@ -82,4 +529,7 @@ u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev,
 				 u16 default_lanes);
 u8 amdgpu_encode_pci_lane_width(u32 lanes);

+struct amd_vce_state*
+amdgpu_get_vce_clock_state(struct amdgpu_device *adev, unsigned idx);
+
 #endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -58,9 +58,10 @@
 * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
 * - 3.7.0 - Add support for VCE clock list packet
 * - 3.8.0 - Add support raster config init in the kernel
+ * - 3.9.0 - Add support for memory query info about VRAM and GTT.
 */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	8
+#define KMS_DRIVER_MINOR	9
 #define KMS_DRIVER_PATCHLEVEL	0

 int amdgpu_vram_limit = 0;
@@ -85,12 +86,13 @@ int amdgpu_vm_size = 64;
 int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
+int amdgpu_vram_page_split = 1024;
 int amdgpu_exp_hw_support = 0;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_powerplay = -1;
-int amdgpu_powercontainment = 1;
-int amdgpu_sclk_deep_sleep_en = 1;
+int amdgpu_no_evict = 0;
+int amdgpu_direct_gma_size = 0;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
 unsigned amdgpu_cg_mask = 0xffffffff;
@@ -165,6 +167,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
 MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
 module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);

+MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)");
+module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
+
 MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
 module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);

@@ -177,14 +182,14 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);

-MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
-module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
-
 MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
 module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444);

-MODULE_PARM_DESC(sclkdeepsleep, "SCLK Deep Sleep (1 = enable (default), 0 = disable)");
-module_param_named(sclkdeepsleep, amdgpu_sclk_deep_sleep_en, int, 0444);
+MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))");
+module_param_named(no_evict, amdgpu_no_evict, int, 0444);
+
+MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)");
+module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444);

 MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
 module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
@@ -201,7 +206,8 @@ module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
 MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
 module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);

-MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x;xxxx:xx:xx.x)");
+MODULE_PARM_DESC(virtual_display,
+		 "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
 module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);

 static const struct pci_device_id pciidlist[] = {
@@ -381,6 +387,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6939, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
 	/* fiji */
 	{0x1002, 0x7300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
+	{0x1002, 0x730F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
 	/* carrizo */
 	{0x1002, 0x9870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
 	{0x1002, 0x9874, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -75,27 +75,21 @@ amdgpufb_release(struct fb_info *info, int user)

 static struct fb_ops amdgpufb_ops = {
 	.owner = THIS_MODULE,
+	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_open = amdgpufb_open,
 	.fb_release = amdgpufb_release,
-	.fb_check_var = drm_fb_helper_check_var,
-	.fb_set_par = drm_fb_helper_set_par,
 	.fb_fillrect = drm_fb_helper_cfb_fillrect,
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
-	.fb_pan_display = drm_fb_helper_pan_display,
-	.fb_blank = drm_fb_helper_blank,
-	.fb_setcmap = drm_fb_helper_setcmap,
-	.fb_debug_enter = drm_fb_helper_debug_enter,
-	.fb_debug_leave = drm_fb_helper_debug_leave,
 };


-int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled)
+int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int cpp, bool tiled)
 {
 	int aligned = width;
 	int pitch_mask = 0;

-	switch (bpp / 8) {
+	switch (cpp) {
 	case 1:
 		pitch_mask = 255;
 		break;
@@ -110,7 +104,7 @@ int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tile

 	aligned += pitch_mask;
 	aligned &= ~pitch_mask;
-	return aligned;
+	return aligned * cpp;
 }

 static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj)
@@ -139,20 +133,21 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	int ret;
 	int aligned_size, size;
 	int height = mode_cmd->height;
-	u32 bpp, depth;
+	u32 cpp;

-	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+	cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0);

 	/* need to align pitch with crtc limits */
-	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, bpp,
-						  fb_tiled) * ((bpp + 1) / 8);
+	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
+						  fb_tiled);

 	height = ALIGN(mode_cmd->height, 8);
 	size = mode_cmd->pitches[0] * height;
 	aligned_size = ALIGN(size, PAGE_SIZE);
 	ret = amdgpu_gem_object_create(adev, aligned_size, 0,
 				       AMDGPU_GEM_DOMAIN_VRAM,
-				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 				       true, &gobj);
 	if (ret) {
 		printk(KERN_ERR "failed to allocate framebuffer (%d)\n",
@@ -176,7 +171,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	}


-	ret = amdgpu_bo_pin_restricted(abo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, NULL);
+	ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL);
 	if (ret) {
 		amdgpu_bo_unreserve(abo);
 		goto out_unref;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -48,7 +48,7 @@
 */

 struct amdgpu_fence {
-	struct fence base;
+	struct dma_fence base;

 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
@@ -74,8 +74,8 @@ void amdgpu_fence_slab_fini(void)
 /*
 * Cast helper
 */
-static const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+static const struct dma_fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
 	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);

@@ -131,11 +131,11 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
-	struct fence *old, **ptr;
+	struct dma_fence *old, **ptr;
 	uint32_t seq;

 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
@@ -144,10 +144,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)

 	seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
-	fence_init(&fence->base, &amdgpu_fence_ops,
-		   &ring->fence_drv.lock,
-		   adev->fence_context + ring->idx,
-		   seq);
+	dma_fence_init(&fence->base, &amdgpu_fence_ops,
+		       &ring->fence_drv.lock,
+		       adev->fence_context + ring->idx,
+		       seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       seq, AMDGPU_FENCE_FLAG_INT);

@@ -156,12 +156,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
 	old = rcu_dereference_protected(*ptr, 1);
-	if (old && !fence_is_signaled(old)) {
+	if (old && !dma_fence_is_signaled(old)) {
 		DRM_INFO("rcu slot is busy\n");
-		fence_wait(old, false);
+		dma_fence_wait(old, false);
 	}

-	rcu_assign_pointer(*ptr, fence_get(&fence->base));
+	rcu_assign_pointer(*ptr, dma_fence_get(&fence->base));

 	*f = &fence->base;

@@ -212,7 +212,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 	seq &= drv->num_fences_mask;

 	do {
-		struct fence *fence, **ptr;
+		struct dma_fence *fence, **ptr;

 		++last_seq;
 		last_seq &= drv->num_fences_mask;
@@ -225,13 +225,13 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 		if (!fence)
 			continue;

-		r = fence_signal(fence);
+		r = dma_fence_signal(fence);
 		if (!r)
-			FENCE_TRACE(fence, "signaled from irq context\n");
+			DMA_FENCE_TRACE(fence, "signaled from irq context\n");
 		else
 			BUG();

-		fence_put(fence);
+		dma_fence_put(fence);
 	} while (last_seq != seq);
 }

@@ -261,7 +261,7 @@ static void amdgpu_fence_fallback(unsigned long arg)
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 {
 	uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
-	struct fence *fence, **ptr;
+	struct dma_fence *fence, **ptr;
 	int r;

 	if (!seq)
@@ -270,14 +270,14 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	rcu_read_lock();
 	fence = rcu_dereference(*ptr);
-	if (!fence || !fence_get_rcu(fence)) {
+	if (!fence || !dma_fence_get_rcu(fence)) {
 		rcu_read_unlock();
 		return 0;
 	}
 	rcu_read_unlock();

-	r = fence_wait(fence, false);
-	fence_put(fence);
+	r = dma_fence_wait(fence, false);
+	dma_fence_put(fence);
 	return r;
 }

@@ -382,24 +382,27 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	if (!ring->fence_drv.fences)
 		return -ENOMEM;

-	timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
-	if (timeout == 0) {
-		/*
-		 * FIXME:
-		 * Delayed workqueue cannot use it directly,
-		 * so the scheduler will not use delayed workqueue if
-		 * MAX_SCHEDULE_TIMEOUT is set.
-		 * Currently keep it simple and silly.
-		 */
-		timeout = MAX_SCHEDULE_TIMEOUT;
-	}
-	r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-			   num_hw_submission,
-			   timeout, ring->name);
-	if (r) {
-		DRM_ERROR("Failed to create scheduler on ring %s.\n",
-			  ring->name);
-		return r;
+	/* No need to setup the GPU scheduler for KIQ ring */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
+		timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+		if (timeout == 0) {
+			/*
+			 * FIXME:
+			 * Delayed workqueue cannot use it directly,
+			 * so the scheduler will not use delayed workqueue if
+			 * MAX_SCHEDULE_TIMEOUT is set.
+			 * Currently keep it simple and silly.
+			 */
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		}
+		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
+				   num_hw_submission,
+				   timeout, ring->name);
+		if (r) {
+			DRM_ERROR("Failed to create scheduler on ring %s.\n",
+				  ring->name);
+			return r;
+		}
 	}

 	return 0;
@@ -453,7 +456,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 		amd_sched_fini(&ring->sched);
 		del_timer_sync(&ring->fence_drv.fallback_timer);
 		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
-			fence_put(ring->fence_drv.fences[j]);
+			dma_fence_put(ring->fence_drv.fences[j]);
 		kfree(ring->fence_drv.fences);
 		ring->fence_drv.fences = NULL;
 		ring->fence_drv.initialized = false;
@@ -542,12 +545,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
 * Common fence implementation
 */

-static const char *amdgpu_fence_get_driver_name(struct fence *fence)
+static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
 {
 	return "amdgpu";
 }

-static const char *amdgpu_fence_get_timeline_name(struct fence *f)
+static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	return (const char *)fence->ring->name;
@@ -561,7 +564,7 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
-static bool amdgpu_fence_enable_signaling(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
@@ -569,7 +572,7 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 	if (!timer_pending(&ring->fence_drv.fallback_timer))
 		amdgpu_fence_schedule_fallback(ring);

-	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+	DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);

 	return true;
 }
@@ -583,7 +586,7 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 */
 static void amdgpu_fence_free(struct rcu_head *rcu)
 {
-	struct fence *f = container_of(rcu, struct fence, rcu);
+	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	kmem_cache_free(amdgpu_fence_slab, fence);
 }
@@ -596,16 +599,16 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
 * This function is called when the reference count becomes zero.
 * It just RCU schedules freeing up the fence.
 */
-static void amdgpu_fence_release(struct fence *f)
+static void amdgpu_fence_release(struct dma_fence *f)
 {
 	call_rcu(&f->rcu, amdgpu_fence_free);
 }

-static const struct fence_ops amdgpu_fence_ops = {
+static const struct dma_fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
 	.enable_signaling = amdgpu_fence_enable_signaling,
-	.wait = fence_default_wait,
+	.wait = dma_fence_default_wait,
 	.release = amdgpu_fence_release,
 };

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -126,7 +126,8 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
 	if (adev->gart.robj == NULL) {
 		r = amdgpu_bo_create(adev, adev->gart.table_size,
 				     PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 				     NULL, NULL, &adev->gart.robj);
 		if (r) {
 			return r;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -116,10 +116,11 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev)
 * Call from drm_gem_handle_create which appear in both new and open ioctl
 * case.
 */
-int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv)
+int amdgpu_gem_object_open(struct drm_gem_object *obj,
+			   struct drm_file *file_priv)
 {
 	struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = abo->adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_va *bo_va;
@@ -142,7 +143,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = bo->adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;

@@ -407,10 +408,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 	robj = gem_to_amdgpu_bo(gobj);
-	if (timeout == 0)
-		ret = reservation_object_test_signaled_rcu(robj->tbo.resv, true);
-	else
-		ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, timeout);
+	ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true,
+						  timeout);

 	/* ret == 0 means not signaled,
 	 * ret > 0 means signaled
@@ -470,6 +469,16 @@ out:
 	return r;
 }

+static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
+{
+	unsigned domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+
+	/* if anything is swapped out don't swap it in here,
+	   just abort and wait for the next CS */
+
+	return domain == AMDGPU_GEM_DOMAIN_CPU ? -ERESTARTSYS : 0;
+}
+
 /**
 * amdgpu_gem_va_update_vm -update the bo_va in its VM
 *
@@ -480,7 +489,8 @@ out:
 * vital here, so they are not reported back to userspace.
 */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
-				    struct amdgpu_bo_va *bo_va, uint32_t operation)
+				    struct amdgpu_bo_va *bo_va,
+				    uint32_t operation)
 {
 	struct ttm_validate_buffer tv, *entry;
 	struct amdgpu_bo_list_entry vm_pd;
@@ -503,7 +513,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error_print;

-	amdgpu_vm_get_pt_bos(adev, bo_va->vm, &duplicates);
 	list_for_each_entry(entry, &list, head) {
 		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
 		/* if anything is swapped out don't swap it in here,
@@ -511,13 +520,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 		if (domain == AMDGPU_GEM_DOMAIN_CPU)
 			goto error_unreserve;
 	}
-	list_for_each_entry(entry, &duplicates, head) {
-		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
-		/* if anything is swapped out don't swap it in here,
-		   just abort and wait for the next CS */
-		if (domain == AMDGPU_GEM_DOMAIN_CPU)
-			goto error_unreserve;
-	}
+	r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
+				      NULL);
+	if (r)
+		goto error_unreserve;

 	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
 	if (r)
@@ -538,8 +544,6 @@ error_print:
 		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
 }

-
-
 int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp)
 {
@@ -549,7 +553,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_bo *abo;
 	struct amdgpu_bo_va *bo_va;
-	struct ttm_validate_buffer tv, tv_pd;
+	struct amdgpu_bo_list_entry vm_pd;
+	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct list_head list, duplicates;
 	uint32_t invalid_flags, va_flags = 0;
@@ -594,9 +599,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	tv.shared = true;
 	list_add(&tv.head, &list);

-	tv_pd.bo = &fpriv->vm.page_directory->tbo;
-	tv_pd.shared = true;
-	list_add(&tv_pd.head, &list);
+	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);

 	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r) {
@@ -704,7 +707,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 	uint32_t handle;
 	int r;

-	args->pitch = amdgpu_align_pitch(adev, args->width, args->bpp, 0) * ((args->bpp + 1) / 8);
+	args->pitch = amdgpu_align_pitch(adev, args->width,
+					 DIV_ROUND_UP(args->bpp, 8), 0);
 	args->size = (u64)args->pitch * args->height;
 	args->size = ALIGN(args->size, PAGE_SIZE);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -24,6 +24,7 @@
 */
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_gfx.h"

 /*
 * GPU scratch registers helpers function.
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -27,6 +27,7 @@
 int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
 void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);

-unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
+		unsigned max_sh);

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -164,10 +164,13 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 	spin_unlock(&mgr->lock);

 	node = kzalloc(sizeof(*node), GFP_KERNEL);
-	if (!node)
-		return -ENOMEM;
+	if (!node) {
+		r = -ENOMEM;
+		goto err_out;
+	}

 	node->start = AMDGPU_BO_INVALID_OFFSET;
+	node->size = mem->num_pages;
 	mem->mm_node = node;

 	if (place->fpfn || place->lpfn || place->flags & TTM_PL_FLAG_TOPDOWN) {
@@ -175,12 +178,20 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man,
 		if (unlikely(r)) {
 			kfree(node);
 			mem->mm_node = NULL;
+			r = 0;
+			goto err_out;
 		}
 	} else {
 		mem->start = node->start;
 	}

 	return 0;
+err_out:
+	spin_lock(&mgr->lock);
+	mgr->available += mem->num_pages;
+	spin_unlock(&mgr->lock);
+
+	return r;
 }

 /**
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -89,7 +89,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 * Free an IB (all asics).
 */
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-		    struct fence *f)
+		    struct dma_fence *f)
 {
 	amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
 }
@@ -116,8 +116,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
 * to SI there was just a DE IB.
 */
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
-		       struct amdgpu_job *job, struct fence **f)
+		       struct amdgpu_ib *ibs, struct dma_fence *last_vm_update,
+		       struct amdgpu_job *job, struct dma_fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
@@ -152,8 +152,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return -EINVAL;
 	}

-	alloc_size = amdgpu_ring_get_dma_frame_size(ring) +
-		num_ibs * amdgpu_ring_get_emit_ib_size(ring);
+	alloc_size = ring->funcs->emit_frame_size + num_ibs *
+		ring->funcs->emit_ib_size;

 	r = amdgpu_ring_alloc(ring, alloc_size);
 	if (r) {
@@ -161,7 +161,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return r;
 	}

-	if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
+	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);

 	if (vm) {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -424,15 +424,6 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 	return 0;
 }

-bool amdgpu_irq_get_delayed(struct amdgpu_device *adev,
-			struct amdgpu_irq_src *src,
-			unsigned type)
-{
-	if ((type >= src->num_types) || !src->enabled_types)
-		return false;
-	return atomic_inc_return(&src->enabled_types[type]) == 1;
-}
-
 /**
 * amdgpu_irq_put - disable interrupt
 *
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -88,9 +88,6 @@ int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		      unsigned type);
 int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		   unsigned type);
-bool amdgpu_irq_get_delayed(struct amdgpu_device *adev,
-			    struct amdgpu_irq_src *src,
-			    unsigned type);
 int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		   unsigned type);
 bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -81,7 +81,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,

 void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
-	struct fence *f;
+	struct dma_fence *f;
 	unsigned i;

 	/* use sched fence if available */
@@ -95,7 +95,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 {
 	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);

-	fence_put(job->fence);
+	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }
@@ -104,14 +104,14 @@ void amdgpu_job_free(struct amdgpu_job *job)
 {
 	amdgpu_job_free_resources(job);

-	fence_put(job->fence);
+	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }

 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
-		      struct fence **f)
+		      struct dma_fence **f)
 {
 	int r;
 	job->ring = ring;
@@ -125,19 +125,19 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,

 	job->owner = owner;
 	job->fence_ctx = entity->fence_context;
-	*f = fence_get(&job->base.s_fence->finished);
+	*f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
 	amd_sched_entity_push_job(&job->base);

 	return 0;
 }

-static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
+static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 {
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
 	struct amdgpu_vm *vm = job->vm;

-	struct fence *fence = amdgpu_sync_get_fence(&job->sync);
+	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);

 	if (fence == NULL && vm && !job->vm_id) {
 		struct amdgpu_ring *ring = job->ring;
@@ -155,9 +155,9 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 	return fence;
 }

-static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
+static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 {
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	struct amdgpu_job *job;
 	int r;

@@ -176,8 +176,8 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);

 	/* if gpu reset, hw fence will be replaced here */
-	fence_put(job->fence);
-	job->fence = fence_get(fence);
+	dma_fence_put(job->fence);
+	job->fence = dma_fence_get(fence);
 	amdgpu_job_free_resources(job);
 	return fence;
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -308,10 +308,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		}

 		for (i = 0; i < adev->num_ip_blocks; i++) {
-			if (adev->ip_blocks[i].type == type &&
-			    adev->ip_block_status[i].valid) {
-				ip.hw_ip_version_major = adev->ip_blocks[i].major;
-				ip.hw_ip_version_minor = adev->ip_blocks[i].minor;
+			if (adev->ip_blocks[i].version->type == type &&
+			    adev->ip_blocks[i].status.valid) {
+				ip.hw_ip_version_major = adev->ip_blocks[i].version->major;
+				ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
 				ip.capabilities_flags = 0;
 				ip.available_rings = ring_mask;
 				ip.ib_start_alignment = ib_start_alignment;
@@ -347,8 +347,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		}

 		for (i = 0; i < adev->num_ip_blocks; i++)
-			if (adev->ip_blocks[i].type == type &&
-			    adev->ip_block_status[i].valid &&
+			if (adev->ip_blocks[i].version->type == type &&
+			    adev->ip_blocks[i].status.valid &&
 			    count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
 				count++;

@@ -413,6 +413,36 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &vram_gtt,
 				    min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
 	}
+	case AMDGPU_INFO_MEMORY: {
+		struct drm_amdgpu_memory_info mem;
+
+		memset(&mem, 0, sizeof(mem));
+		mem.vram.total_heap_size = adev->mc.real_vram_size;
+		mem.vram.usable_heap_size =
+			adev->mc.real_vram_size - adev->vram_pin_size;
+		mem.vram.heap_usage = atomic64_read(&adev->vram_usage);
+		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
+
+		mem.cpu_accessible_vram.total_heap_size =
+			adev->mc.visible_vram_size;
+		mem.cpu_accessible_vram.usable_heap_size =
+			adev->mc.visible_vram_size -
+			(adev->vram_pin_size - adev->invisible_pin_size);
+		mem.cpu_accessible_vram.heap_usage =
+			atomic64_read(&adev->vram_vis_usage);
+		mem.cpu_accessible_vram.max_allocation =
+			mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
+
+		mem.gtt.total_heap_size = adev->mc.gtt_size;
+		mem.gtt.usable_heap_size =
+			adev->mc.gtt_size - adev->gart_pin_size;
+		mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage);
+		mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
+
+		return copy_to_user(out, &mem,
+				    min((size_t)size, sizeof(mem)))
+				    ? -EFAULT : 0;
+	}
 	case AMDGPU_INFO_READ_MMR_REG: {
 		unsigned n, alloc_size;
 		uint32_t *regs;
@@ -475,6 +505,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		dev_info.ids_flags = 0;
 		if (adev->flags & AMD_IS_APU)
 			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
+		if (amdgpu_sriov_vf(adev))
+			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
 		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
@@ -494,6 +526,50 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &dev_info,
 				    min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
 	}
+	case AMDGPU_INFO_VCE_CLOCK_TABLE: {
+		unsigned i;
+		struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
+		struct amd_vce_state *vce_state;
+
+		for (i = 0; i < AMDGPU_VCE_CLOCK_TABLE_ENTRIES; i++) {
+			vce_state = amdgpu_dpm_get_vce_clock_state(adev, i);
+			if (vce_state) {
+				vce_clk_table.entries[i].sclk = vce_state->sclk;
+				vce_clk_table.entries[i].mclk = vce_state->mclk;
+				vce_clk_table.entries[i].eclk = vce_state->evclk;
+				vce_clk_table.num_valid_entries++;
+			}
+		}
+
+		return copy_to_user(out, &vce_clk_table,
+				    min((size_t)size, sizeof(vce_clk_table))) ? -EFAULT : 0;
+	}
+	case AMDGPU_INFO_VBIOS: {
+		uint32_t bios_size = adev->bios_size;
+
+		switch (info->vbios_info.type) {
+		case AMDGPU_INFO_VBIOS_SIZE:
+			return copy_to_user(out, &bios_size,
+					min((size_t)size, sizeof(bios_size)))
+					? -EFAULT : 0;
+		case AMDGPU_INFO_VBIOS_IMAGE: {
+			uint8_t *bios;
+			uint32_t bios_offset = info->vbios_info.offset;
+
+			if (bios_offset >= bios_size)
+				return -EINVAL;
+
+			bios = adev->bios + bios_offset;
+			return copy_to_user(out, bios,
+					    min((size_t)size, (size_t)(bios_size - bios_offset)))
+					? -EFAULT : 0;
+		}
+		default:
+			DRM_DEBUG_KMS("Invalid request %d\n",
+					info->vbios_info.type);
+			return -EINVAL;
+		}
+	}
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->query);
 		return -EINVAL;
@@ -775,6 +851,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -285,7 +285,7 @@ free_rmn:
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
-	struct amdgpu_device *adev = bo->adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct amdgpu_mn *rmn;
 	struct amdgpu_mn_node *node = NULL;
 	struct list_head bos;
@@ -340,7 +340,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
-	struct amdgpu_device *adev = bo->adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct amdgpu_mn *rmn;
 	struct list_head *head;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -271,8 +271,6 @@ struct amdgpu_display_funcs {
 	u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
 	/* wait for vblank */
 	void (*vblank_wait)(struct amdgpu_device *adev, int crtc);
-	/* is dce hung */
-	bool (*is_display_hung)(struct amdgpu_device *adev);
 	/* set backlight level */
 	void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
 				    u8 level);
@@ -341,8 +339,6 @@ struct amdgpu_mode_info {
 	int			num_dig; /* number of dig blocks */
 	int			disp_priority;
 	const struct amdgpu_display_funcs *funcs;
-	struct hrtimer vblank_timer;
-	enum amdgpu_interrupt_state vsync_timer_enabled;
 };

 #define AMDGPU_MAX_BL_LEVEL 0xFF
@@ -413,6 +409,9 @@ struct amdgpu_crtc {
 	u32 wm_high;
 	u32 lb_vblank_lead_lines;
 	struct drm_display_mode hw_mode;
+	/* for virtual dce */
+	struct hrtimer vblank_timer;
+	enum amdgpu_interrupt_state vsync_timer_enabled;
 };

 struct amdgpu_encoder_atom_dig {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -88,18 +88,19 @@ static void amdgpu_update_memory_usage(struct amdgpu_device *adev,

 static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
 	struct amdgpu_bo *bo;

 	bo = container_of(tbo, struct amdgpu_bo, tbo);

-	amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
+	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);

 	drm_gem_object_release(&bo->gem_base);
 	amdgpu_bo_unref(&bo->parent);
 	if (!list_empty(&bo->shadow_list)) {
-		mutex_lock(&bo->adev->shadow_list_lock);
+		mutex_lock(&adev->shadow_list_lock);
 		list_del_init(&bo->shadow_list);
-		mutex_unlock(&bo->adev->shadow_list_lock);
+		mutex_unlock(&adev->shadow_list_lock);
 	}
 	kfree(bo->metadata);
 	kfree(bo);
@@ -121,20 +122,14 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,

 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+		unsigned lpfn = 0;

-		if (flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS &&
-		    !(flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
-		    adev->mc.visible_vram_size < adev->mc.real_vram_size) {
-			places[c].fpfn = visible_pfn;
-			places[c].lpfn = 0;
-			places[c].flags = TTM_PL_FLAG_WC |
-				TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM |
-				TTM_PL_FLAG_TOPDOWN;
-			c++;
-		}
+		/* This forces a reallocation if the flag wasn't set before */
+		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;

 		places[c].fpfn = 0;
-		places[c].lpfn = 0;
+		places[c].lpfn = lpfn;
 		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 			TTM_PL_FLAG_VRAM;
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
@@ -205,8 +200,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,

 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 {
-	amdgpu_ttm_placement_init(abo->adev, &abo->placement,
-				  abo->placements, domain, abo->flags);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+
+	amdgpu_ttm_placement_init(adev, &abo->placement, abo->placements,
+				  domain, abo->flags);
 }

 static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
@@ -245,7 +242,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 	int r;

 	r = amdgpu_bo_create(adev, size, align, true, domain,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 			     NULL, NULL, bo_ptr);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
@@ -351,7 +349,6 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 		kfree(bo);
 		return r;
 	}
-	bo->adev = adev;
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->va);
 	bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
@@ -374,39 +371,36 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,

 	amdgpu_fill_placement_to_bo(bo, placement);
 	/* Kernel allocation are uninterruptible */
+
+	if (!resv) {
+		bool locked;
+
+		reservation_object_init(&bo->tbo.ttm_resv);
+		locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
+		WARN_ON(!locked);
+	}
 	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, page_align, !kernel, NULL,
-			acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-	if (unlikely(r != 0)) {
+			acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
+			&amdgpu_ttm_bo_destroy);
+	if (unlikely(r != 0))
 		return r;
-	}

 	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
-		struct fence *fence;
+		struct dma_fence *fence;

-		if (adev->mman.buffer_funcs_ring == NULL ||
-		   !adev->mman.buffer_funcs_ring->ready) {
-			r = -EBUSY;
-			goto fail_free;
-		}
-
-		r = amdgpu_bo_reserve(bo, false);
-		if (unlikely(r != 0))
-			goto fail_free;
-
-		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-		if (unlikely(r != 0))
+		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
+		if (unlikely(r))
 			goto fail_unreserve;

-		amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
 		amdgpu_bo_fence(bo, fence, false);
-		amdgpu_bo_unreserve(bo);
-		fence_put(bo->tbo.moving);
-		bo->tbo.moving = fence_get(fence);
-		fence_put(fence);
+		dma_fence_put(bo->tbo.moving);
+		bo->tbo.moving = dma_fence_get(fence);
+		dma_fence_put(fence);
 	}
+	if (!resv)
+		ww_mutex_unlock(&bo->tbo.resv->lock);
 	*bo_ptr = bo;

 	trace_amdgpu_bo_create(bo);
@@ -414,8 +408,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	return 0;

 fail_unreserve:
-	amdgpu_bo_unreserve(bo);
-fail_free:
+	ww_mutex_unlock(&bo->tbo.resv->lock);
 	amdgpu_bo_unref(&bo);
 	return r;
 }
@@ -491,7 +484,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
 			       struct amdgpu_bo *bo,
 			       struct reservation_object *resv,
-			       struct fence **fence,
+			       struct dma_fence **fence,
 			       bool direct)

 {
@@ -523,7 +516,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
 				  struct reservation_object *resv,
-				  struct fence **fence,
+				  struct dma_fence **fence,
 				  bool direct)

 {
@@ -616,6 +609,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 			     u64 min_offset, u64 max_offset,
 			     u64 *gpu_addr)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int r, i;
 	unsigned fpfn, lpfn;

@@ -643,18 +637,20 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,

 		return 0;
 	}
+
+	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
 		    !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) &&
 		    (!max_offset || max_offset >
-		     bo->adev->mc.visible_vram_size)) {
+		     adev->mc.visible_vram_size)) {
 			if (WARN_ON_ONCE(min_offset >
-					 bo->adev->mc.visible_vram_size))
+					 adev->mc.visible_vram_size))
 				return -EINVAL;
 			fpfn = min_offset >> PAGE_SHIFT;
-			lpfn = bo->adev->mc.visible_vram_size >> PAGE_SHIFT;
+			lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
 		} else {
 			fpfn = min_offset >> PAGE_SHIFT;
 			lpfn = max_offset >> PAGE_SHIFT;
@@ -669,12 +665,12 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,

 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r)) {
-		dev_err(bo->adev->dev, "%p pin failed\n", bo);
+		dev_err(adev->dev, "%p pin failed\n", bo);
 		goto error;
 	}
 	r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
 	if (unlikely(r)) {
-		dev_err(bo->adev->dev, "%p bind failed\n", bo);
+		dev_err(adev->dev, "%p bind failed\n", bo);
 		goto error;
 	}

@@ -682,11 +678,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	if (gpu_addr != NULL)
 		*gpu_addr = amdgpu_bo_gpu_offset(bo);
 	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-		bo->adev->vram_pin_size += amdgpu_bo_size(bo);
+		adev->vram_pin_size += amdgpu_bo_size(bo);
 		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-			bo->adev->invisible_pin_size += amdgpu_bo_size(bo);
+			adev->invisible_pin_size += amdgpu_bo_size(bo);
 	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		bo->adev->gart_pin_size += amdgpu_bo_size(bo);
+		adev->gart_pin_size += amdgpu_bo_size(bo);
 	}

 error:
@@ -700,10 +696,11 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)

 int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int r, i;

 	if (!bo->pin_count) {
-		dev_warn(bo->adev->dev, "%p unpin not necessary\n", bo);
+		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
 		return 0;
 	}
 	bo->pin_count--;
@@ -715,16 +712,16 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 	}
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r)) {
-		dev_err(bo->adev->dev, "%p validate failed for unpin\n", bo);
+		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
 		goto error;
 	}

 	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
-		bo->adev->vram_pin_size -= amdgpu_bo_size(bo);
+		adev->vram_pin_size -= amdgpu_bo_size(bo);
 		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-			bo->adev->invisible_pin_size -= amdgpu_bo_size(bo);
+			adev->invisible_pin_size -= amdgpu_bo_size(bo);
 	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
-		bo->adev->gart_pin_size -= amdgpu_bo_size(bo);
+		adev->gart_pin_size -= amdgpu_bo_size(bo);
 	}

 error:
@@ -854,6 +851,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *new_mem)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
 	struct ttm_mem_reg *old_mem = &bo->mem;

@@ -861,21 +859,21 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 		return;

 	abo = container_of(bo, struct amdgpu_bo, tbo);
-	amdgpu_vm_bo_invalidate(abo->adev, abo);
+	amdgpu_vm_bo_invalidate(adev, abo);

 	/* update statistics */
 	if (!new_mem)
 		return;

 	/* move_notify is called before move happens */
-	amdgpu_update_memory_usage(abo->adev, &bo->mem, new_mem);
+	amdgpu_update_memory_usage(adev, &bo->mem, new_mem);

 	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
 }

 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
-	struct amdgpu_device *adev;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
 	unsigned long offset, size, lpfn;
 	int i, r;
@@ -884,13 +882,14 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;

 	abo = container_of(bo, struct amdgpu_bo, tbo);
-	adev = abo->adev;
 	if (bo->mem.mem_type != TTM_PL_VRAM)
 		return 0;

 	size = bo->mem.num_pages << PAGE_SHIFT;
 	offset = bo->mem.start << PAGE_SHIFT;
-	if ((offset + size) <= adev->mc.visible_vram_size)
+	/* TODO: figure out how to map scattered VRAM to the CPU */
+	if ((offset + size) <= adev->mc.visible_vram_size &&
+	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
 		return 0;

 	/* Can't move a pinned BO to visible VRAM */
@@ -898,6 +897,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return -EINVAL;

 	/* hurrah the memory is not visible ! */
+	abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < abo->placement.num_placement; i++) {
@@ -931,7 +931,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 * @shared: true if fence should be added shared
 *
 */
-void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared)
 {
 	struct reservation_object *resv = bo->tbo.resv;
@@ -959,6 +959,8 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
 		     !bo->pin_count);
 	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
+	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));

 	return bo->tbo.offset;
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -71,12 +71,13 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
 */
 static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int r;

 	r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
-			dev_err(bo->adev->dev, "%p reserve failed\n", bo);
+			dev_err(adev->dev, "%p reserve failed\n", bo);
 		return r;
 	}
 	return 0;
@@ -156,19 +157,19 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
 void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 				  struct ttm_mem_reg *new_mem);
 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
-void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence,
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared);
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
 int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring,
 			       struct amdgpu_bo *bo,
 			       struct reservation_object *resv,
-			       struct fence **fence, bool direct);
+			       struct dma_fence **fence, bool direct);
 int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_bo *bo,
 				  struct reservation_object *resv,
-				  struct fence **fence,
+				  struct dma_fence **fence,
 				  bool direct);


@@ -200,7 +201,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     unsigned size, unsigned align);
 void amdgpu_sa_bo_free(struct amdgpu_device *adev,
 			      struct amdgpu_sa_bo **sa_bo,
-			      struct fence *fence);
+			      struct dma_fence *fence);
 #if defined(CONFIG_DEBUG_FS)
 void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 					 struct seq_file *m);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -737,6 +737,21 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	return sprintf(buf, "%i\n", speed);
 }

+static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int err;
+	u32 speed;
+
+	err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
+	if (err)
+		return err;
+
+	return sprintf(buf, "%i\n", speed);
+}
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -744,6 +759,7 @@ static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
 static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);

 static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
@@ -753,6 +769,7 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
 	&sensor_dev_attr_pwm1_max.dev_attr.attr,
+	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	NULL
 };

@@ -804,6 +821,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
 		return 0;

+	/* requires powerplay */
+	if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+		return 0;
+
 	return effective_mode;
 }

@@ -986,10 +1007,10 @@ restart_search:

 static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
 {
-	int i;
 	struct amdgpu_ps *ps;
 	enum amd_pm_state_type dpm_state;
 	int ret;
+	bool equal;

 	/* if dpm init failed */
 	if (!adev->pm.dpm_enabled)
@@ -1009,46 +1030,6 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
 	else
 		return;

-	/* no need to reprogram if nothing changed unless we are on BTC+ */
-	if (adev->pm.dpm.current_ps == adev->pm.dpm.requested_ps) {
-		/* vce just modifies an existing state so force a change */
-		if (ps->vce_active != adev->pm.dpm.vce_active)
-			goto force;
-		if (adev->flags & AMD_IS_APU) {
-			/* for APUs if the num crtcs changed but state is the same,
-			 * all we need to do is update the display configuration.
-			 */
-			if (adev->pm.dpm.new_active_crtcs != adev->pm.dpm.current_active_crtcs) {
-				/* update display watermarks based on new power state */
-				amdgpu_display_bandwidth_update(adev);
-				/* update displays */
-				amdgpu_dpm_display_configuration_changed(adev);
-				adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
-				adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
-			}
-			return;
-		} else {
-			/* for BTC+ if the num crtcs hasn't changed and state is the same,
-			 * nothing to do, if the num crtcs is > 1 and state is the same,
-			 * update display configuration.
-			 */
-			if (adev->pm.dpm.new_active_crtcs ==
-			    adev->pm.dpm.current_active_crtcs) {
-				return;
-			} else if ((adev->pm.dpm.current_active_crtc_count > 1) &&
-				   (adev->pm.dpm.new_active_crtc_count > 1)) {
-				/* update display watermarks based on new power state */
-				amdgpu_display_bandwidth_update(adev);
-				/* update displays */
-				amdgpu_dpm_display_configuration_changed(adev);
-				adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
-				adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
-				return;
-			}
-		}
-	}
-
-force:
 	if (amdgpu_dpm == 1) {
 		printk("switching from power state:\n");
 		amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps);
@@ -1059,31 +1040,21 @@ force:
 	/* update whether vce is active */
 	ps->vce_active = adev->pm.dpm.vce_active;

+	amdgpu_dpm_display_configuration_changed(adev);
+
 	ret = amdgpu_dpm_pre_set_power_state(adev);
 	if (ret)
 		return;

-	/* update display watermarks based on new power state */
-	amdgpu_display_bandwidth_update(adev);
+	if ((0 != amgdpu_dpm_check_state_equal(adev, adev->pm.dpm.current_ps, adev->pm.dpm.requested_ps, &equal)))
+		equal = false;

-	/* wait for the rings to drain */
-	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		if (ring && ring->ready)
-			amdgpu_fence_wait_empty(ring);
-	}
+	if (equal)
+		return;

-	/* program the new power state */
 	amdgpu_dpm_set_power_state(adev);
-
-	/* update current power state */
-	adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps;
-
 	amdgpu_dpm_post_set_power_state(adev);

-	/* update displays */
-	amdgpu_dpm_display_configuration_changed(adev);
-
 	adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
 	adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;

@@ -1135,7 +1106,7 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 			mutex_lock(&adev->pm.mutex);
 			adev->pm.dpm.vce_active = true;
 			/* XXX select vce level based on ring/task */
-			adev->pm.dpm.vce_level = AMDGPU_VCE_LEVEL_AC_ALL;
+			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
 			mutex_unlock(&adev->pm.mutex);
 		} else {
 			mutex_lock(&adev->pm.mutex);
@@ -1276,20 +1247,20 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 	struct drm_device *ddev = adev->ddev;
 	struct drm_crtc *crtc;
 	struct amdgpu_crtc *amdgpu_crtc;
+	int i = 0;

 	if (!adev->pm.dpm_enabled)
 		return;

+	amdgpu_display_bandwidth_update(adev);
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		if (ring && ring->ready)
+			amdgpu_fence_wait_empty(ring);
+	}
+
 	if (adev->pp_enabled) {
-		int i = 0;
-
-		amdgpu_display_bandwidth_update(adev);
-		for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-			struct amdgpu_ring *ring = adev->rings[i];
-			if (ring && ring->ready)
-				amdgpu_fence_wait_empty(ring);
-		}
-
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -155,9 +155,6 @@ static int amdgpu_pp_sw_init(void *handle)
 		ret = adev->powerplay.ip_funcs->sw_init(
 					adev->powerplay.pp_handle);

-	if (adev->pp_enabled)
-		adev->pm.dpm_enabled = true;
-
 	return ret;
 }

@@ -187,6 +184,9 @@ static int amdgpu_pp_hw_init(void *handle)
 		ret = adev->powerplay.ip_funcs->hw_init(
 					adev->powerplay.pp_handle);

+	if ((amdgpu_dpm != 0) && !amdgpu_sriov_vf(adev))
+		adev->pm.dpm_enabled = true;
+
 	return ret;
 }

@@ -299,7 +299,7 @@ static int amdgpu_pp_soft_reset(void *handle)
 	return ret;
 }

-const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
+static const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
 	.name = "amdgpu_powerplay",
 	.early_init = amdgpu_pp_early_init,
 	.late_init = amdgpu_pp_late_init,
@@ -316,3 +316,12 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
 	.set_clockgating_state = amdgpu_pp_set_clockgating_state,
 	.set_powergating_state = amdgpu_pp_set_powergating_state,
 };
+
+const struct amdgpu_ip_block_version amdgpu_pp_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 1,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &amdgpu_pp_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h
@@ -23,11 +23,11 @@
 *
 */

-#ifndef __AMDGPU_POPWERPLAY_H__
-#define __AMDGPU_POPWERPLAY_H__
+#ifndef __AMDGPU_POWERPLAY_H__
+#define __AMDGPU_POWERPLAY_H__

 #include "amd_shared.h"

-extern const struct amd_ip_funcs amdgpu_pp_ip_funcs;
+extern const struct amdgpu_ip_block_version amdgpu_pp_ip_block;

-#endif /* __AMDSOC_DM_H__ */
+#endif /* __AMDGPU_POWERPLAY_H__ */
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -65,7 +65,7 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 {
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
-	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
+	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

 	/* Make sure we aren't trying to allocate more space
 	 * than the maximum for one submission
@@ -94,7 +94,7 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 	int i;

 	for (i = 0; i < count; i++)
-		amdgpu_ring_write(ring, ring->nop);
+		amdgpu_ring_write(ring, ring->funcs->nop);
 }

 /** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
@@ -106,8 +106,8 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 */
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	while (ib->length_dw & ring->align_mask)
-		ib->ptr[ib->length_dw++] = ring->nop;
+	while (ib->length_dw & ring->funcs->align_mask)
+		ib->ptr[ib->length_dw++] = ring->funcs->nop;
 }

 /**
@@ -125,8 +125,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 	uint32_t count;

 	/* We pad to match fetch size */
-	count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
-	count %= ring->align_mask + 1;
+	count = ring->funcs->align_mask + 1 -
+		(ring->wptr & ring->funcs->align_mask);
+	count %= ring->funcs->align_mask + 1;
 	ring->funcs->insert_nop(ring, count);

 	mb();
@@ -163,9 +164,8 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 * Returns 0 on success, error on failure.
 */
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
-		     unsigned max_dw, u32 nop, u32 align_mask,
-		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
-		     enum amdgpu_ring_type ring_type)
+		     unsigned max_dw, struct amdgpu_irq_src *irq_src,
+		     unsigned irq_type)
 {
 	int r;

@@ -216,9 +216,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,

 	ring->ring_size = roundup_pow_of_two(max_dw * 4 *
 					     amdgpu_sched_hw_submission);
-	ring->align_mask = align_mask;
-	ring->nop = nop;
-	ring->type = ring_type;

 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_RING_H__
+#define __AMDGPU_RING_H__
+
+#include "gpu_scheduler.h"
+
+/* max number of rings */
+#define AMDGPU_MAX_RINGS		16
+#define AMDGPU_MAX_GFX_RINGS		1
+#define AMDGPU_MAX_COMPUTE_RINGS	8
+#define AMDGPU_MAX_VCE_RINGS		3
+
+/* some special values for the owner field */
+#define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
+#define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
+
+#define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
+#define AMDGPU_FENCE_FLAG_INT           (1 << 1)
+
+enum amdgpu_ring_type {
+	AMDGPU_RING_TYPE_GFX,
+	AMDGPU_RING_TYPE_COMPUTE,
+	AMDGPU_RING_TYPE_SDMA,
+	AMDGPU_RING_TYPE_UVD,
+	AMDGPU_RING_TYPE_VCE,
+	AMDGPU_RING_TYPE_KIQ
+};
+
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+
+/*
+ * Fences.
+ */
+struct amdgpu_fence_driver {
+	uint64_t			gpu_addr;
+	volatile uint32_t		*cpu_addr;
+	/* sync_seq is protected by ring emission lock */
+	uint32_t			sync_seq;
+	atomic_t			last_seq;
+	bool				initialized;
+	struct amdgpu_irq_src		*irq_src;
+	unsigned			irq_type;
+	struct timer_list		fallback_timer;
+	unsigned			num_fences_mask;
+	spinlock_t			lock;
+	struct dma_fence		**fences;
+};
+
+int amdgpu_fence_driver_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
+void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
+
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+				  unsigned num_hw_submission);
+int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+				   struct amdgpu_irq_src *irq_src,
+				   unsigned irq_type);
+void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
+void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+void amdgpu_fence_process(struct amdgpu_ring *ring);
+int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+
+/*
+ * Rings.
+ */
+
+/* provided by hw blocks that expose a ring buffer for commands */
+struct amdgpu_ring_funcs {
+	enum amdgpu_ring_type	type;
+	uint32_t		align_mask;
+	u32			nop;
+
+	/* ring read/write ptr handling */
+	u32 (*get_rptr)(struct amdgpu_ring *ring);
+	u32 (*get_wptr)(struct amdgpu_ring *ring);
+	void (*set_wptr)(struct amdgpu_ring *ring);
+	/* validating and patching of IBs */
+	int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+	/* constants to calculate how many DW are needed for an emit */
+	unsigned emit_frame_size;
+	unsigned emit_ib_size;
+	/* command emit functions */
+	void (*emit_ib)(struct amdgpu_ring *ring,
+			struct amdgpu_ib *ib,
+			unsigned vm_id, bool ctx_switch);
+	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
+			   uint64_t seq, unsigned flags);
+	void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
+	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
+			      uint64_t pd_addr);
+	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+	void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
+	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
+				uint32_t gds_base, uint32_t gds_size,
+				uint32_t gws_base, uint32_t gws_size,
+				uint32_t oa_base, uint32_t oa_size);
+	/* testing functions */
+	int (*test_ring)(struct amdgpu_ring *ring);
+	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
+	/* insert NOP packets */
+	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+	/* pad the indirect buffer to the necessary number of dw */
+	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
+	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+	/* note usage for clock and power gating */
+	void (*begin_use)(struct amdgpu_ring *ring);
+	void (*end_use)(struct amdgpu_ring *ring);
+	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+};
+
+struct amdgpu_ring {
+	struct amdgpu_device		*adev;
+	const struct amdgpu_ring_funcs	*funcs;
+	struct amdgpu_fence_driver	fence_drv;
+	struct amd_gpu_scheduler	sched;
+
+	struct amdgpu_bo	*ring_obj;
+	volatile uint32_t	*ring;
+	unsigned		rptr_offs;
+	unsigned		wptr;
+	unsigned		wptr_old;
+	unsigned		ring_size;
+	unsigned		max_dw;
+	int			count_dw;
+	uint64_t		gpu_addr;
+	uint32_t		ptr_mask;
+	bool			ready;
+	u32			idx;
+	u32			me;
+	u32			pipe;
+	u32			queue;
+	struct amdgpu_bo	*mqd_obj;
+	u32			doorbell_index;
+	bool			use_doorbell;
+	unsigned		wptr_offs;
+	unsigned		fence_offs;
+	uint64_t		current_ctx;
+	char			name[16];
+	unsigned		cond_exe_offs;
+	u64			cond_exe_gpu_addr;
+	volatile u32		*cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *ent;
+#endif
+};
+
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+void amdgpu_ring_commit(struct amdgpu_ring *ring);
+void amdgpu_ring_undo(struct amdgpu_ring *ring);
+int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
+		     unsigned irq_type);
+void amdgpu_ring_fini(struct amdgpu_ring *ring);
+
+#endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -147,7 +147,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
 	}
 	list_del_init(&sa_bo->olist);
 	list_del_init(&sa_bo->flist);
-	fence_put(sa_bo->fence);
+	dma_fence_put(sa_bo->fence);
 	kfree(sa_bo);
 }

@@ -161,7 +161,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
 	sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
 	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
 		if (sa_bo->fence == NULL ||
-		    !fence_is_signaled(sa_bo->fence)) {
+		    !dma_fence_is_signaled(sa_bo->fence)) {
 			return;
 		}
 		amdgpu_sa_bo_remove_locked(sa_bo);
@@ -244,7 +244,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
 }

 static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
-				   struct fence **fences,
+				   struct dma_fence **fences,
 				   unsigned *tries)
 {
 	struct amdgpu_sa_bo *best_bo = NULL;
@@ -272,7 +272,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 		sa_bo = list_first_entry(&sa_manager->flist[i],
 					 struct amdgpu_sa_bo, flist);

-		if (!fence_is_signaled(sa_bo->fence)) {
+		if (!dma_fence_is_signaled(sa_bo->fence)) {
 			fences[i] = sa_bo->fence;
 			continue;
 		}
@@ -314,7 +314,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align)
 {
-	struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
+	struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
 	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
 	unsigned count;
 	int i, r;
@@ -327,9 +327,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		return -EINVAL;

 	*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
-	if ((*sa_bo) == NULL) {
+	if (!(*sa_bo))
 		return -ENOMEM;
-	}
 	(*sa_bo)->manager = sa_manager;
 	(*sa_bo)->fence = NULL;
 	INIT_LIST_HEAD(&(*sa_bo)->olist);
@@ -356,14 +355,15 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,

 		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 			if (fences[i])
-				fences[count++] = fence_get(fences[i]);
+				fences[count++] = dma_fence_get(fences[i]);

 		if (count) {
 			spin_unlock(&sa_manager->wq.lock);
-			t = fence_wait_any_timeout(fences, count, false,
-						   MAX_SCHEDULE_TIMEOUT);
+			t = dma_fence_wait_any_timeout(fences, count, false,
+						       MAX_SCHEDULE_TIMEOUT,
+						       NULL);
 			for (i = 0; i < count; ++i)
-				fence_put(fences[i]);
+				dma_fence_put(fences[i]);

 			r = (t > 0) ? 0 : t;
 			spin_lock(&sa_manager->wq.lock);
@@ -384,7 +384,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 }

 void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
-		       struct fence *fence)
+		       struct dma_fence *fence)
 {
 	struct amdgpu_sa_manager *sa_manager;

@@ -394,10 +394,10 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,

 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->wq.lock);
-	if (fence && !fence_is_signaled(fence)) {
+	if (fence && !dma_fence_is_signaled(fence)) {
 		uint32_t idx;

-		(*sa_bo)->fence = fence_get(fence);
+		(*sa_bo)->fence = dma_fence_get(fence);
 		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
 		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
 	} else {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -34,7 +34,7 @@

 struct amdgpu_sync_entry {
 	struct hlist_node	node;
-	struct fence		*fence;
+	struct dma_fence	*fence;
 };

 static struct kmem_cache *amdgpu_sync_slab;
@@ -60,7 +60,8 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
 *
 * Test if the fence was issued by us.
 */
-static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
+static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
+				 struct dma_fence *f)
 {
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

@@ -81,7 +82,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
 *
 * Extract who originally created the fence.
 */
-static void *amdgpu_sync_get_owner(struct fence *f)
+static void *amdgpu_sync_get_owner(struct dma_fence *f)
 {
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

@@ -99,13 +100,14 @@ static void *amdgpu_sync_get_owner(struct fence *f)
 *
 * Either keep the existing fence or the new one, depending which one is later.
 */
-static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
+static void amdgpu_sync_keep_later(struct dma_fence **keep,
+				   struct dma_fence *fence)
 {
-	if (*keep && fence_is_later(*keep, fence))
+	if (*keep && dma_fence_is_later(*keep, fence))
 		return;

-	fence_put(*keep);
-	*keep = fence_get(fence);
+	dma_fence_put(*keep);
+	*keep = dma_fence_get(fence);
 }

 /**
@@ -117,7 +119,7 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
-static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f)
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
 {
 	struct amdgpu_sync_entry *e;

@@ -139,7 +141,7 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f)
 *
 */
 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct fence *f)
+		      struct dma_fence *f)
 {
 	struct amdgpu_sync_entry *e;

@@ -158,7 +160,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		return -ENOMEM;

 	hash_add(sync->fences, &e->node, f->context);
-	e->fence = fence_get(f);
+	e->fence = dma_fence_get(f);
 	return 0;
 }

@@ -177,7 +179,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     void *owner)
 {
 	struct reservation_object_list *flist;
-	struct fence *f;
+	struct dma_fence *f;
 	void *fence_owner;
 	unsigned i;
 	int r = 0;
@@ -231,15 +233,15 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
-struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
-				     struct amdgpu_ring *ring)
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+					 struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
 	int i;

 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
+		struct dma_fence *f = e->fence;
 		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);

 		if (ring && s_fence) {
@@ -247,16 +249,16 @@ struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 			 * when they are scheduled.
 			 */
 			if (s_fence->sched == &ring->sched) {
-				if (fence_is_signaled(&s_fence->scheduled))
+				if (dma_fence_is_signaled(&s_fence->scheduled))
 					continue;

 				return &s_fence->scheduled;
 			}
 		}

-		if (fence_is_signaled(f)) {
+		if (dma_fence_is_signaled(f)) {
 			hash_del(&e->node);
-			fence_put(f);
+			dma_fence_put(f);
 			kmem_cache_free(amdgpu_sync_slab, e);
 			continue;
 		}
@@ -274,11 +276,11 @@ struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 *
 * Get and removes the next fence from the sync object not signaled yet.
 */
-struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
-	struct fence *f;
+	struct dma_fence *f;
 	int i;

 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
@@ -288,10 +290,10 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 		hash_del(&e->node);
 		kmem_cache_free(amdgpu_sync_slab, e);

-		if (!fence_is_signaled(f))
+		if (!dma_fence_is_signaled(f))
 			return f;

-		fence_put(f);
+		dma_fence_put(f);
 	}
 	return NULL;
 }
@@ -311,11 +313,11 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)

 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		hash_del(&e->node);
-		fence_put(e->fence);
+		dma_fence_put(e->fence);
 		kmem_cache_free(amdgpu_sync_slab, e);
 	}

-	fence_put(sync->last_vm_update);
+	dma_fence_put(sync->last_vm_update);
 }

 /**
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_SYNC_H__
+#define __AMDGPU_SYNC_H__
+
+#include <linux/hashtable.h>
+
+struct dma_fence;
+struct reservation_object;
+struct amdgpu_device;
+struct amdgpu_ring;
+
+/*
+ * Container for fences used to sync command submissions.
+ */
+struct amdgpu_sync {
+	DECLARE_HASHTABLE(fences, 4);
+	struct dma_fence	*last_vm_update;
+};
+
+void amdgpu_sync_create(struct amdgpu_sync *sync);
+int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+		      struct dma_fence *f);
+int amdgpu_sync_resv(struct amdgpu_device *adev,
+		     struct amdgpu_sync *sync,
+		     struct reservation_object *resv,
+		     void *owner);
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
+
+#endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -78,7 +78,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 		void *gtt_map, *vram_map;
 		void **gtt_start, **gtt_end;
 		void **vram_start, **vram_end;
-		struct fence *fence = NULL;
+		struct dma_fence *fence = NULL;

 		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
@@ -118,13 +118,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 			goto out_lclean_unpin;
 		}

-		r = fence_wait(fence, false);
+		r = dma_fence_wait(fence, false);
 		if (r) {
 			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
 			goto out_lclean_unpin;
 		}

-		fence_put(fence);
+		dma_fence_put(fence);

 		r = amdgpu_bo_kmap(vram_obj, &vram_map);
 		if (r) {
@@ -163,13 +163,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
 			goto out_lclean_unpin;
 		}

-		r = fence_wait(fence, false);
+		r = dma_fence_wait(fence, false);
 		if (r) {
 			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
 			goto out_lclean_unpin;
 		}

-		fence_put(fence);
+		dma_fence_put(fence);

 		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
 		if (r) {
@@ -216,7 +216,7 @@ out_lclean:
 			amdgpu_bo_unref(&gtt_obj[i]);
 		}
 		if (fence)
-			fence_put(fence);
+			dma_fence_put(fence);
 		break;
 	}

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -104,7 +104,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			     __field(struct amdgpu_device *, adev)
 			     __field(struct amd_sched_job *, sched_job)
 			     __field(struct amdgpu_ib *, ib)
-			     __field(struct fence *, fence)
+			     __field(struct dma_fence *, fence)
 			     __field(char *, ring_name)
 			     __field(u32, num_ibs)
 			     ),
@@ -129,7 +129,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			     __field(struct amdgpu_device *, adev)
 			     __field(struct amd_sched_job *, sched_job)
 			     __field(struct amdgpu_ib *, ib)
-			     __field(struct fence *, fence)
+			     __field(struct dma_fence *, fence)
 			     __field(char *, ring_name)
 			     __field(u32, num_ibs)
 			     ),
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -34,7 +34,6 @@
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_page_alloc.h>
-#include <ttm/ttm_memory.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/seq_file.h>
@@ -51,16 +50,6 @@
 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);

-static struct amdgpu_device *amdgpu_get_adev(struct ttm_bo_device *bdev)
-{
-	struct amdgpu_mman *mman;
-	struct amdgpu_device *adev;
-
-	mman = container_of(bdev, struct amdgpu_mman, bdev);
-	adev = container_of(mman, struct amdgpu_device, mman);
-	return adev;
-}
-

 /*
 * Global memory.
@@ -75,7 +64,7 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 	ttm_mem_global_release(ref->object);
 }

-int amdgpu_ttm_global_init(struct amdgpu_device *adev)
+static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 	struct drm_global_reference *global_ref;
 	struct amdgpu_ring *ring;
@@ -150,7 +139,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 {
 	struct amdgpu_device *adev;

-	adev = amdgpu_get_adev(bdev);
+	adev = amdgpu_ttm_adev(bdev);

 	switch (type) {
 	case TTM_PL_SYSTEM:
@@ -168,7 +157,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		break;
 	case TTM_PL_VRAM:
 		/* "On-card" video ram */
-		man->func = &ttm_bo_manager_func;
+		man->func = &amdgpu_vram_mgr_func;
 		man->gpu_offset = adev->mc.vram_start;
 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
 			     TTM_MEMTYPE_FLAG_MAPPABLE;
@@ -195,6 +184,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo;
 	static struct ttm_place placements = {
 		.fpfn = 0,
@@ -213,7 +203,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	abo = container_of(bo, struct amdgpu_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
-		if (abo->adev->mman.buffer_funcs_ring->ready == false) {
+		if (adev->mman.buffer_funcs_ring->ready == false) {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 		} else {
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
@@ -229,7 +219,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 				 * allocating address space for the BO.
 				 */
 				abo->placements[i].lpfn =
-					abo->adev->mc.gtt_size >> PAGE_SHIFT;
+					adev->mc.gtt_size >> PAGE_SHIFT;
 			}
 		}
 		break;
@@ -260,63 +250,115 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
 	new_mem->mm_node = NULL;
 }

-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-			bool evict, bool no_wait_gpu,
-			struct ttm_mem_reg *new_mem,
-			struct ttm_mem_reg *old_mem)
+static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
+			       struct drm_mm_node *mm_node,
+			       struct ttm_mem_reg *mem,
+			       uint64_t *addr)
 {
-	struct amdgpu_device *adev;
-	struct amdgpu_ring *ring;
-	uint64_t old_start, new_start;
-	struct fence *fence;
 	int r;

-	adev = amdgpu_get_adev(bo->bdev);
-	ring = adev->mman.buffer_funcs_ring;
-
-	switch (old_mem->mem_type) {
+	switch (mem->mem_type) {
 	case TTM_PL_TT:
-		r = amdgpu_ttm_bind(bo, old_mem);
+		r = amdgpu_ttm_bind(bo, mem);
 		if (r)
 			return r;

 	case TTM_PL_VRAM:
-		old_start = (u64)old_mem->start << PAGE_SHIFT;
-		old_start += bo->bdev->man[old_mem->mem_type].gpu_offset;
+		*addr = mm_node->start << PAGE_SHIFT;
+		*addr += bo->bdev->man[mem->mem_type].gpu_offset;
 		break;
 	default:
-		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		DRM_ERROR("Unknown placement %d\n", mem->mem_type);
 		return -EINVAL;
 	}
-	switch (new_mem->mem_type) {
-	case TTM_PL_TT:
-		r = amdgpu_ttm_bind(bo, new_mem);
-		if (r)
-			return r;

-	case TTM_PL_VRAM:
-		new_start = (u64)new_mem->start << PAGE_SHIFT;
-		new_start += bo->bdev->man[new_mem->mem_type].gpu_offset;
-		break;
-	default:
-		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
-		return -EINVAL;
-	}
+	return 0;
+}
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, bool no_wait_gpu,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+
+	struct drm_mm_node *old_mm, *new_mm;
+	uint64_t old_start, old_size, new_start, new_size;
+	unsigned long num_pages;
+	struct dma_fence *fence = NULL;
+	int r;
+
+	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+
 	if (!ring->ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}

-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
-
-	r = amdgpu_copy_buffer(ring, old_start, new_start,
-			       new_mem->num_pages * PAGE_SIZE, /* bytes */
-			       bo->resv, &fence, false);
+	old_mm = old_mem->mm_node;
+	r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start);
 	if (r)
 		return r;
+	old_size = old_mm->size;
+
+
+	new_mm = new_mem->mm_node;
+	r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start);
+	if (r)
+		return r;
+	new_size = new_mm->size;
+
+	num_pages = new_mem->num_pages;
+	while (num_pages) {
+		unsigned long cur_pages = min(old_size, new_size);
+		struct dma_fence *next;
+
+		r = amdgpu_copy_buffer(ring, old_start, new_start,
+				       cur_pages * PAGE_SIZE,
+				       bo->resv, &next, false);
+		if (r)
+			goto error;
+
+		dma_fence_put(fence);
+		fence = next;
+
+		num_pages -= cur_pages;
+		if (!num_pages)
+			break;
+
+		old_size -= cur_pages;
+		if (!old_size) {
+			r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem,
+						&old_start);
+			if (r)
+				goto error;
+			old_size = old_mm->size;
+		} else {
+			old_start += cur_pages * PAGE_SIZE;
+		}
+
+		new_size -= cur_pages;
+		if (!new_size) {
+			r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem,
+						&new_start);
+			if (r)
+				goto error;
+
+			new_size = new_mm->size;
+		} else {
+			new_start += cur_pages * PAGE_SIZE;
+		}
+	}

 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
-	fence_put(fence);
+	dma_fence_put(fence);
+	return r;
+
+error:
+	if (fence)
+		dma_fence_wait(fence, false);
+	dma_fence_put(fence);
 	return r;
 }

@@ -332,7 +374,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo,
 	struct ttm_placement placement;
 	int r;

-	adev = amdgpu_get_adev(bo->bdev);
+	adev = amdgpu_ttm_adev(bo->bdev);
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
 	placement.num_placement = 1;
@@ -379,7 +421,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo,
 	struct ttm_place placements;
 	int r;

-	adev = amdgpu_get_adev(bo->bdev);
+	adev = amdgpu_ttm_adev(bo->bdev);
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
 	placement.num_placement = 1;
@@ -422,7 +464,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 	if (WARN_ON_ONCE(abo->pin_count > 0))
 		return -EINVAL;

-	adev = amdgpu_get_adev(bo->bdev);
+	adev = amdgpu_ttm_adev(bo->bdev);

 	/* remember the eviction */
 	if (evict)
@@ -475,7 +517,7 @@ memcpy:
 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct amdgpu_device *adev = amdgpu_get_adev(bdev);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);

 	mem->bus.addr = NULL;
 	mem->bus.offset = 0;
@@ -607,7 +649,7 @@ release_pages:
 /* prepare the sg table with the user pages */
 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned nents;
 	int r;
@@ -639,7 +681,7 @@ release_sg:

 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct sg_page_iter sg_iter;

@@ -799,7 +841,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
 	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt;

-	adev = amdgpu_get_adev(bdev);
+	adev = amdgpu_ttm_adev(bdev);

 	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
 	if (gtt == NULL) {
@@ -843,7 +885,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 		return 0;
 	}

-	adev = amdgpu_get_adev(ttm->bdev);
+	adev = amdgpu_ttm_adev(ttm->bdev);

 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
@@ -889,7 +931,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	if (slave)
 		return;

-	adev = amdgpu_get_adev(ttm->bdev);
+	adev = amdgpu_ttm_adev(ttm->bdev);

 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl()) {
@@ -1012,7 +1054,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,

 static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
 	unsigned i, j;

 	for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) {
@@ -1029,7 +1071,7 @@ static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo)

 static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
 	unsigned log2_size = min(ilog2(tbo->num_pages),
 				 AMDGPU_TTM_LRU_SIZE - 1);

@@ -1060,12 +1102,37 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
 	return res;
 }

+static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
+					    const struct ttm_place *place)
+{
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    bo->mem.start == AMDGPU_BO_INVALID_OFFSET) {
+		unsigned long num_pages = bo->mem.num_pages;
+		struct drm_mm_node *node = bo->mem.mm_node;
+
+		/* Check each drm MM node individually */
+		while (num_pages) {
+			if (place->fpfn < (node->start + node->size) &&
+			    !(place->lpfn && place->lpfn <= node->start))
+				return true;
+
+			num_pages -= node->size;
+			++node;
+		}
+
+		return false;
+	}
+
+	return ttm_bo_eviction_valuable(bo, place);
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
 	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
 	.invalidate_caches = &amdgpu_invalidate_caches,
 	.init_mem_type = &amdgpu_init_mem_type,
+	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
 	.evict_flags = &amdgpu_evict_flags,
 	.move = &amdgpu_bo_move,
 	.verify_access = &amdgpu_verify_access,
@@ -1083,6 +1150,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	unsigned i, j;
 	int r;

+	r = amdgpu_ttm_global_init(adev);
+	if (r) {
+		return r;
+	}
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&adev->mman.bdev,
 			       adev->mman.bo_global_ref.ref.object,
@@ -1119,7 +1190,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)

 	r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 			     NULL, NULL, &adev->stollen_vga_memory);
 	if (r) {
 		return r;
@@ -1247,7 +1319,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 		       uint64_t dst_offset,
 		       uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct fence **fence, bool direct_submit)
+		       struct dma_fence **fence, bool direct_submit)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_job *job;
@@ -1294,7 +1366,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 	if (direct_submit) {
 		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
 				       NULL, NULL, fence);
-		job->fence = fence_get(*fence);
+		job->fence = dma_fence_get(*fence);
 		if (r)
 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
 		amdgpu_job_free(job);
@@ -1313,28 +1385,40 @@ error_free:
 }

 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-		uint32_t src_data,
-		struct reservation_object *resv,
-		struct fence **fence)
+		       uint32_t src_data,
+		       struct reservation_object *resv,
+		       struct dma_fence **fence)
 {
-	struct amdgpu_device *adev = bo->adev;
-	struct amdgpu_job *job;
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

-	uint32_t max_bytes, byte_count;
-	uint64_t dst_offset;
+	struct drm_mm_node *mm_node;
+	unsigned long num_pages;
 	unsigned int num_loops, num_dw;
-	unsigned int i;
+
+	struct amdgpu_job *job;
 	int r;

-	byte_count = bo->tbo.num_pages << PAGE_SHIFT;
-	max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
-	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
+	if (!ring->ready) {
+		DRM_ERROR("Trying to clear memory with ring turned off.\n");
+		return -EINVAL;
+	}
+
+	num_pages = bo->tbo.num_pages;
+	mm_node = bo->tbo.mem.mm_node;
+	num_loops = 0;
+	while (num_pages) {
+		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+
+		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
+		num_pages -= mm_node->size;
+		++mm_node;
+	}
 	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;

 	/* for IB padding */
-	while (num_dw & 0x7)
-		num_dw++;
+	num_dw += 64;

 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
 	if (r)
@@ -1342,28 +1426,43 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,

 	if (resv) {
 		r = amdgpu_sync_resv(adev, &job->sync, resv,
-				AMDGPU_FENCE_OWNER_UNDEFINED);
+				     AMDGPU_FENCE_OWNER_UNDEFINED);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
 			goto error_free;
 		}
 	}

-	dst_offset = bo->tbo.mem.start << PAGE_SHIFT;
-	for (i = 0; i < num_loops; i++) {
-		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+	num_pages = bo->tbo.num_pages;
+	mm_node = bo->tbo.mem.mm_node;

-		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
-				dst_offset, cur_size_in_bytes);
+	while (num_pages) {
+		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
+		uint64_t dst_addr;

-		dst_offset += cur_size_in_bytes;
-		byte_count -= cur_size_in_bytes;
+		r = amdgpu_mm_node_addr(&bo->tbo, mm_node,
+					&bo->tbo.mem, &dst_addr);
+		if (r)
+			return r;
+
+		while (byte_count) {
+			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+
+			amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+						dst_addr, cur_size_in_bytes);
+
+			dst_addr += cur_size_in_bytes;
+			byte_count -= cur_size_in_bytes;
+		}
+
+		num_pages -= mm_node->size;
+		++mm_node;
 	}

 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
-			AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 	if (r)
 		goto error_free;

@@ -1554,8 +1653,3 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)

 #endif
 }
-
-u64 amdgpu_ttm_get_gtt_mem_size(struct amdgpu_device *adev)
-{
-	return ttm_get_kernel_zone_memory_size(adev->mman.mem_global_ref.object);
-}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -66,6 +66,7 @@ struct amdgpu_mman {
 };

 extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
+extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;

 int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *tbo,
@@ -77,11 +78,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 		       uint64_t dst_offset,
 		       uint32_t byte_count,
 		       struct reservation_object *resv,
-		       struct fence **fence, bool direct_submit);
+		       struct dma_fence **fence, bool direct_submit);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint32_t src_data,
 			struct reservation_object *resv,
-			struct fence **fence);
+			struct dma_fence **fence);

 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -228,6 +228,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
 	ucode->mc_addr = mc_addr;
 	ucode->kaddr = kptr;

+	if (ucode->ucode_id == AMDGPU_UCODE_ID_STORAGE)
+		return 0;
+
 	header = (const struct common_firmware_header *)ucode->fw->data;
 	memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
 		le32_to_cpu(header->ucode_array_offset_bytes)),
@@ -236,6 +239,31 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode,
 	return 0;
 }

+static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
+				uint64_t mc_addr, void *kptr)
+{
+	const struct gfx_firmware_header_v1_0 *header = NULL;
+	const struct common_firmware_header *comm_hdr = NULL;
+	uint8_t* src_addr = NULL;
+	uint8_t* dst_addr = NULL;
+
+	if (NULL == ucode->fw)
+		return 0;
+
+	comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
+	header = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+	dst_addr = ucode->kaddr +
+			   ALIGN(le32_to_cpu(comm_hdr->ucode_size_bytes),
+			   PAGE_SIZE);
+	src_addr = (uint8_t *)ucode->fw->data +
+			   le32_to_cpu(comm_hdr->ucode_array_offset_bytes) +
+			   (le32_to_cpu(header->jt_offset) * 4);
+	memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4);
+
+	return 0;
+}
+
+
 int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 {
 	struct amdgpu_bo **bo = &adev->firmware.fw_buf;
@@ -247,7 +275,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 	const struct common_firmware_header *header = NULL;

 	err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
-			       AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, bo);
+				amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+				0, NULL, NULL, bo);
 	if (err) {
 		dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
 		goto failed;
@@ -259,7 +288,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 		goto failed_reserve;
 	}

-	err = amdgpu_bo_pin(*bo, AMDGPU_GEM_DOMAIN_GTT, &fw_mc_addr);
+	err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+				&fw_mc_addr);
 	if (err) {
 		dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
 		goto failed_pin;
@@ -279,6 +309,13 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 			header = (const struct common_firmware_header *)ucode->fw->data;
 			amdgpu_ucode_init_single_fw(ucode, fw_mc_addr + fw_offset,
 						    fw_buf_ptr + fw_offset);
+			if (i == AMDGPU_UCODE_ID_CP_MEC1) {
+				const struct gfx_firmware_header_v1_0 *cp_hdr;
+				cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+				amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset,
+						    fw_buf_ptr + fw_offset);
+				fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
+			}
 			fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 		}
 	}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -130,6 +130,7 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_CP_MEC1,
 	AMDGPU_UCODE_ID_CP_MEC2,
 	AMDGPU_UCODE_ID_RLC_G,
+	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_MAXIMUM,
 };

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -333,7 +333,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 	for (i = 0; i < adev->uvd.max_handles; ++i) {
 		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
 		if (handle != 0 && adev->uvd.filp[i] == filp) {
-			struct fence *fence;
+			struct dma_fence *fence;

 			r = amdgpu_uvd_get_destroy_msg(ring, handle,
 						       false, &fence);
@@ -342,8 +342,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 				continue;
 			}

-			fence_wait(fence, false);
-			fence_put(fence);
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);

 			adev->uvd.filp[i] = NULL;
 			atomic_set(&adev->uvd.handles[i], 0);
@@ -360,6 +360,18 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
 	}
 }

+static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
+{
+	uint32_t lo, hi;
+	uint64_t addr;
+
+	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
+	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
+	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
+
+	return addr;
+}
+
 /**
 * amdgpu_uvd_cs_pass1 - first parsing round
 *
@@ -372,14 +384,10 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo;
-	uint32_t cmd, lo, hi;
-	uint64_t addr;
+	uint32_t cmd;
+	uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
 	int r = 0;

-	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
-	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
-	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
-
 	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
 	if (mapping == NULL) {
 		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
@@ -698,18 +706,16 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo;
-	uint32_t cmd, lo, hi;
+	uint32_t cmd;
 	uint64_t start, end;
-	uint64_t addr;
+	uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
 	int r;

-	lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0);
-	hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1);
-	addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
-
 	mapping = amdgpu_cs_find_mapping(ctx->parser, addr, &bo);
-	if (mapping == NULL)
+	if (mapping == NULL) {
+		DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
 		return -EINVAL;
+	}

 	start = amdgpu_bo_gpu_offset(bo);

@@ -876,6 +882,9 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
 	int r;

+	parser->job->vm = NULL;
+	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+
 	if (ib->length_dw % 16) {
 		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
 			  ib->length_dw);
@@ -890,10 +899,13 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	ctx.buf_sizes = buf_sizes;
 	ctx.ib_idx = ib_idx;

-	/* first round, make sure the buffers are actually in the UVD segment */
-	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
-	if (r)
-		return r;
+	/* first round only required on chips without UVD 64 bit address support */
+	if (!parser->adev->uvd.address_64_bit) {
+		/* first round, make sure the buffers are actually in the UVD segment */
+		r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
+		if (r)
+			return r;
+	}

 	/* second round, patch buffer addresses into the command stream */
 	r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
@@ -909,14 +921,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 }

 static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
-			       bool direct, struct fence **fence)
+			       bool direct, struct dma_fence **fence)
 {
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct list_head head;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t addr;
 	int i, r;
@@ -931,7 +943,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	if (r)
 		return r;

-	if (!bo->adev->uvd.address_64_bit) {
+	if (!ring->adev->uvd.address_64_bit) {
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
 		amdgpu_uvd_force_into_uvd_segment(bo);
 	}
@@ -960,7 +972,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,

 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = fence_get(f);
+		job->fence = dma_fence_get(f);
 		if (r)
 			goto err_free;

@@ -975,9 +987,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ttm_eu_fence_buffer_objects(&ticket, &head, f);

 	if (fence)
-		*fence = fence_get(f);
+		*fence = dma_fence_get(f);
 	amdgpu_bo_unref(&bo);
-	fence_put(f);
+	dma_fence_put(f);

 	return 0;

@@ -993,7 +1005,7 @@ err:
   crash the vcpu so just try to emmit a dummy create/destroy msg to
   avoid this */
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence)
+			      struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_bo *bo;
@@ -1002,7 +1014,8 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,

 	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 			     NULL, NULL, &bo);
 	if (r)
 		return r;
@@ -1042,7 +1055,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 }

 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence)
+			       bool direct, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_bo *bo;
@@ -1051,7 +1064,8 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,

 	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 			     NULL, NULL, &bo);
 	if (r)
 		return r;
@@ -1128,7 +1142,7 @@ void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 */
 int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
-	struct fence *fence;
+	struct dma_fence *fence;
 	long r;

 	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
@@ -1143,7 +1157,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		goto error;
 	}

-	r = fence_wait_timeout(fence, false, timeout);
+	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out.\n");
 		r = -ETIMEDOUT;
@@ -1154,7 +1168,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = 0;
 	}

-	fence_put(fence);
+	dma_fence_put(fence);

 error:
 	return r;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -29,9 +29,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
 int amdgpu_uvd_suspend(struct amdgpu_device *adev);
 int amdgpu_uvd_resume(struct amdgpu_device *adev);
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence);
+			      struct dma_fence **fence);
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence);
+			       bool direct, struct dma_fence **fence);
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -157,7 +157,8 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)

 	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
-			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 			     NULL, NULL, &adev->vce.vcpu_bo);
 	if (r) {
 		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
@@ -395,12 +396,12 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 * Open up a stream for HW test
 */
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence)
+			      struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint64_t dummy;
 	int i, r;

@@ -450,14 +451,14 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;

 	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-	job->fence = fence_get(f);
+	job->fence = dma_fence_get(f);
 	if (r)
 		goto err;

 	amdgpu_job_free(job);
 	if (fence)
-		*fence = fence_get(f);
-	fence_put(f);
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
 	return 0;

 err:
@@ -476,12 +477,12 @@ err:
 * Close up a stream for HW test or if userspace failed to do so
 */
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence)
+			       bool direct, struct dma_fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	int i, r;

 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -513,7 +514,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,

 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = fence_get(f);
+		job->fence = dma_fence_get(f);
 		if (r)
 			goto err;

@@ -526,8 +527,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	}

 	if (fence)
-		*fence = fence_get(f);
-	fence_put(f);
+		*fence = dma_fence_get(f);
+	dma_fence_put(f);
 	return 0;

 err:
@@ -641,6 +642,9 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	uint32_t *size = &tmp;
 	int i, r, idx = 0;

+	p->job->vm = NULL;
+	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+
 	r = amdgpu_cs_sysvm_access_required(p);
 	if (r)
 		return r;
@@ -787,6 +791,96 @@ out:
 	return r;
 }

+/**
+ * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode
+ *
+ * @p: parser context
+ *
+ */
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
+{
+	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
+	int session_idx = -1;
+	uint32_t destroyed = 0;
+	uint32_t created = 0;
+	uint32_t allocated = 0;
+	uint32_t tmp, handle = 0;
+	int i, r = 0, idx = 0;
+
+	while (idx < ib->length_dw) {
+		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
+		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
+
+		if ((len < 8) || (len & 3)) {
+			DRM_ERROR("invalid VCE command length (%d)!\n", len);
+			r = -EINVAL;
+			goto out;
+		}
+
+		switch (cmd) {
+		case 0x00000001: /* session */
+			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
+			session_idx = amdgpu_vce_validate_handle(p, handle,
+								 &allocated);
+			if (session_idx < 0) {
+				r = session_idx;
+				goto out;
+			}
+			break;
+
+		case 0x01000001: /* create */
+			created |= 1 << session_idx;
+			if (destroyed & (1 << session_idx)) {
+				destroyed &= ~(1 << session_idx);
+				allocated |= 1 << session_idx;
+
+			} else if (!(allocated & (1 << session_idx))) {
+				DRM_ERROR("Handle already in use!\n");
+				r = -EINVAL;
+				goto out;
+			}
+
+			break;
+
+		case 0x02000001: /* destroy */
+			destroyed |= 1 << session_idx;
+			break;
+
+		default:
+			break;
+		}
+
+		if (session_idx == -1) {
+			DRM_ERROR("no session command at start of IB\n");
+			r = -EINVAL;
+			goto out;
+		}
+
+		idx += len / 4;
+	}
+
+	if (allocated & ~created) {
+		DRM_ERROR("New session without create command!\n");
+		r = -ENOENT;
+	}
+
+out:
+	if (!r) {
+		/* No error, free all destroyed handle slots */
+		tmp = destroyed;
+		amdgpu_ib_free(p->adev, ib, NULL);
+	} else {
+		/* Error during parsing, free all allocated handle slots */
+		tmp = allocated;
+	}
+
+	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+		if (tmp & (1 << i))
+			atomic_set(&p->adev->vce.handles[i], 0);
+
+	return r;
+}
+
 /**
 * amdgpu_vce_ring_emit_ib - execute indirect buffer
 *
@@ -823,18 +917,6 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 	amdgpu_ring_write(ring, VCE_CMD_END);
 }

-unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring)
-{
-	return
-		4; /* amdgpu_vce_ring_emit_ib */
-}
-
-unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring)
-{
-	return
-		6; /* amdgpu_vce_ring_emit_fence  x1 no user fence */
-}
-
 /**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
@@ -883,7 +965,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 */
 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 	long r;

 	/* skip vce ring1/2 ib test for now, since it's not reliable */
@@ -902,7 +984,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		goto error;
 	}

-	r = fence_wait_timeout(fence, false, timeout);
+	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out.\n");
 		r = -ETIMEDOUT;
@@ -913,6 +995,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		r = 0;
 	}
 error:
-	fence_put(fence);
+	dma_fence_put(fence);
 	return r;
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -29,11 +29,12 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
 int amdgpu_vce_suspend(struct amdgpu_device *adev);
 int amdgpu_vce_resume(struct amdgpu_device *adev);
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
-			      struct fence **fence);
+			      struct dma_fence **fence);
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       bool direct, struct fence **fence);
+			       bool direct, struct dma_fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
 			     unsigned vm_id, bool ctx_switch);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,7 +25,7 @@
 *          Alex Deucher
 *          Jerome Glisse
 */
-#include <linux/fence-array.h>
+#include <linux/dma-fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -116,38 +116,43 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 }

 /**
- * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
+ * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
- * @duplicates: head of duplicates list
+ * @validate: callback to do the validation
+ * @param: parameter for the validation callback
 *
- * Add the page directory to the BO duplicates list
- * for command submission.
+ * Validate the page table BOs on command submission if neccessary.
 */
-void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct list_head *duplicates)
+int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			      int (*validate)(void *p, struct amdgpu_bo *bo),
+			      void *param)
 {
 	uint64_t num_evictions;
 	unsigned i;
+	int r;

 	/* We only need to validate the page tables
 	 * if they aren't already valid.
 	 */
 	num_evictions = atomic64_read(&adev->num_evictions);
 	if (num_evictions == vm->last_eviction_counter)
-		return;
+		return 0;

 	/* add the vm page table to the list */
 	for (i = 0; i <= vm->max_pde_used; ++i) {
-		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
+		struct amdgpu_bo *bo = vm->page_tables[i].bo;

-		if (!entry->robj)
+		if (!bo)
 			continue;

-		list_add(&entry->tv.head, duplicates);
+		r = validate(param, bo);
+		if (r)
+			return r;
 	}

+	return 0;
 }

 /**
@@ -166,12 +171,12 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,

 	spin_lock(&glob->lru_lock);
 	for (i = 0; i <= vm->max_pde_used; ++i) {
-		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
+		struct amdgpu_bo *bo = vm->page_tables[i].bo;

-		if (!entry->robj)
+		if (!bo)
 			continue;

-		ttm_bo_move_to_lru_tail(&entry->robj->tbo);
+		ttm_bo_move_to_lru_tail(&bo->tbo);
 	}
 	spin_unlock(&glob->lru_lock);
 }
@@ -194,14 +199,14 @@ static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync, struct fence *fence,
+		      struct amdgpu_sync *sync, struct dma_fence *fence,
 		      struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t fence_context = adev->fence_context + ring->idx;
-	struct fence *updates = sync->last_vm_update;
+	struct dma_fence *updates = sync->last_vm_update;
 	struct amdgpu_vm_id *id, *idle;
-	struct fence **fences;
+	struct dma_fence **fences;
 	unsigned i;
 	int r = 0;

@@ -225,17 +230,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (&idle->list == &adev->vm_manager.ids_lru) {
 		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
 		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
-		struct fence_array *array;
+		struct dma_fence_array *array;
 		unsigned j;

 		for (j = 0; j < i; ++j)
-			fence_get(fences[j]);
+			dma_fence_get(fences[j]);

-		array = fence_array_create(i, fences, fence_context,
+		array = dma_fence_array_create(i, fences, fence_context,
 					   seqno, true);
 		if (!array) {
 			for (j = 0; j < i; ++j)
-				fence_put(fences[j]);
+				dma_fence_put(fences[j]);
 			kfree(fences);
 			r = -ENOMEM;
 			goto error;
@@ -243,7 +248,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,


 		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
-		fence_put(&array->base);
+		dma_fence_put(&array->base);
 		if (r)
 			goto error;

@@ -257,7 +262,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	/* Check if we can use a VMID already assigned to this VM */
 	i = ring->idx;
 	do {
-		struct fence *flushed;
+		struct dma_fence *flushed;

 		id = vm->ids[i++];
 		if (i == AMDGPU_MAX_RINGS)
@@ -279,12 +284,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			continue;

 		if (id->last_flush->context != fence_context &&
-		    !fence_is_signaled(id->last_flush))
+		    !dma_fence_is_signaled(id->last_flush))
 			continue;

 		flushed  = id->flushed_updates;
 		if (updates &&
-		    (!flushed || fence_is_later(updates, flushed)))
+		    (!flushed || dma_fence_is_later(updates, flushed)))
 			continue;

 		/* Good we can use this VMID. Remember this submission as
@@ -315,14 +320,14 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;

-	fence_put(id->first);
-	id->first = fence_get(fence);
+	dma_fence_put(id->first);
+	id->first = dma_fence_get(fence);

-	fence_put(id->last_flush);
+	dma_fence_put(id->last_flush);
 	id->last_flush = NULL;

-	fence_put(id->flushed_updates);
-	id->flushed_updates = fence_get(updates);
+	dma_fence_put(id->flushed_updates);
+	id->flushed_updates = dma_fence_get(updates);

 	id->pd_gpu_addr = job->vm_pd_addr;
 	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
@@ -341,9 +346,9 @@ error:
 static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	const struct amdgpu_ip_block_version *ip_block;
+	const struct amdgpu_ip_block *ip_block;

-	if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
 		/* only compute rings */
 		return false;

@@ -351,10 +356,10 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
 	if (!ip_block)
 		return false;

-	if (ip_block->major <= 7) {
+	if (ip_block->version->major <= 7) {
 		/* gfx7 has no workaround */
 		return true;
-	} else if (ip_block->major == 8) {
+	} else if (ip_block->version->major == 8) {
 		if (adev->gfx.mec_fw_version >= 673)
 			/* gfx8 is fixed in MEC firmware 673 */
 			return false;
@@ -393,7 +398,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)

 	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
 	    amdgpu_vm_is_gpu_reset(adev, id))) {
-		struct fence *fence;
+		struct dma_fence *fence;

 		trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
 		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
@@ -403,7 +408,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 			return r;

 		mutex_lock(&adev->vm_manager.lock);
-		fence_put(id->last_flush);
+		dma_fence_put(id->last_flush);
 		id->last_flush = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}
@@ -524,70 +529,6 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
 	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
 }

-/**
- * amdgpu_vm_clear_bo - initially clear the page dir/table
- *
- * @adev: amdgpu_device pointer
- * @bo: bo to clear
- *
- * need to reserve bo first before calling it.
- */
-static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
-			      struct amdgpu_vm *vm,
-			      struct amdgpu_bo *bo)
-{
-	struct amdgpu_ring *ring;
-	struct fence *fence = NULL;
-	struct amdgpu_job *job;
-	struct amdgpu_pte_update_params params;
-	unsigned entries;
-	uint64_t addr;
-	int r;
-
-	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
-
-	r = reservation_object_reserve_shared(bo->tbo.resv);
-	if (r)
-		return r;
-
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	if (r)
-		goto error;
-
-	r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
-	if (r)
-		goto error;
-
-	addr = amdgpu_bo_gpu_offset(bo);
-	entries = amdgpu_bo_size(bo) / 8;
-
-	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
-	if (r)
-		goto error;
-
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	params.ib = &job->ibs[0];
-	amdgpu_vm_do_set_ptes(&params, addr, 0, entries, 0, 0);
-	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-
-	WARN_ON(job->ibs[0].length_dw > 64);
-	r = amdgpu_job_submit(job, ring, &vm->entity,
-			      AMDGPU_FENCE_OWNER_VM, &fence);
-	if (r)
-		goto error_free;
-
-	amdgpu_bo_fence(bo, fence, true);
-	fence_put(fence);
-	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-
-error:
-	return r;
-}
-
 /**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
@@ -612,123 +553,6 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 	return result;
 }

-static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev,
-					 struct amdgpu_vm *vm,
-					 bool shadow)
-{
-	struct amdgpu_ring *ring;
-	struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow :
-		vm->page_directory;
-	uint64_t pd_addr;
-	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
-	uint64_t last_pde = ~0, last_pt = ~0;
-	unsigned count = 0, pt_idx, ndw;
-	struct amdgpu_job *job;
-	struct amdgpu_pte_update_params params;
-	struct fence *fence = NULL;
-
-	int r;
-
-	if (!pd)
-		return 0;
-
-	r = amdgpu_ttm_bind(&pd->tbo, &pd->tbo.mem);
-	if (r)
-		return r;
-
-	pd_addr = amdgpu_bo_gpu_offset(pd);
-	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
-
-	/* padding, etc. */
-	ndw = 64;
-
-	/* assume the worst case */
-	ndw += vm->max_pde_used * 6;
-
-	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
-	if (r)
-		return r;
-
-	memset(&params, 0, sizeof(params));
-	params.adev = adev;
-	params.ib = &job->ibs[0];
-
-	/* walk over the address space and update the page directory */
-	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
-		struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj;
-		uint64_t pde, pt;
-
-		if (bo == NULL)
-			continue;
-
-		if (bo->shadow) {
-			struct amdgpu_bo *shadow = bo->shadow;
-
-			r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
-			if (r)
-				return r;
-		}
-
-		pt = amdgpu_bo_gpu_offset(bo);
-		if (!shadow) {
-			if (vm->page_tables[pt_idx].addr == pt)
-				continue;
-			vm->page_tables[pt_idx].addr = pt;
-		} else {
-			if (vm->page_tables[pt_idx].shadow_addr == pt)
-				continue;
-			vm->page_tables[pt_idx].shadow_addr = pt;
-		}
-
-		pde = pd_addr + pt_idx * 8;
-		if (((last_pde + 8 * count) != pde) ||
-		    ((last_pt + incr * count) != pt) ||
-		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-
-			if (count) {
-				amdgpu_vm_do_set_ptes(&params, last_pde,
-						      last_pt, count, incr,
-						      AMDGPU_PTE_VALID);
-			}
-
-			count = 1;
-			last_pde = pde;
-			last_pt = pt;
-		} else {
-			++count;
-		}
-	}
-
-	if (count)
-		amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
-				      count, incr, AMDGPU_PTE_VALID);
-
-	if (params.ib->length_dw != 0) {
-		amdgpu_ring_pad_ib(ring, params.ib);
-		amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
-				 AMDGPU_FENCE_OWNER_VM);
-		WARN_ON(params.ib->length_dw > ndw);
-		r = amdgpu_job_submit(job, ring, &vm->entity,
-				      AMDGPU_FENCE_OWNER_VM, &fence);
-		if (r)
-			goto error_free;
-
-		amdgpu_bo_fence(pd, fence, true);
-		fence_put(vm->page_directory_fence);
-		vm->page_directory_fence = fence_get(fence);
-		fence_put(fence);
-
-	} else {
-		amdgpu_job_free(job);
-	}
-
-	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
-}
-
 /*
 * amdgpu_vm_update_pdes - make sure that page directory is valid
 *
@@ -742,14 +566,135 @@ error_free:
 * Returns 0 for success, error for failure.
 */
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
-                                   struct amdgpu_vm *vm)
+				    struct amdgpu_vm *vm)
 {
+	struct amdgpu_bo *shadow;
+	struct amdgpu_ring *ring;
+	uint64_t pd_addr, shadow_addr;
+	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
+	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
+	unsigned count = 0, pt_idx, ndw;
+	struct amdgpu_job *job;
+	struct amdgpu_pte_update_params params;
+	struct dma_fence *fence = NULL;
+
 	int r;

-	r = amdgpu_vm_update_pd_or_shadow(adev, vm, true);
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+	shadow = vm->page_directory->shadow;
+
+	/* padding, etc. */
+	ndw = 64;
+
+	/* assume the worst case */
+	ndw += vm->max_pde_used * 6;
+
+	pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+	if (shadow) {
+		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
+		if (r)
+			return r;
+		shadow_addr = amdgpu_bo_gpu_offset(shadow);
+		ndw *= 2;
+	} else {
+		shadow_addr = 0;
+	}
+
+	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 	if (r)
 		return r;
-	return amdgpu_vm_update_pd_or_shadow(adev, vm, false);
+
+	memset(&params, 0, sizeof(params));
+	params.adev = adev;
+	params.ib = &job->ibs[0];
+
+	/* walk over the address space and update the page directory */
+	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
+		struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo;
+		uint64_t pde, pt;
+
+		if (bo == NULL)
+			continue;
+
+		if (bo->shadow) {
+			struct amdgpu_bo *pt_shadow = bo->shadow;
+
+			r = amdgpu_ttm_bind(&pt_shadow->tbo,
+					    &pt_shadow->tbo.mem);
+			if (r)
+				return r;
+		}
+
+		pt = amdgpu_bo_gpu_offset(bo);
+		if (vm->page_tables[pt_idx].addr == pt)
+			continue;
+
+		vm->page_tables[pt_idx].addr = pt;
+
+		pde = pd_addr + pt_idx * 8;
+		if (((last_pde + 8 * count) != pde) ||
+		    ((last_pt + incr * count) != pt) ||
+		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
+
+			if (count) {
+				if (shadow)
+					amdgpu_vm_do_set_ptes(&params,
+							      last_shadow,
+							      last_pt, count,
+							      incr,
+							      AMDGPU_PTE_VALID);
+
+				amdgpu_vm_do_set_ptes(&params, last_pde,
+						      last_pt, count, incr,
+						      AMDGPU_PTE_VALID);
+			}
+
+			count = 1;
+			last_pde = pde;
+			last_shadow = shadow_addr + pt_idx * 8;
+			last_pt = pt;
+		} else {
+			++count;
+		}
+	}
+
+	if (count) {
+		if (vm->page_directory->shadow)
+			amdgpu_vm_do_set_ptes(&params, last_shadow, last_pt,
+					      count, incr, AMDGPU_PTE_VALID);
+
+		amdgpu_vm_do_set_ptes(&params, last_pde, last_pt,
+				      count, incr, AMDGPU_PTE_VALID);
+	}
+
+	if (params.ib->length_dw == 0) {
+		amdgpu_job_free(job);
+		return 0;
+	}
+
+	amdgpu_ring_pad_ib(ring, params.ib);
+	amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+			 AMDGPU_FENCE_OWNER_VM);
+	if (shadow)
+		amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv,
+				 AMDGPU_FENCE_OWNER_VM);
+
+	WARN_ON(params.ib->length_dw > ndw);
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_VM, &fence);
+	if (r)
+		goto error_free;
+
+	amdgpu_bo_fence(vm->page_directory, fence, true);
+	dma_fence_put(vm->page_directory_fence);
+	vm->page_directory_fence = dma_fence_get(fence);
+	dma_fence_put(fence);
+
+	return 0;
+
+error_free:
+	amdgpu_job_free(job);
+	return r;
 }

 /**
@@ -781,11 +726,11 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	/* initialize the variables */
 	addr = start;
 	pt_idx = addr >> amdgpu_vm_block_size;
-	pt = vm->page_tables[pt_idx].entry.robj;
+	pt = vm->page_tables[pt_idx].bo;
 	if (params->shadow) {
 		if (!pt->shadow)
 			return;
-		pt = vm->page_tables[pt_idx].entry.robj->shadow;
+		pt = pt->shadow;
 	}
 	if ((addr & ~mask) == (end & ~mask))
 		nptes = end - addr;
@@ -804,11 +749,11 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	/* walk over the address space and update the page tables */
 	while (addr < end) {
 		pt_idx = addr >> amdgpu_vm_block_size;
-		pt = vm->page_tables[pt_idx].entry.robj;
+		pt = vm->page_tables[pt_idx].bo;
 		if (params->shadow) {
 			if (!pt->shadow)
 				return;
-			pt = vm->page_tables[pt_idx].entry.robj->shadow;
+			pt = pt->shadow;
 		}

 		if ((addr & ~mask) == (end & ~mask))
@@ -929,20 +874,20 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 * Returns 0 for success, -EINVAL for failure.
 */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct fence *exclusive,
+				       struct dma_fence *exclusive,
 				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
 				       uint32_t flags, uint64_t addr,
-				       struct fence **fence)
+				       struct dma_fence **fence)
 {
 	struct amdgpu_ring *ring;
 	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	int r;

 	memset(&params, 0, sizeof(params));
@@ -1045,10 +990,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,

 	amdgpu_bo_fence(vm->page_directory, f, true);
 	if (fence) {
-		fence_put(*fence);
-		*fence = fence_get(f);
+		dma_fence_put(*fence);
+		*fence = dma_fence_get(f);
 	}
-	fence_put(f);
+	dma_fence_put(f);
 	return 0;

 error_free:
@@ -1065,8 +1010,8 @@ error_free:
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
- * @addr: addr to set the area to
 * @flags: HW flags for the mapping
+ * @nodes: array of drm_mm_nodes with the MC addresses
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
@@ -1074,17 +1019,16 @@ error_free:
 * Returns 0 for success, -EINVAL for failure.
 */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
-				      struct fence *exclusive,
+				      struct dma_fence *exclusive,
 				      uint32_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
-				      uint32_t flags, uint64_t addr,
-				      struct fence **fence)
+				      uint32_t flags,
+				      struct drm_mm_node *nodes,
+				      struct dma_fence **fence)
 {
-	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
-
-	uint64_t src = 0, start = mapping->it.start;
+	uint64_t pfn, src = 0, start = mapping->it.start;
 	int r;

 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1097,23 +1041,40 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,

 	trace_amdgpu_vm_bo_update(mapping);

-	if (pages_addr) {
-		if (flags == gtt_flags)
-			src = adev->gart.table_addr + (addr >> 12) * 8;
-		addr = 0;
+	pfn = mapping->offset >> PAGE_SHIFT;
+	if (nodes) {
+		while (pfn >= nodes->size) {
+			pfn -= nodes->size;
+			++nodes;
+		}
 	}
-	addr += mapping->offset;

-	if (!pages_addr || src)
-		return amdgpu_vm_bo_update_mapping(adev, exclusive,
-						   src, pages_addr, vm,
-						   start, mapping->it.last,
-						   flags, addr, fence);
+	do {
+		uint64_t max_entries;
+		uint64_t addr, last;

-	while (start != mapping->it.last + 1) {
-		uint64_t last;
+		if (nodes) {
+			addr = nodes->start << PAGE_SHIFT;
+			max_entries = (nodes->size - pfn) *
+				(PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+		} else {
+			addr = 0;
+			max_entries = S64_MAX;
+		}

-		last = min((uint64_t)mapping->it.last, start + max_size - 1);
+		if (pages_addr) {
+			if (flags == gtt_flags)
+				src = adev->gart.table_addr +
+					(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
+			else
+				max_entries = min(max_entries, 16ull * 1024ull);
+			addr = 0;
+		} else if (flags & AMDGPU_PTE_VALID) {
+			addr += adev->vm_manager.vram_base_offset;
+		}
+		addr += pfn << PAGE_SHIFT;
+
+		last = min((uint64_t)mapping->it.last, start + max_entries - 1);
 		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
 						src, pages_addr, vm,
 						start, last, flags, addr,
@@ -1121,9 +1082,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		if (r)
 			return r;

+		pfn += last - start + 1;
+		if (nodes && nodes->size == pfn) {
+			pfn = 0;
+			++nodes;
+		}
 		start = last + 1;
-		addr += max_size * AMDGPU_GPU_PAGE_SIZE;
-	}
+
+	} while (unlikely(start != mapping->it.last + 1));

 	return 0;
 }
@@ -1147,40 +1113,30 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
 	struct ttm_mem_reg *mem;
-	struct fence *exclusive;
-	uint64_t addr;
+	struct drm_mm_node *nodes;
+	struct dma_fence *exclusive;
 	int r;

 	if (clear) {
 		mem = NULL;
-		addr = 0;
+		nodes = NULL;
 		exclusive = NULL;
 	} else {
 		struct ttm_dma_tt *ttm;

 		mem = &bo_va->bo->tbo.mem;
-		addr = (u64)mem->start << PAGE_SHIFT;
-		switch (mem->mem_type) {
-		case TTM_PL_TT:
+		nodes = mem->mm_node;
+		if (mem->mem_type == TTM_PL_TT) {
 			ttm = container_of(bo_va->bo->tbo.ttm, struct
 					   ttm_dma_tt, ttm);
 			pages_addr = ttm->dma_address;
-			break;
-
-		case TTM_PL_VRAM:
-			addr += adev->vm_manager.vram_base_offset;
-			break;
-
-		default:
-			break;
 		}
-
 		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 	}

 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 	gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) &&
-		adev == bo_va->bo->adev) ? flags : 0;
+		adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? flags : 0;

 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->vm_status))
@@ -1190,7 +1146,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
 					       gtt_flags, pages_addr, vm,
-					       mapping, flags, addr,
+					       mapping, flags, nodes,
 					       &bo_va->last_pt_update);
 		if (r)
 			return r;
@@ -1405,18 +1361,18 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
 		struct reservation_object *resv = vm->page_directory->tbo.resv;
-		struct amdgpu_bo_list_entry *entry;
 		struct amdgpu_bo *pt;

-		entry = &vm->page_tables[pt_idx].entry;
-		if (entry->robj)
+		if (vm->page_tables[pt_idx].bo)
 			continue;

 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
 				     AMDGPU_GPU_PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-				     AMDGPU_GEM_CREATE_SHADOW,
+				     AMDGPU_GEM_CREATE_SHADOW |
+				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+				     AMDGPU_GEM_CREATE_VRAM_CLEARED,
 				     NULL, resv, &pt);
 		if (r)
 			goto error_free;
@@ -1426,27 +1382,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		 */
 		pt->parent = amdgpu_bo_ref(vm->page_directory);

-		r = amdgpu_vm_clear_bo(adev, vm, pt);
-		if (r) {
-			amdgpu_bo_unref(&pt->shadow);
-			amdgpu_bo_unref(&pt);
-			goto error_free;
-		}
-
-		if (pt->shadow) {
-			r = amdgpu_vm_clear_bo(adev, vm, pt->shadow);
-			if (r) {
-				amdgpu_bo_unref(&pt->shadow);
-				amdgpu_bo_unref(&pt);
-				goto error_free;
-			}
-		}
-
-		entry->robj = pt;
-		entry->priority = 0;
-		entry->tv.bo = &entry->robj->tbo;
-		entry->tv.shared = true;
-		entry->user_pages = NULL;
+		vm->page_tables[pt_idx].bo = pt;
 		vm->page_tables[pt_idx].addr = 0;
 	}

@@ -1547,7 +1483,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		kfree(mapping);
 	}

-	fence_put(bo_va->last_pt_update);
+	dma_fence_put(bo_va->last_pt_update);
 	kfree(bo_va);
 }

@@ -1626,7 +1562,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	r = amdgpu_bo_create(adev, pd_size, align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
-			     AMDGPU_GEM_CREATE_SHADOW,
+			     AMDGPU_GEM_CREATE_SHADOW |
+			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
 			     NULL, NULL, &vm->page_directory);
 	if (r)
 		goto error_free_sched_entity;
@@ -1635,24 +1573,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r)
 		goto error_free_page_directory;

-	r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
-	if (r)
-		goto error_unreserve;
-
-	if (vm->page_directory->shadow) {
-		r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory->shadow);
-		if (r)
-			goto error_unreserve;
-	}
-
 	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 	amdgpu_bo_unreserve(vm->page_directory);

 	return 0;

-error_unreserve:
-	amdgpu_bo_unreserve(vm->page_directory);
-
 error_free_page_directory:
 	amdgpu_bo_unref(&vm->page_directory->shadow);
 	amdgpu_bo_unref(&vm->page_directory);
@@ -1697,7 +1622,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}

 	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
-		struct amdgpu_bo *pt = vm->page_tables[i].entry.robj;
+		struct amdgpu_bo *pt = vm->page_tables[i].bo;

 		if (!pt)
 			continue;
@@ -1709,7 +1634,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)

 	amdgpu_bo_unref(&vm->page_directory->shadow);
 	amdgpu_bo_unref(&vm->page_directory);
-	fence_put(vm->page_directory_fence);
+	dma_fence_put(vm->page_directory_fence);
 }

 /**
@@ -1733,7 +1658,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}

-	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	adev->vm_manager.fence_context =
+		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;

@@ -1755,9 +1681,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
 		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];

-		fence_put(adev->vm_manager.ids[i].first);
+		dma_fence_put(adev->vm_manager.ids[i].first);
 		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
-		fence_put(id->flushed_updates);
-		fence_put(id->last_flush);
+		dma_fence_put(id->flushed_updates);
+		dma_fence_put(id->last_flush);
 	}
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_VM_H__
+#define __AMDGPU_VM_H__
+
+#include <linux/rbtree.h>
+
+#include "gpu_scheduler.h"
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
+struct amdgpu_bo_va;
+struct amdgpu_job;
+struct amdgpu_bo_list_entry;
+
+/*
+ * GPUVM handling
+ */
+
+/* maximum number of VMIDs */
+#define AMDGPU_NUM_VM	16
+
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE	0x3FFFF
+
+/* number of entries in page table */
+#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
+
+/* PTBs (Page Table Blocks) need to be aligned to 32K */
+#define AMDGPU_VM_PTB_ALIGN_SIZE   32768
+
+/* LOG2 number of continuous pages for the fragment field */
+#define AMDGPU_LOG2_PAGES_PER_FRAG 4
+
+#define AMDGPU_PTE_VALID	(1 << 0)
+#define AMDGPU_PTE_SYSTEM	(1 << 1)
+#define AMDGPU_PTE_SNOOPED	(1 << 2)
+
+/* VI only */
+#define AMDGPU_PTE_EXECUTABLE	(1 << 4)
+
+#define AMDGPU_PTE_READABLE	(1 << 5)
+#define AMDGPU_PTE_WRITEABLE	(1 << 6)
+
+#define AMDGPU_PTE_FRAG(x)	((x & 0x1f) << 7)
+
+/* How to programm VM fault handling */
+#define AMDGPU_VM_FAULT_STOP_NEVER	0
+#define AMDGPU_VM_FAULT_STOP_FIRST	1
+#define AMDGPU_VM_FAULT_STOP_ALWAYS	2
+
+struct amdgpu_vm_pt {
+	struct amdgpu_bo	*bo;
+	uint64_t		addr;
+};
+
+struct amdgpu_vm {
+	/* tree of virtual addresses mapped */
+	struct rb_root		va;
+
+	/* protecting invalidated */
+	spinlock_t		status_lock;
+
+	/* BOs moved, but not yet updated in the PT */
+	struct list_head	invalidated;
+
+	/* BOs cleared in the PT because of a move */
+	struct list_head	cleared;
+
+	/* BO mappings freed, but not yet updated in the PT */
+	struct list_head	freed;
+
+	/* contains the page directory */
+	struct amdgpu_bo	*page_directory;
+	unsigned		max_pde_used;
+	struct dma_fence		*page_directory_fence;
+	uint64_t		last_eviction_counter;
+
+	/* array of page tables, one for each page directory entry */
+	struct amdgpu_vm_pt	*page_tables;
+
+	/* for id and flush management per ring */
+	struct amdgpu_vm_id	*ids[AMDGPU_MAX_RINGS];
+
+	/* protecting freed */
+	spinlock_t		freed_lock;
+
+	/* Scheduler entity for page table updates */
+	struct amd_sched_entity	entity;
+
+	/* client id */
+	u64                     client_id;
+};
+
+struct amdgpu_vm_id {
+	struct list_head	list;
+	struct dma_fence		*first;
+	struct amdgpu_sync	active;
+	struct dma_fence		*last_flush;
+	atomic64_t		owner;
+
+	uint64_t		pd_gpu_addr;
+	/* last flushed PD/PT update */
+	struct dma_fence		*flushed_updates;
+
+	uint32_t                current_gpu_reset_count;
+
+	uint32_t		gds_base;
+	uint32_t		gds_size;
+	uint32_t		gws_base;
+	uint32_t		gws_size;
+	uint32_t		oa_base;
+	uint32_t		oa_size;
+};
+
+struct amdgpu_vm_manager {
+	/* Handling of VMIDs */
+	struct mutex				lock;
+	unsigned				num_ids;
+	struct list_head			ids_lru;
+	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
+
+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
+	uint32_t				max_pfn;
+	/* vram base address for page table entry  */
+	u64					vram_base_offset;
+	/* is vm enabled? */
+	bool					enabled;
+	/* vm pte handling */
+	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
+	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_rings;
+	atomic_t				vm_pte_next_ring;
+	/* client id counter */
+	atomic64_t				client_counter;
+};
+
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
+			 struct list_head *validated,
+			 struct amdgpu_bo_list_entry *entry);
+int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			      int (*callback)(void *p, struct amdgpu_bo *bo),
+			      void *param);
+void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
+				  struct amdgpu_vm *vm);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		      struct amdgpu_sync *sync, struct dma_fence *fence,
+		      struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			     struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+			struct amdgpu_bo_va *bo_va,
+			bool clear);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+			     struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+				       struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+				      struct amdgpu_vm *vm,
+				      struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+		     struct amdgpu_bo_va *bo_va,
+		     uint64_t addr, uint64_t offset,
+		     uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+		       struct amdgpu_bo_va *bo_va,
+		       uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+		      struct amdgpu_bo_va *bo_va);
+
+#endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#include <drm/drmP.h>
+#include "amdgpu.h"
+
+struct amdgpu_vram_mgr {
+	struct drm_mm mm;
+	spinlock_t lock;
+};
+
+/**
+ * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
+ *
+ * @man: TTM memory type manager
+ * @p_size: maximum size of VRAM
+ *
+ * Allocate and initialize the VRAM manager.
+ */
+static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man,
+				unsigned long p_size)
+{
+	struct amdgpu_vram_mgr *mgr;
+
+	mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+	if (!mgr)
+		return -ENOMEM;
+
+	drm_mm_init(&mgr->mm, 0, p_size);
+	spin_lock_init(&mgr->lock);
+	man->priv = mgr;
+	return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_fini - free and destroy VRAM manager
+ *
+ * @man: TTM memory type manager
+ *
+ * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man)
+{
+	struct amdgpu_vram_mgr *mgr = man->priv;
+
+	spin_lock(&mgr->lock);
+	if (!drm_mm_clean(&mgr->mm)) {
+		spin_unlock(&mgr->lock);
+		return -EBUSY;
+	}
+
+	drm_mm_takedown(&mgr->mm);
+	spin_unlock(&mgr->lock);
+	kfree(mgr);
+	man->priv = NULL;
+	return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_new - allocate new ranges
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
+			       struct ttm_buffer_object *tbo,
+			       const struct ttm_place *place,
+			       struct ttm_mem_reg *mem)
+{
+	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
+	struct amdgpu_vram_mgr *mgr = man->priv;
+	struct drm_mm *mm = &mgr->mm;
+	struct drm_mm_node *nodes;
+	enum drm_mm_search_flags sflags = DRM_MM_SEARCH_DEFAULT;
+	enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
+	unsigned long lpfn, num_nodes, pages_per_node, pages_left;
+	unsigned i;
+	int r;
+
+	lpfn = place->lpfn;
+	if (!lpfn)
+		lpfn = man->size;
+
+	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
+	    place->lpfn || amdgpu_vram_page_split == -1) {
+		pages_per_node = ~0ul;
+		num_nodes = 1;
+	} else {
+		pages_per_node = max((uint32_t)amdgpu_vram_page_split,
+				     mem->page_alignment);
+		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+	}
+
+	nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL);
+	if (!nodes)
+		return -ENOMEM;
+
+	if (place->flags & TTM_PL_FLAG_TOPDOWN) {
+		sflags = DRM_MM_SEARCH_BELOW;
+		aflags = DRM_MM_CREATE_TOP;
+	}
+
+	pages_left = mem->num_pages;
+
+	spin_lock(&mgr->lock);
+	for (i = 0; i < num_nodes; ++i) {
+		unsigned long pages = min(pages_left, pages_per_node);
+		uint32_t alignment = mem->page_alignment;
+
+		if (pages == pages_per_node)
+			alignment = pages_per_node;
+		else
+			sflags |= DRM_MM_SEARCH_BEST;
+
+		r = drm_mm_insert_node_in_range_generic(mm, &nodes[i], pages,
+							alignment, 0,
+							place->fpfn, lpfn,
+							sflags, aflags);
+		if (unlikely(r))
+			goto error;
+
+		pages_left -= pages;
+	}
+	spin_unlock(&mgr->lock);
+
+	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
+	mem->mm_node = nodes;
+
+	return 0;
+
+error:
+	while (i--)
+		drm_mm_remove_node(&nodes[i]);
+	spin_unlock(&mgr->lock);
+
+	kfree(nodes);
+	return r == -ENOSPC ? 0 : r;
+}
+
+/**
+ * amdgpu_vram_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: TTM memory object
+ *
+ * Free the allocated VRAM again.
+ */
+static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
+				struct ttm_mem_reg *mem)
+{
+	struct amdgpu_vram_mgr *mgr = man->priv;
+	struct drm_mm_node *nodes = mem->mm_node;
+	unsigned pages = mem->num_pages;
+
+	if (!mem->mm_node)
+		return;
+
+	spin_lock(&mgr->lock);
+	while (pages) {
+		pages -= nodes->size;
+		drm_mm_remove_node(nodes);
+		++nodes;
+	}
+	spin_unlock(&mgr->lock);
+
+	kfree(mem->mm_node);
+	mem->mm_node = NULL;
+}
+
+/**
+ * amdgpu_vram_mgr_debug - dump VRAM table
+ *
+ * @man: TTM memory type manager
+ * @prefix: text prefix
+ *
+ * Dump the table content using printk.
+ */
+static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man,
+				  const char *prefix)
+{
+	struct amdgpu_vram_mgr *mgr = man->priv;
+
+	spin_lock(&mgr->lock);
+	drm_mm_debug_table(&mgr->mm, prefix);
+	spin_unlock(&mgr->lock);
+}
+
+const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = {
+	amdgpu_vram_mgr_init,
+	amdgpu_vram_mgr_fini,
+	amdgpu_vram_mgr_new,
+	amdgpu_vram_mgr_del,
+	amdgpu_vram_mgr_debug
+};
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -31,6 +31,7 @@
 #include "atom.h"
 #include "atom-bits.h"
 #include "atombios_encoders.h"
+#include "atombios_crtc.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -887,9 +887,6 @@ static void ci_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);

-	if (pi->uvd_power_gated == gate)
-		return;
-
 	pi->uvd_power_gated = gate;

 	ci_update_uvd_dpm(adev, gate);
@@ -960,6 +957,12 @@ static void ci_apply_state_adjust_rules(struct amdgpu_device *adev,
 		sclk = ps->performance_levels[0].sclk;
 	}

+	if (adev->pm.pm_display_cfg.min_core_set_clock > sclk)
+		sclk = adev->pm.pm_display_cfg.min_core_set_clock;
+
+	if (adev->pm.pm_display_cfg.min_mem_set_clock > mclk)
+		mclk = adev->pm.pm_display_cfg.min_mem_set_clock;
+
 	if (rps->vce_active) {
 		if (sclk < adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].sclk)
 			sclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].sclk;
@@ -2201,6 +2204,11 @@ static int ci_upload_firmware(struct amdgpu_device *adev)
 	struct ci_power_info *pi = ci_get_pi(adev);
 	int i, ret;

+	if (amdgpu_ci_is_smc_running(adev)) {
+		DRM_INFO("smc is running, no need to load smc firmware\n");
+		return 0;
+	}
+
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (RREG32_SMC(ixRCU_UC_EVENTS) & RCU_UC_EVENTS__boot_seq_done_MASK)
 			break;
@@ -4190,8 +4198,10 @@ static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);
 	u32 tmp;
+	int ret = 0;

 	if (!gate) {
+		/* turn the clocks on when decoding */
 		if (pi->caps_uvd_dpm ||
 		    (adev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count <= 0))
 			pi->smc_state_table.UvdBootLevel = 0;
@@ -4203,9 +4213,14 @@ static int ci_update_uvd_dpm(struct amdgpu_device *adev, bool gate)
 		tmp &= ~DPM_TABLE_475__UvdBootLevel_MASK;
 		tmp |= (pi->smc_state_table.UvdBootLevel << DPM_TABLE_475__UvdBootLevel__SHIFT);
 		WREG32_SMC(ixDPM_TABLE_475, tmp);
+		ret = ci_enable_uvd_dpm(adev, true);
+	} else {
+		ret = ci_enable_uvd_dpm(adev, false);
+		if (ret)
+			return ret;
 	}

-	return ci_enable_uvd_dpm(adev, !gate);
+	return ret;
 }

 static u8 ci_get_vce_boot_level(struct amdgpu_device *adev)
@@ -4247,13 +4262,12 @@ static int ci_update_vce_dpm(struct amdgpu_device *adev,

 			ret = ci_enable_vce_dpm(adev, true);
 		} else {
+			ret = ci_enable_vce_dpm(adev, false);
+			if (ret)
+				return ret;
 			/* turn the clocks off when not encoding */
 			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
 							    AMD_CG_STATE_GATE);
-			if (ret)
-				return ret;
-
-			ret = ci_enable_vce_dpm(adev, false);
 		}
 	}
 	return ret;
@@ -5219,6 +5233,7 @@ static void ci_update_current_ps(struct amdgpu_device *adev,
 	pi->current_rps = *rps;
 	pi->current_ps = *new_ps;
 	pi->current_rps.ps_priv = &pi->current_ps;
+	adev->pm.dpm.current_ps = &pi->current_rps;
 }

 static void ci_update_requested_ps(struct amdgpu_device *adev,
@@ -5230,6 +5245,7 @@ static void ci_update_requested_ps(struct amdgpu_device *adev,
 	pi->requested_rps = *rps;
 	pi->requested_ps = *new_ps;
 	pi->requested_rps.ps_priv = &pi->requested_ps;
+	adev->pm.dpm.requested_ps = &pi->requested_rps;
 }

 static int ci_dpm_pre_set_power_state(struct amdgpu_device *adev)
@@ -5267,8 +5283,6 @@ static int ci_dpm_enable(struct amdgpu_device *adev)
 	struct amdgpu_ps *boot_ps = adev->pm.dpm.boot_ps;
 	int ret;

-	if (amdgpu_ci_is_smc_running(adev))
-		return -EINVAL;
 	if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) {
 		ci_enable_voltage_control(adev);
 		ret = ci_construct_voltage_tables(adev);
@@ -5689,7 +5703,7 @@ static int ci_parse_power_table(struct amdgpu_device *adev)
 	adev->pm.dpm.num_ps = state_array->ucNumEntries;

 	/* fill in the vce power states */
-	for (i = 0; i < AMDGPU_MAX_VCE_LEVELS; i++) {
+	for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) {
 		u32 sclk, mclk;
 		clock_array_index = adev->pm.dpm.vce_states[i].clk_idx;
 		clock_info = (union pplib_clock_info *)
@@ -5874,7 +5888,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
 	pi->pcie_dpm_key_disabled = 0;
 	pi->thermal_sclk_dpm_enabled = 0;

-	if (amdgpu_sclk_deep_sleep_en)
+	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -5977,7 +5991,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
 			tmp |= CNB_PWRMGT_CNTL__DPM_ENABLED_MASK;
 			break;
 		default:
-			DRM_ERROR("Invalid PCC GPIO: %u!\n", gpio.shift);
+			DRM_INFO("Invalid PCC GPIO: %u!\n", gpio.shift);
 			break;
 		}
 		WREG32_SMC(ixCNB_PWRMGT_CNTL, tmp);
@@ -6069,7 +6083,7 @@ ci_dpm_debugfs_print_current_performance_level(struct amdgpu_device *adev,
 		activity_percent = activity_percent > 100 ? 100 : activity_percent;
 	}

-	seq_printf(m, "uvd %sabled\n", pi->uvd_enabled ? "en" : "dis");
+	seq_printf(m, "uvd %sabled\n", pi->uvd_power_gated ? "dis" : "en");
 	seq_printf(m, "vce %sabled\n", rps->vce_active ? "en" : "dis");
 	seq_printf(m, "power level avg    sclk: %u mclk: %u\n",
 		   sclk, mclk);
@@ -6094,6 +6108,56 @@ static void ci_dpm_print_power_state(struct amdgpu_device *adev,
 	amdgpu_dpm_print_ps_status(adev, rps);
 }

+static inline bool ci_are_power_levels_equal(const struct ci_pl *ci_cpl1,
+						const struct ci_pl *ci_cpl2)
+{
+	return ((ci_cpl1->mclk == ci_cpl2->mclk) &&
+		  (ci_cpl1->sclk == ci_cpl2->sclk) &&
+		  (ci_cpl1->pcie_gen == ci_cpl2->pcie_gen) &&
+		  (ci_cpl1->pcie_lane == ci_cpl2->pcie_lane));
+}
+
+static int ci_check_state_equal(struct amdgpu_device *adev,
+				struct amdgpu_ps *cps,
+				struct amdgpu_ps *rps,
+				bool *equal)
+{
+	struct ci_ps *ci_cps;
+	struct ci_ps *ci_rps;
+	int i;
+
+	if (adev == NULL || cps == NULL || rps == NULL || equal == NULL)
+		return -EINVAL;
+
+	ci_cps = ci_get_ps(cps);
+	ci_rps = ci_get_ps(rps);
+
+	if (ci_cps == NULL) {
+		*equal = false;
+		return 0;
+	}
+
+	if (ci_cps->performance_level_count != ci_rps->performance_level_count) {
+
+		*equal = false;
+		return 0;
+	}
+
+	for (i = 0; i < ci_cps->performance_level_count; i++) {
+		if (!ci_are_power_levels_equal(&(ci_cps->performance_levels[i]),
+					&(ci_rps->performance_levels[i]))) {
+			*equal = false;
+			return 0;
+		}
+	}
+
+	/* If all performance levels are the same try to use the UVD clocks to break the tie.*/
+	*equal = ((cps->vclk == rps->vclk) && (cps->dclk == rps->dclk));
+	*equal &= ((cps->evclk == rps->evclk) && (cps->ecclk == rps->ecclk));
+
+	return 0;
+}
+
 static u32 ci_dpm_get_sclk(struct amdgpu_device *adev, bool low)
 {
 	struct ci_power_info *pi = ci_get_pi(adev);
@@ -6289,12 +6353,19 @@ static int ci_dpm_suspend(void *handle)

 	if (adev->pm.dpm_enabled) {
 		mutex_lock(&adev->pm.mutex);
-		/* disable dpm */
-		ci_dpm_disable(adev);
-		/* reset the power state */
-		adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps;
+		amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
+			       AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
+		amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
+			       AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
+		adev->pm.dpm.last_user_state = adev->pm.dpm.user_state;
+		adev->pm.dpm.last_state = adev->pm.dpm.state;
+		adev->pm.dpm.user_state = POWER_STATE_TYPE_INTERNAL_BOOT;
+		adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_BOOT;
 		mutex_unlock(&adev->pm.mutex);
+		amdgpu_pm_compute_clocks(adev);
+
 	}
+
 	return 0;
 }

@@ -6312,6 +6383,8 @@ static int ci_dpm_resume(void *handle)
 			adev->pm.dpm_enabled = false;
 		else
 			adev->pm.dpm_enabled = true;
+		adev->pm.dpm.user_state = adev->pm.dpm.last_user_state;
+		adev->pm.dpm.state = adev->pm.dpm.last_state;
 		mutex_unlock(&adev->pm.mutex);
 		if (adev->pm.dpm_enabled)
 			amdgpu_pm_compute_clocks(adev);
@@ -6646,6 +6719,8 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
 	.set_sclk_od = ci_dpm_set_sclk_od,
 	.get_mclk_od = ci_dpm_get_mclk_od,
 	.set_mclk_od = ci_dpm_set_mclk_od,
+	.check_state_equal = ci_check_state_equal,
+	.get_vce_clock_state = amdgpu_get_vce_clock_state,
 };

 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
@@ -6664,3 +6739,12 @@ static void ci_dpm_set_irq_funcs(struct amdgpu_device *adev)
 	adev->pm.dpm.thermal.irq.num_types = AMDGPU_THERMAL_IRQ_LAST;
 	adev->pm.dpm.thermal.irq.funcs = &ci_dpm_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version ci_dpm_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &ci_dpm_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1189,18 +1189,6 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 	return r;
 }

-static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
-{
-	u32 tmp = RREG32(mmBIOS_SCRATCH_3);
-
-	if (hung)
-		tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
-	else
-		tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
-
-	WREG32(mmBIOS_SCRATCH_3, tmp);
-}
-
 /**
 * cik_asic_reset - soft reset GPU
 *
@@ -1213,11 +1201,12 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
 static int cik_asic_reset(struct amdgpu_device *adev)
 {
 	int r;
-	cik_set_bios_scratch_engine_hung(adev, true);
+
+	amdgpu_atombios_scratch_regs_engine_hung(adev, true);

 	r = cik_gpu_pci_config_reset(adev);

-	cik_set_bios_scratch_engine_hung(adev, false);
+	amdgpu_atombios_scratch_regs_engine_hung(adev, false);

 	return r;
 }
@@ -1641,745 +1630,6 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev)
 		adev->virtualization.virtual_caps |= AMDGPU_PASSTHROUGH_MODE;
 }

-static const struct amdgpu_ip_block_version bonaire_ip_blocks[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &dce_v8_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version bonaire_ip_blocks_vd[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &dce_virtual_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version hawaii_ip_blocks[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 5,
-		.rev = 0,
-		.funcs = &dce_v8_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 3,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version hawaii_ip_blocks_vd[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 5,
-		.rev = 0,
-		.funcs = &dce_virtual_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 3,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version kabini_ip_blocks[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 3,
-		.rev = 0,
-		.funcs = &dce_v8_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version kabini_ip_blocks_vd[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 3,
-		.rev = 0,
-		.funcs = &dce_virtual_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version mullins_ip_blocks[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 3,
-		.rev = 0,
-		.funcs = &dce_v8_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version mullins_ip_blocks_vd[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 3,
-		.rev = 0,
-		.funcs = &dce_virtual_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version kaveri_ip_blocks[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 1,
-		.rev = 0,
-		.funcs = &dce_v8_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 1,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-static const struct amdgpu_ip_block_version kaveri_ip_blocks_vd[] =
-{
-	/* ORDER MATTERS! */
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 7,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 8,
-		.minor = 1,
-		.rev = 0,
-		.funcs = &dce_virtual_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 7,
-		.minor = 1,
-		.rev = 0,
-		.funcs = &gfx_v7_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &cik_sdma_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 4,
-		.minor = 2,
-		.rev = 0,
-		.funcs = &uvd_v4_2_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 2,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &vce_v2_0_ip_funcs,
-	},
-};
-
-int cik_set_ip_blocks(struct amdgpu_device *adev)
-{
-	if (adev->enable_virtual_display) {
-		switch (adev->asic_type) {
-		case CHIP_BONAIRE:
-			adev->ip_blocks = bonaire_ip_blocks_vd;
-			adev->num_ip_blocks = ARRAY_SIZE(bonaire_ip_blocks_vd);
-			break;
-		case CHIP_HAWAII:
-			adev->ip_blocks = hawaii_ip_blocks_vd;
-			adev->num_ip_blocks = ARRAY_SIZE(hawaii_ip_blocks_vd);
-			break;
-		case CHIP_KAVERI:
-			adev->ip_blocks = kaveri_ip_blocks_vd;
-			adev->num_ip_blocks = ARRAY_SIZE(kaveri_ip_blocks_vd);
-			break;
-		case CHIP_KABINI:
-			adev->ip_blocks = kabini_ip_blocks_vd;
-			adev->num_ip_blocks = ARRAY_SIZE(kabini_ip_blocks_vd);
-			break;
-		case CHIP_MULLINS:
-			adev->ip_blocks = mullins_ip_blocks_vd;
-			adev->num_ip_blocks = ARRAY_SIZE(mullins_ip_blocks_vd);
-			break;
-		default:
-			/* FIXME: not supported yet */
-			return -EINVAL;
-		}
-	} else {
-		switch (adev->asic_type) {
-		case CHIP_BONAIRE:
-			adev->ip_blocks = bonaire_ip_blocks;
-			adev->num_ip_blocks = ARRAY_SIZE(bonaire_ip_blocks);
-			break;
-		case CHIP_HAWAII:
-			adev->ip_blocks = hawaii_ip_blocks;
-			adev->num_ip_blocks = ARRAY_SIZE(hawaii_ip_blocks);
-			break;
-		case CHIP_KAVERI:
-			adev->ip_blocks = kaveri_ip_blocks;
-			adev->num_ip_blocks = ARRAY_SIZE(kaveri_ip_blocks);
-			break;
-		case CHIP_KABINI:
-			adev->ip_blocks = kabini_ip_blocks;
-			adev->num_ip_blocks = ARRAY_SIZE(kabini_ip_blocks);
-			break;
-		case CHIP_MULLINS:
-			adev->ip_blocks = mullins_ip_blocks;
-			adev->num_ip_blocks = ARRAY_SIZE(mullins_ip_blocks);
-			break;
-		default:
-			/* FIXME: not supported yet */
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
 static const struct amdgpu_asic_funcs cik_asic_funcs =
 {
 	.read_disabled_bios = &cik_read_disabled_bios,
@@ -2612,7 +1862,7 @@ static int cik_common_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs cik_common_ip_funcs = {
+static const struct amd_ip_funcs cik_common_ip_funcs = {
 	.name = "cik_common",
 	.early_init = cik_common_early_init,
 	.late_init = NULL,
@@ -2628,3 +1878,79 @@ const struct amd_ip_funcs cik_common_ip_funcs = {
 	.set_clockgating_state = cik_common_set_clockgating_state,
 	.set_powergating_state = cik_common_set_powergating_state,
 };
+
+static const struct amdgpu_ip_block_version cik_common_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_COMMON,
+	.major = 1,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &cik_common_ip_funcs,
+};
+
+int cik_set_ip_blocks(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_BONAIRE:
+		amdgpu_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		else
+			amdgpu_ip_block_add(adev, &dce_v8_2_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block);
+		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+		break;
+	case CHIP_HAWAII:
+		amdgpu_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		else
+			amdgpu_ip_block_add(adev, &dce_v8_5_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v7_3_ip_block);
+		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+		break;
+	case CHIP_KAVERI:
+		amdgpu_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		else
+			amdgpu_ip_block_add(adev, &dce_v8_1_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v7_1_ip_block);
+		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+		break;
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+		amdgpu_ip_block_add(adev, &cik_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block);
+		amdgpu_ip_block_add(adev, &cik_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		else
+			amdgpu_ip_block_add(adev, &dce_v8_3_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block);
+		amdgpu_ip_block_add(adev, &cik_sdma_ip_block);
+		amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block);
+		amdgpu_ip_block_add(adev, &vce_v2_0_ip_block);
+		break;
+	default:
+		/* FIXME: not supported yet */
+		return -EINVAL;
+	}
+	return 0;
+}
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -24,8 +24,6 @@
 #ifndef __CIK_H__
 #define __CIK_H__

-extern const struct amd_ip_funcs cik_common_ip_funcs;
-
 void cik_srbm_select(struct amdgpu_device *adev,
 		     u32 me, u32 pipe, u32 queue, u32 vmid);
 int cik_set_ip_blocks(struct amdgpu_device *adev);
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -413,7 +413,7 @@ static int cik_ih_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs cik_ih_ip_funcs = {
+static const struct amd_ip_funcs cik_ih_ip_funcs = {
 	.name = "cik_ih",
 	.early_init = cik_ih_early_init,
 	.late_init = NULL,
@@ -441,3 +441,12 @@ static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
 	if (adev->irq.ih_funcs == NULL)
 		adev->irq.ih_funcs = &cik_ih_funcs;
 }
+
+const struct amdgpu_ip_block_version cik_ih_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_IH,
+	.major = 2,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &cik_ih_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.h
@@ -24,6 +24,6 @@
 #ifndef __CIK_IH_H__
 #define __CIK_IH_H__

-extern const struct amd_ip_funcs cik_ih_ip_funcs;
+extern const struct amdgpu_ip_block_version cik_ih_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -206,10 +206,10 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)

 	for (i = 0; i < count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
-			amdgpu_ring_write(ring, ring->nop |
+			amdgpu_ring_write(ring, ring->funcs->nop |
 					  SDMA_NOP_COUNT(count - 1));
 		else
-			amdgpu_ring_write(ring, ring->nop);
+			amdgpu_ring_write(ring, ring->funcs->nop);
 }

 /**
@@ -622,7 +622,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -655,7 +655,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;

-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -675,7 +675,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)

 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -848,22 +848,6 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }

-static unsigned cik_sdma_ring_get_emit_ib_size(struct amdgpu_ring *ring)
-{
-	return
-		7 + 4; /* cik_sdma_ring_emit_ib */
-}
-
-static unsigned cik_sdma_ring_get_dma_frame_size(struct amdgpu_ring *ring)
-{
-	return
-		6 + /* cik_sdma_ring_emit_hdp_flush */
-		3 + /* cik_sdma_ring_emit_hdp_invalidate */
-		6 + /* cik_sdma_ring_emit_pipeline_sync */
-		12 + /* cik_sdma_ring_emit_vm_flush */
-		9 + 9 + 9; /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
-}
-
 static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
 				 bool enable)
 {
@@ -959,11 +943,10 @@ static int cik_sdma_sw_init(void *handle)
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "sdma%d", i);
 		r = amdgpu_ring_init(adev, ring, 1024,
-				     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
-				     AMDGPU_RING_TYPE_SDMA);
+				     AMDGPU_SDMA_IRQ_TRAP0 :
+				     AMDGPU_SDMA_IRQ_TRAP1);
 		if (r)
 			return r;
 	}
@@ -1207,7 +1190,7 @@ static int cik_sdma_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs cik_sdma_ip_funcs = {
+static const struct amd_ip_funcs cik_sdma_ip_funcs = {
 	.name = "cik_sdma",
 	.early_init = cik_sdma_early_init,
 	.late_init = NULL,
@@ -1225,10 +1208,19 @@ const struct amd_ip_funcs cik_sdma_ip_funcs = {
 };

 static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_SDMA,
+	.align_mask = 0xf,
+	.nop = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0),
 	.get_rptr = cik_sdma_ring_get_rptr,
 	.get_wptr = cik_sdma_ring_get_wptr,
 	.set_wptr = cik_sdma_ring_set_wptr,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		6 + /* cik_sdma_ring_emit_hdp_flush */
+		3 + /* cik_sdma_ring_emit_hdp_invalidate */
+		6 + /* cik_sdma_ring_emit_pipeline_sync */
+		12 + /* cik_sdma_ring_emit_vm_flush */
+		9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
+	.emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
 	.emit_ib = cik_sdma_ring_emit_ib,
 	.emit_fence = cik_sdma_ring_emit_fence,
 	.emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
@@ -1239,8 +1231,6 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.test_ib = cik_sdma_ring_test_ib,
 	.insert_nop = cik_sdma_ring_insert_nop,
 	.pad_ib = cik_sdma_ring_pad_ib,
-	.get_emit_ib_size = cik_sdma_ring_get_emit_ib_size,
-	.get_dma_frame_size = cik_sdma_ring_get_dma_frame_size,
 };

 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1352,3 +1342,12 @@ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
+
+const struct amdgpu_ip_block_version cik_sdma_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SDMA,
+	.major = 2,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &cik_sdma_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.h
@@ -24,6 +24,6 @@
 #ifndef __CIK_SDMA_H__
 #define __CIK_SDMA_H__

-extern const struct amd_ip_funcs cik_sdma_ip_funcs;
+extern const struct amdgpu_ip_block_version cik_sdma_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -43,6 +43,14 @@
 #define CRTC4_REGISTER_OFFSET                 (0x477c - 0x1b7c)
 #define CRTC5_REGISTER_OFFSET                 (0x4a7c - 0x1b7c)

+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET                 (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET                 (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET                 (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET                 (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET                 (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET                 (0x1816 - 0x1807)
+
 #define BONAIRE_GB_ADDR_CONFIG_GOLDEN        0x12010001
 #define HAWAII_GB_ADDR_CONFIG_GOLDEN         0x12011003

--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -438,7 +438,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
 		pi->caps_td_ramping = true;
 		pi->caps_tcp_ramping = true;
 	}
-	if (amdgpu_sclk_deep_sleep_en)
+	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -1250,7 +1250,8 @@ static void cz_update_current_ps(struct amdgpu_device *adev,

 	pi->current_ps = *ps;
 	pi->current_rps = *rps;
-	pi->current_rps.ps_priv = ps;
+	pi->current_rps.ps_priv = &pi->current_ps;
+	adev->pm.dpm.current_ps = &pi->current_rps;

 }

@@ -1262,7 +1263,8 @@ static void cz_update_requested_ps(struct amdgpu_device *adev,

 	pi->requested_ps = *ps;
 	pi->requested_rps = *rps;
-	pi->requested_rps.ps_priv = ps;
+	pi->requested_rps.ps_priv = &pi->requested_ps;
+	adev->pm.dpm.requested_ps = &pi->requested_rps;

 }

@@ -2109,9 +2111,8 @@ static void cz_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate)

 	if (gate) {
 		if (pi->caps_uvd_pg) {
-			/* disable clockgating so we can properly shut down the block */
 			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_CG_STATE_UNGATE);
+							    AMD_CG_STATE_GATE);
 			if (ret) {
 				DRM_ERROR("UVD DPM Power Gating failed to set clockgating state\n");
 				return;
@@ -2157,9 +2158,8 @@ static void cz_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate)
 				return;
 			}

-			/* enable clockgating. hw will dynamically gate/ungate clocks on the fly */
 			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_CG_STATE_GATE);
+							    AMD_CG_STATE_UNGATE);
 			if (ret) {
 				DRM_ERROR("UVD DPM Power Gating Failed to set clockgating state\n");
 				return;
@@ -2257,6 +2257,18 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
 	}
 }

+static int cz_check_state_equal(struct amdgpu_device *adev,
+				struct amdgpu_ps *cps,
+				struct amdgpu_ps *rps,
+				bool *equal)
+{
+	if (equal == NULL)
+		return -EINVAL;
+
+	*equal = false;
+	return 0;
+}
+
 const struct amd_ip_funcs cz_dpm_ip_funcs = {
 	.name = "cz_dpm",
 	.early_init = cz_dpm_early_init,
@@ -2289,6 +2301,7 @@ static const struct amdgpu_dpm_funcs cz_dpm_funcs = {
 	.vblank_too_short = NULL,
 	.powergate_uvd = cz_dpm_powergate_uvd,
 	.powergate_vce = cz_dpm_powergate_vce,
+	.check_state_equal = cz_check_state_equal,
 };

 static void cz_dpm_set_funcs(struct amdgpu_device *adev)
@@ -2296,3 +2309,12 @@ static void cz_dpm_set_funcs(struct amdgpu_device *adev)
 	if (NULL == adev->pm.funcs)
 		adev->pm.funcs = &cz_dpm_funcs;
 }
+
+const struct amdgpu_ip_block_version cz_dpm_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 8,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &cz_dpm_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -394,7 +394,7 @@ static int cz_ih_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs cz_ih_ip_funcs = {
+static const struct amd_ip_funcs cz_ih_ip_funcs = {
 	.name = "cz_ih",
 	.early_init = cz_ih_early_init,
 	.late_init = NULL,
@@ -423,3 +423,11 @@ static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev)
 		adev->irq.ih_funcs = &cz_ih_funcs;
 }

+const struct amdgpu_ip_block_version cz_ih_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_IH,
+	.major = 3,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &cz_ih_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.h
@@ -24,6 +24,6 @@
 #ifndef __CZ_IH_H__
 #define __CZ_IH_H__

-extern const struct amd_ip_funcs cz_ih_ip_funcs;
+extern const struct amdgpu_ip_block_version cz_ih_ip_block;

 #endif /* __CZ_IH_H__ */
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -31,6 +31,7 @@
 #include "atombios_encoders.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
+#include "dce_v10_0.h"

 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
@@ -330,33 +331,12 @@ static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
 static bool dce_v10_0_hpd_sense(struct amdgpu_device *adev,
 			       enum amdgpu_hpd_id hpd)
 {
-	int idx;
 	bool connected = false;

-	switch (hpd) {
-	case AMDGPU_HPD_1:
-		idx = 0;
-		break;
-	case AMDGPU_HPD_2:
-		idx = 1;
-		break;
-	case AMDGPU_HPD_3:
-		idx = 2;
-		break;
-	case AMDGPU_HPD_4:
-		idx = 3;
-		break;
-	case AMDGPU_HPD_5:
-		idx = 4;
-		break;
-	case AMDGPU_HPD_6:
-		idx = 5;
-		break;
-	default:
+	if (hpd >= adev->mode_info.num_hpd)
 		return connected;
-	}

-	if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[idx]) &
+	if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
 	    DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
 		connected = true;

@@ -376,37 +356,16 @@ static void dce_v10_0_hpd_set_polarity(struct amdgpu_device *adev,
 {
 	u32 tmp;
 	bool connected = dce_v10_0_hpd_sense(adev, hpd);
-	int idx;

-	switch (hpd) {
-	case AMDGPU_HPD_1:
-		idx = 0;
-		break;
-	case AMDGPU_HPD_2:
-		idx = 1;
-		break;
-	case AMDGPU_HPD_3:
-		idx = 2;
-		break;
-	case AMDGPU_HPD_4:
-		idx = 3;
-		break;
-	case AMDGPU_HPD_5:
-		idx = 4;
-		break;
-	case AMDGPU_HPD_6:
-		idx = 5;
-		break;
-	default:
+	if (hpd >= adev->mode_info.num_hpd)
 		return;
-	}

-	tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx]);
+	tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
 	if (connected)
 		tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
 	else
 		tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
-	WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx], tmp);
+	WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
 }

 /**
@@ -422,33 +381,12 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
 	u32 tmp;
-	int idx;

 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

-		switch (amdgpu_connector->hpd.hpd) {
-		case AMDGPU_HPD_1:
-			idx = 0;
-			break;
-		case AMDGPU_HPD_2:
-			idx = 1;
-			break;
-		case AMDGPU_HPD_3:
-			idx = 2;
-			break;
-		case AMDGPU_HPD_4:
-			idx = 3;
-			break;
-		case AMDGPU_HPD_5:
-			idx = 4;
-			break;
-		case AMDGPU_HPD_6:
-			idx = 5;
-			break;
-		default:
+		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
 			continue;
-		}

 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
 		    connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
@@ -457,24 +395,24 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
 			 * https://bugzilla.redhat.com/show_bug.cgi?id=726143
 			 * also avoid interrupt storms during dpms.
 			 */
-			tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx]);
+			tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 			tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
-			WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx], tmp);
+			WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
 			continue;
 		}

-		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[idx]);
+		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
-		WREG32(mmDC_HPD_CONTROL + hpd_offsets[idx], tmp);
+		WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

-		tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[idx]);
+		tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
 				    DC_HPD_CONNECT_INT_DELAY,
 				    AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
 				    DC_HPD_DISCONNECT_INT_DELAY,
 				    AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
-		WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[idx], tmp);
+		WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

 		dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
 		amdgpu_irq_get(adev, &adev->hpd_irq,
@@ -495,37 +433,16 @@ static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
 	u32 tmp;
-	int idx;

 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

-		switch (amdgpu_connector->hpd.hpd) {
-		case AMDGPU_HPD_1:
-			idx = 0;
-			break;
-		case AMDGPU_HPD_2:
-			idx = 1;
-			break;
-		case AMDGPU_HPD_3:
-			idx = 2;
-			break;
-		case AMDGPU_HPD_4:
-			idx = 3;
-			break;
-		case AMDGPU_HPD_5:
-			idx = 4;
-			break;
-		case AMDGPU_HPD_6:
-			idx = 5;
-			break;
-		default:
+		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
 			continue;
-		}

-		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[idx]);
+		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
-		WREG32(mmDC_HPD_CONTROL + hpd_offsets[idx], tmp);
+		WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

 		amdgpu_irq_put(adev, &adev->hpd_irq,
 			       amdgpu_connector->hpd.hpd);
@@ -2115,7 +2032,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	u32 tmp, viewport_w, viewport_h;
 	int r;
 	bool bypass_lut = false;
-	char *format_name;
+	struct drm_format_name_buf format_name;

 	/* no fb bound */
 	if (!atomic && !crtc->primary->fb) {
@@ -2227,9 +2144,8 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 		bypass_lut = true;
 		break;
 	default:
-		format_name = drm_get_format_name(target_fb->pixel_format);
-		DRM_ERROR("Unsupported screen format %s\n", format_name);
-		kfree(format_name);
+		DRM_ERROR("Unsupported screen format %s\n",
+		          drm_get_format_name(target_fb->pixel_format, &format_name));
 		return -EINVAL;
 	}

@@ -2577,6 +2493,9 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc,
 	struct amdgpu_device *adev = crtc->dev->dev_private;
 	int xorigin = 0, yorigin = 0;

+	amdgpu_crtc->cursor_x = x;
+	amdgpu_crtc->cursor_y = y;
+
 	/* avivo cursor are offset into the total surface */
 	x += crtc->x;
 	y += crtc->y;
@@ -2593,11 +2512,6 @@ static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc,

 	WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
 	WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
-	WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
-	       ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
-	amdgpu_crtc->cursor_x = x;
-	amdgpu_crtc->cursor_y = y;

 	return 0;
 }
@@ -2623,6 +2537,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
 				      int32_t hot_y)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_device *adev = crtc->dev->dev_private;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *aobj;
 	int ret;
@@ -2661,9 +2576,6 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
 		return ret;
 	}

-	amdgpu_crtc->cursor_width = width;
-	amdgpu_crtc->cursor_height = height;
-
 	dce_v10_0_lock_cursor(crtc, true);

 	if (hot_x != amdgpu_crtc->cursor_hot_x ||
@@ -2679,6 +2591,14 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
 		amdgpu_crtc->cursor_hot_y = hot_y;
 	}

+	if (width != amdgpu_crtc->cursor_width ||
+	    height != amdgpu_crtc->cursor_height) {
+		WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+		       (width - 1) << 16 | (height - 1));
+		amdgpu_crtc->cursor_width = width;
+		amdgpu_crtc->cursor_height = height;
+	}
+
 	dce_v10_0_show_cursor(crtc);
 	dce_v10_0_lock_cursor(crtc, false);

@@ -2700,6 +2620,7 @@ unpin:
 static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_device *adev = crtc->dev->dev_private;

 	if (amdgpu_crtc->cursor_bo) {
 		dce_v10_0_lock_cursor(crtc, true);
@@ -2707,6 +2628,10 @@ static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
 		dce_v10_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
 					     amdgpu_crtc->cursor_y);

+		WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+		       (amdgpu_crtc->cursor_width - 1) << 16 |
+		       (amdgpu_crtc->cursor_height - 1));
+
 		dce_v10_0_show_cursor(crtc);

 		dce_v10_0_lock_cursor(crtc, false);
@@ -3548,7 +3473,7 @@ static int dce_v10_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs dce_v10_0_ip_funcs = {
+static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
 	.name = "dce_v10_0",
 	.early_init = dce_v10_0_early_init,
 	.late_init = NULL,
@@ -3833,7 +3758,6 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
 	.bandwidth_update = &dce_v10_0_bandwidth_update,
 	.vblank_get_counter = &dce_v10_0_vblank_get_counter,
 	.vblank_wait = &dce_v10_0_vblank_wait,
-	.is_display_hung = &dce_v10_0_is_display_hung,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v10_0_hpd_sense,
@@ -3879,3 +3803,21 @@ static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->hpd_irq.num_types = AMDGPU_HPD_LAST;
 	adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version dce_v10_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 10,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &dce_v10_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v10_1_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 10,
+	.minor = 1,
+	.rev = 0,
+	.funcs = &dce_v10_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
@@ -24,7 +24,9 @@
 #ifndef __DCE_V10_0_H__
 #define __DCE_V10_0_H__

-extern const struct amd_ip_funcs dce_v10_0_ip_funcs;
+
+extern const struct amdgpu_ip_block_version dce_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v10_1_ip_block;

 void dce_v10_0_disable_dce(struct amdgpu_device *adev);

--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -31,6 +31,7 @@
 #include "atombios_encoders.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
+#include "dce_v11_0.h"

 #include "dce/dce_11_0_d.h"
 #include "dce/dce_11_0_sh_mask.h"
@@ -346,33 +347,12 @@ static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
 static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev,
 			       enum amdgpu_hpd_id hpd)
 {
-	int idx;
 	bool connected = false;

-	switch (hpd) {
-	case AMDGPU_HPD_1:
-		idx = 0;
-		break;
-	case AMDGPU_HPD_2:
-		idx = 1;
-		break;
-	case AMDGPU_HPD_3:
-		idx = 2;
-		break;
-	case AMDGPU_HPD_4:
-		idx = 3;
-		break;
-	case AMDGPU_HPD_5:
-		idx = 4;
-		break;
-	case AMDGPU_HPD_6:
-		idx = 5;
-		break;
-	default:
+	if (hpd >= adev->mode_info.num_hpd)
 		return connected;
-	}

-	if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[idx]) &
+	if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
 	    DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
 		connected = true;

@@ -392,37 +372,16 @@ static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev,
 {
 	u32 tmp;
 	bool connected = dce_v11_0_hpd_sense(adev, hpd);
-	int idx;

-	switch (hpd) {
-	case AMDGPU_HPD_1:
-		idx = 0;
-		break;
-	case AMDGPU_HPD_2:
-		idx = 1;
-		break;
-	case AMDGPU_HPD_3:
-		idx = 2;
-		break;
-	case AMDGPU_HPD_4:
-		idx = 3;
-		break;
-	case AMDGPU_HPD_5:
-		idx = 4;
-		break;
-	case AMDGPU_HPD_6:
-		idx = 5;
-		break;
-	default:
+	if (hpd >= adev->mode_info.num_hpd)
 		return;
-	}

-	tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx]);
+	tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
 	if (connected)
 		tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
 	else
 		tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
-	WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx], tmp);
+	WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
 }

 /**
@@ -438,33 +397,12 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
 	u32 tmp;
-	int idx;

 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

-		switch (amdgpu_connector->hpd.hpd) {
-		case AMDGPU_HPD_1:
-			idx = 0;
-			break;
-		case AMDGPU_HPD_2:
-			idx = 1;
-			break;
-		case AMDGPU_HPD_3:
-			idx = 2;
-			break;
-		case AMDGPU_HPD_4:
-			idx = 3;
-			break;
-		case AMDGPU_HPD_5:
-			idx = 4;
-			break;
-		case AMDGPU_HPD_6:
-			idx = 5;
-			break;
-		default:
+		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
 			continue;
-		}

 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
 		    connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
@@ -473,24 +411,24 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev)
 			 * https://bugzilla.redhat.com/show_bug.cgi?id=726143
 			 * also avoid interrupt storms during dpms.
 			 */
-			tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx]);
+			tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 			tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
-			WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[idx], tmp);
+			WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
 			continue;
 		}

-		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[idx]);
+		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
-		WREG32(mmDC_HPD_CONTROL + hpd_offsets[idx], tmp);
+		WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

-		tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[idx]);
+		tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
 				    DC_HPD_CONNECT_INT_DELAY,
 				    AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
 				    DC_HPD_DISCONNECT_INT_DELAY,
 				    AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
-		WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[idx], tmp);
+		WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

 		dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
 		amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
@@ -510,37 +448,16 @@ static void dce_v11_0_hpd_fini(struct amdgpu_device *adev)
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
 	u32 tmp;
-	int idx;

 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

-		switch (amdgpu_connector->hpd.hpd) {
-		case AMDGPU_HPD_1:
-			idx = 0;
-			break;
-		case AMDGPU_HPD_2:
-			idx = 1;
-			break;
-		case AMDGPU_HPD_3:
-			idx = 2;
-			break;
-		case AMDGPU_HPD_4:
-			idx = 3;
-			break;
-		case AMDGPU_HPD_5:
-			idx = 4;
-			break;
-		case AMDGPU_HPD_6:
-			idx = 5;
-			break;
-		default:
+		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
 			continue;
-		}

-		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[idx]);
+		tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
 		tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
-		WREG32(mmDC_HPD_CONTROL + hpd_offsets[idx], tmp);
+		WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

 		amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
@@ -2096,7 +2013,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	u32 tmp, viewport_w, viewport_h;
 	int r;
 	bool bypass_lut = false;
-	char *format_name;
+	struct drm_format_name_buf format_name;

 	/* no fb bound */
 	if (!atomic && !crtc->primary->fb) {
@@ -2208,9 +2125,8 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 		bypass_lut = true;
 		break;
 	default:
-		format_name = drm_get_format_name(target_fb->pixel_format);
-		DRM_ERROR("Unsupported screen format %s\n", format_name);
-		kfree(format_name);
+		DRM_ERROR("Unsupported screen format %s\n",
+		          drm_get_format_name(target_fb->pixel_format, &format_name));
 		return -EINVAL;
 	}

@@ -2593,6 +2509,9 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,
 	struct amdgpu_device *adev = crtc->dev->dev_private;
 	int xorigin = 0, yorigin = 0;

+	amdgpu_crtc->cursor_x = x;
+	amdgpu_crtc->cursor_y = y;
+
 	/* avivo cursor are offset into the total surface */
 	x += crtc->x;
 	y += crtc->y;
@@ -2609,11 +2528,6 @@ static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc,

 	WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
 	WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
-	WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
-	       ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
-	amdgpu_crtc->cursor_x = x;
-	amdgpu_crtc->cursor_y = y;

 	return 0;
 }
@@ -2639,6 +2553,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
 				      int32_t hot_y)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_device *adev = crtc->dev->dev_private;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *aobj;
 	int ret;
@@ -2677,9 +2592,6 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
 		return ret;
 	}

-	amdgpu_crtc->cursor_width = width;
-	amdgpu_crtc->cursor_height = height;
-
 	dce_v11_0_lock_cursor(crtc, true);

 	if (hot_x != amdgpu_crtc->cursor_hot_x ||
@@ -2695,6 +2607,14 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
 		amdgpu_crtc->cursor_hot_y = hot_y;
 	}

+	if (width != amdgpu_crtc->cursor_width ||
+	    height != amdgpu_crtc->cursor_height) {
+		WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+		       (width - 1) << 16 | (height - 1));
+		amdgpu_crtc->cursor_width = width;
+		amdgpu_crtc->cursor_height = height;
+	}
+
 	dce_v11_0_show_cursor(crtc);
 	dce_v11_0_lock_cursor(crtc, false);

@@ -2716,6 +2636,7 @@ unpin:
 static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_device *adev = crtc->dev->dev_private;

 	if (amdgpu_crtc->cursor_bo) {
 		dce_v11_0_lock_cursor(crtc, true);
@@ -2723,6 +2644,10 @@ static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
 		dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
 					     amdgpu_crtc->cursor_y);

+		WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+		       (amdgpu_crtc->cursor_width - 1) << 16 |
+		       (amdgpu_crtc->cursor_height - 1));
+
 		dce_v11_0_show_cursor(crtc);

 		dce_v11_0_lock_cursor(crtc, false);
@@ -3605,7 +3530,7 @@ static int dce_v11_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs dce_v11_0_ip_funcs = {
+static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
 	.name = "dce_v11_0",
 	.early_init = dce_v11_0_early_init,
 	.late_init = NULL,
@@ -3889,7 +3814,6 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
 	.bandwidth_update = &dce_v11_0_bandwidth_update,
 	.vblank_get_counter = &dce_v11_0_vblank_get_counter,
 	.vblank_wait = &dce_v11_0_vblank_wait,
-	.is_display_hung = &dce_v11_0_is_display_hung,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v11_0_hpd_sense,
@@ -3935,3 +3859,21 @@ static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->hpd_irq.num_types = AMDGPU_HPD_LAST;
 	adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version dce_v11_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 11,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &dce_v11_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v11_2_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 11,
+	.minor = 2,
+	.rev = 0,
+	.funcs = &dce_v11_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h
@@ -24,7 +24,8 @@
 #ifndef __DCE_V11_0_H__
 #define __DCE_V11_0_H__

-extern const struct amd_ip_funcs dce_v11_0_ip_funcs;
+extern const struct amdgpu_ip_block_version dce_v11_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v11_2_ip_block;

 void dce_v11_0_disable_dce(struct amdgpu_device *adev);

--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h
@@ -24,6 +24,9 @@
 #ifndef __DCE_V6_0_H__
 #define __DCE_V6_0_H__

-extern const struct amd_ip_funcs dce_v6_0_ip_funcs;
+extern const struct amdgpu_ip_block_version dce_v6_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v6_4_ip_block;
+
+void dce_v6_0_disable_dce(struct amdgpu_device *adev);

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -31,6 +31,7 @@
 #include "atombios_encoders.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
+#include "dce_v8_0.h"

 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
@@ -56,6 +57,16 @@ static const u32 crtc_offsets[6] =
 	CRTC5_REGISTER_OFFSET
 };

+static const u32 hpd_offsets[] =
+{
+	HPD0_REGISTER_OFFSET,
+	HPD1_REGISTER_OFFSET,
+	HPD2_REGISTER_OFFSET,
+	HPD3_REGISTER_OFFSET,
+	HPD4_REGISTER_OFFSET,
+	HPD5_REGISTER_OFFSET
+};
+
 static const uint32_t dig_offsets[] = {
 	CRTC0_REGISTER_OFFSET,
 	CRTC1_REGISTER_OFFSET,
@@ -104,15 +115,6 @@ static const struct {
 	.hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
 } };

-static const uint32_t hpd_int_control_offsets[6] = {
-	mmDC_HPD1_INT_CONTROL,
-	mmDC_HPD2_INT_CONTROL,
-	mmDC_HPD3_INT_CONTROL,
-	mmDC_HPD4_INT_CONTROL,
-	mmDC_HPD5_INT_CONTROL,
-	mmDC_HPD6_INT_CONTROL,
-};
-
 static u32 dce_v8_0_audio_endpt_rreg(struct amdgpu_device *adev,
 				     u32 block_offset, u32 reg)
 {
@@ -278,34 +280,12 @@ static bool dce_v8_0_hpd_sense(struct amdgpu_device *adev,
 {
 	bool connected = false;

-	switch (hpd) {
-	case AMDGPU_HPD_1:
-		if (RREG32(mmDC_HPD1_INT_STATUS) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
-			connected = true;
-		break;
-	case AMDGPU_HPD_2:
-		if (RREG32(mmDC_HPD2_INT_STATUS) & DC_HPD2_INT_STATUS__DC_HPD2_SENSE_MASK)
-			connected = true;
-		break;
-	case AMDGPU_HPD_3:
-		if (RREG32(mmDC_HPD3_INT_STATUS) & DC_HPD3_INT_STATUS__DC_HPD3_SENSE_MASK)
-			connected = true;
-		break;
-	case AMDGPU_HPD_4:
-		if (RREG32(mmDC_HPD4_INT_STATUS) & DC_HPD4_INT_STATUS__DC_HPD4_SENSE_MASK)
-			connected = true;
-		break;
-	case AMDGPU_HPD_5:
-		if (RREG32(mmDC_HPD5_INT_STATUS) & DC_HPD5_INT_STATUS__DC_HPD5_SENSE_MASK)
-			connected = true;
-		break;
-	case AMDGPU_HPD_6:
-		if (RREG32(mmDC_HPD6_INT_STATUS) & DC_HPD6_INT_STATUS__DC_HPD6_SENSE_MASK)
-			connected = true;
-		break;
-	default:
-		break;
-	}
+	if (hpd >= adev->mode_info.num_hpd)
+		return connected;
+
+	if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+	    DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+		connected = true;

 	return connected;
 }
@@ -324,58 +304,15 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
 	u32 tmp;
 	bool connected = dce_v8_0_hpd_sense(adev, hpd);

-	switch (hpd) {
-	case AMDGPU_HPD_1:
-		tmp = RREG32(mmDC_HPD1_INT_CONTROL);
-		if (connected)
-			tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
-		else
-			tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
-		WREG32(mmDC_HPD1_INT_CONTROL, tmp);
-		break;
-	case AMDGPU_HPD_2:
-		tmp = RREG32(mmDC_HPD2_INT_CONTROL);
-		if (connected)
-			tmp &= ~DC_HPD2_INT_CONTROL__DC_HPD2_INT_POLARITY_MASK;
-		else
-			tmp |= DC_HPD2_INT_CONTROL__DC_HPD2_INT_POLARITY_MASK;
-		WREG32(mmDC_HPD2_INT_CONTROL, tmp);
-		break;
-	case AMDGPU_HPD_3:
-		tmp = RREG32(mmDC_HPD3_INT_CONTROL);
-		if (connected)
-			tmp &= ~DC_HPD3_INT_CONTROL__DC_HPD3_INT_POLARITY_MASK;
-		else
-			tmp |= DC_HPD3_INT_CONTROL__DC_HPD3_INT_POLARITY_MASK;
-		WREG32(mmDC_HPD3_INT_CONTROL, tmp);
-		break;
-	case AMDGPU_HPD_4:
-		tmp = RREG32(mmDC_HPD4_INT_CONTROL);
-		if (connected)
-			tmp &= ~DC_HPD4_INT_CONTROL__DC_HPD4_INT_POLARITY_MASK;
-		else
-			tmp |= DC_HPD4_INT_CONTROL__DC_HPD4_INT_POLARITY_MASK;
-		WREG32(mmDC_HPD4_INT_CONTROL, tmp);
-		break;
-	case AMDGPU_HPD_5:
-		tmp = RREG32(mmDC_HPD5_INT_CONTROL);
-		if (connected)
-			tmp &= ~DC_HPD5_INT_CONTROL__DC_HPD5_INT_POLARITY_MASK;
-		else
-			tmp |= DC_HPD5_INT_CONTROL__DC_HPD5_INT_POLARITY_MASK;
-		WREG32(mmDC_HPD5_INT_CONTROL, tmp);
-			break;
-	case AMDGPU_HPD_6:
-		tmp = RREG32(mmDC_HPD6_INT_CONTROL);
-		if (connected)
-			tmp &= ~DC_HPD6_INT_CONTROL__DC_HPD6_INT_POLARITY_MASK;
-		else
-			tmp |= DC_HPD6_INT_CONTROL__DC_HPD6_INT_POLARITY_MASK;
-		WREG32(mmDC_HPD6_INT_CONTROL, tmp);
-		break;
-	default:
-		break;
-	}
+	if (hpd >= adev->mode_info.num_hpd)
+		return;
+
+	tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+	if (connected)
+		tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+	else
+		tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+	WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
 }

 /**
@@ -390,35 +327,17 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
-	u32 tmp = (0x9c4 << DC_HPD1_CONTROL__DC_HPD1_CONNECTION_TIMER__SHIFT) |
-		(0xfa << DC_HPD1_CONTROL__DC_HPD1_RX_INT_TIMER__SHIFT) |
-		DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+	u32 tmp;

 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

-		switch (amdgpu_connector->hpd.hpd) {
-		case AMDGPU_HPD_1:
-			WREG32(mmDC_HPD1_CONTROL, tmp);
-			break;
-		case AMDGPU_HPD_2:
-			WREG32(mmDC_HPD2_CONTROL, tmp);
-			break;
-		case AMDGPU_HPD_3:
-			WREG32(mmDC_HPD3_CONTROL, tmp);
-			break;
-		case AMDGPU_HPD_4:
-			WREG32(mmDC_HPD4_CONTROL, tmp);
-			break;
-		case AMDGPU_HPD_5:
-			WREG32(mmDC_HPD5_CONTROL, tmp);
-			break;
-		case AMDGPU_HPD_6:
-			WREG32(mmDC_HPD6_CONTROL, tmp);
-			break;
-		default:
-			break;
-		}
+		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+			continue;
+
+		tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+		tmp |= DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+		WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);

 		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
 		    connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
@@ -427,34 +346,9 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
 			 * https://bugzilla.redhat.com/show_bug.cgi?id=726143
 			 * also avoid interrupt storms during dpms.
 			 */
-			u32 dc_hpd_int_cntl_reg, dc_hpd_int_cntl;
-
-			switch (amdgpu_connector->hpd.hpd) {
-			case AMDGPU_HPD_1:
-				dc_hpd_int_cntl_reg = mmDC_HPD1_INT_CONTROL;
-				break;
-			case AMDGPU_HPD_2:
-				dc_hpd_int_cntl_reg = mmDC_HPD2_INT_CONTROL;
-				break;
-			case AMDGPU_HPD_3:
-				dc_hpd_int_cntl_reg = mmDC_HPD3_INT_CONTROL;
-				break;
-			case AMDGPU_HPD_4:
-				dc_hpd_int_cntl_reg = mmDC_HPD4_INT_CONTROL;
-				break;
-			case AMDGPU_HPD_5:
-				dc_hpd_int_cntl_reg = mmDC_HPD5_INT_CONTROL;
-				break;
-			case AMDGPU_HPD_6:
-				dc_hpd_int_cntl_reg = mmDC_HPD6_INT_CONTROL;
-				break;
-			default:
-				continue;
-			}
-
-			dc_hpd_int_cntl = RREG32(dc_hpd_int_cntl_reg);
-			dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
-			WREG32(dc_hpd_int_cntl_reg, dc_hpd_int_cntl);
+			tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+			tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+			WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
 			continue;
 		}

@@ -475,32 +369,18 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
 {
 	struct drm_device *dev = adev->ddev;
 	struct drm_connector *connector;
+	u32 tmp;

 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);

-		switch (amdgpu_connector->hpd.hpd) {
-		case AMDGPU_HPD_1:
-			WREG32(mmDC_HPD1_CONTROL, 0);
-			break;
-		case AMDGPU_HPD_2:
-			WREG32(mmDC_HPD2_CONTROL, 0);
-			break;
-		case AMDGPU_HPD_3:
-			WREG32(mmDC_HPD3_CONTROL, 0);
-			break;
-		case AMDGPU_HPD_4:
-			WREG32(mmDC_HPD4_CONTROL, 0);
-			break;
-		case AMDGPU_HPD_5:
-			WREG32(mmDC_HPD5_CONTROL, 0);
-			break;
-		case AMDGPU_HPD_6:
-			WREG32(mmDC_HPD6_CONTROL, 0);
-			break;
-		default:
-			break;
-		}
+		if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+			continue;
+
+		tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+		tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+		WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+
 		amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
 	}
 }
@@ -2030,7 +1910,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	u32 viewport_w, viewport_h;
 	int r;
 	bool bypass_lut = false;
-	char *format_name;
+	struct drm_format_name_buf format_name;

 	/* no fb bound */
 	if (!atomic && !crtc->primary->fb) {
@@ -2135,9 +2015,8 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 		bypass_lut = true;
 		break;
 	default:
-		format_name = drm_get_format_name(target_fb->pixel_format);
-		DRM_ERROR("Unsupported screen format %s\n", format_name);
-		kfree(format_name);
+		DRM_ERROR("Unsupported screen format %s\n",
+		          drm_get_format_name(target_fb->pixel_format, &format_name));
 		return -EINVAL;
 	}

@@ -2465,6 +2344,9 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc,
 	struct amdgpu_device *adev = crtc->dev->dev_private;
 	int xorigin = 0, yorigin = 0;

+	amdgpu_crtc->cursor_x = x;
+	amdgpu_crtc->cursor_y = y;
+
 	/* avivo cursor are offset into the total surface */
 	x += crtc->x;
 	y += crtc->y;
@@ -2481,11 +2363,6 @@ static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc,

 	WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
 	WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
-	WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
-	       ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
-
-	amdgpu_crtc->cursor_x = x;
-	amdgpu_crtc->cursor_y = y;

 	return 0;
 }
@@ -2511,6 +2388,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
 				     int32_t hot_y)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_device *adev = crtc->dev->dev_private;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *aobj;
 	int ret;
@@ -2549,9 +2427,6 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
 		return ret;
 	}

-	amdgpu_crtc->cursor_width = width;
-	amdgpu_crtc->cursor_height = height;
-
 	dce_v8_0_lock_cursor(crtc, true);

 	if (hot_x != amdgpu_crtc->cursor_hot_x ||
@@ -2563,10 +2438,20 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,

 		dce_v8_0_cursor_move_locked(crtc, x, y);

+		amdgpu_crtc->cursor_width = width;
+		amdgpu_crtc->cursor_height = height;
 		amdgpu_crtc->cursor_hot_x = hot_x;
 		amdgpu_crtc->cursor_hot_y = hot_y;
 	}

+	if (width != amdgpu_crtc->cursor_width ||
+	    height != amdgpu_crtc->cursor_height) {
+		WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+		       (width - 1) << 16 | (height - 1));
+		amdgpu_crtc->cursor_width = width;
+		amdgpu_crtc->cursor_height = height;
+	}
+
 	dce_v8_0_show_cursor(crtc);
 	dce_v8_0_lock_cursor(crtc, false);

@@ -2588,6 +2473,7 @@ unpin:
 static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_device *adev = crtc->dev->dev_private;

 	if (amdgpu_crtc->cursor_bo) {
 		dce_v8_0_lock_cursor(crtc, true);
@@ -2595,6 +2481,10 @@ static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
 		dce_v8_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
 					    amdgpu_crtc->cursor_y);

+		WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+		       (amdgpu_crtc->cursor_width - 1) << 16 |
+		       (amdgpu_crtc->cursor_height - 1));
+
 		dce_v8_0_show_cursor(crtc);

 		dce_v8_0_lock_cursor(crtc, false);
@@ -3198,42 +3088,23 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
 					    unsigned type,
 					    enum amdgpu_interrupt_state state)
 {
-	u32 dc_hpd_int_cntl_reg, dc_hpd_int_cntl;
+	u32 dc_hpd_int_cntl;

-	switch (type) {
-	case AMDGPU_HPD_1:
-		dc_hpd_int_cntl_reg = mmDC_HPD1_INT_CONTROL;
-		break;
-	case AMDGPU_HPD_2:
-		dc_hpd_int_cntl_reg = mmDC_HPD2_INT_CONTROL;
-		break;
-	case AMDGPU_HPD_3:
-		dc_hpd_int_cntl_reg = mmDC_HPD3_INT_CONTROL;
-		break;
-	case AMDGPU_HPD_4:
-		dc_hpd_int_cntl_reg = mmDC_HPD4_INT_CONTROL;
-		break;
-	case AMDGPU_HPD_5:
-		dc_hpd_int_cntl_reg = mmDC_HPD5_INT_CONTROL;
-		break;
-	case AMDGPU_HPD_6:
-		dc_hpd_int_cntl_reg = mmDC_HPD6_INT_CONTROL;
-		break;
-	default:
+	if (type >= adev->mode_info.num_hpd) {
 		DRM_DEBUG("invalid hdp %d\n", type);
 		return 0;
 	}

 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
-		dc_hpd_int_cntl = RREG32(dc_hpd_int_cntl_reg);
+		dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
 		dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
-		WREG32(dc_hpd_int_cntl_reg, dc_hpd_int_cntl);
+		WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
 		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
-		dc_hpd_int_cntl = RREG32(dc_hpd_int_cntl_reg);
+		dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
 		dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
-		WREG32(dc_hpd_int_cntl_reg, dc_hpd_int_cntl);
+		WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
 		break;
 	default:
 		break;
@@ -3406,7 +3277,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
 			    struct amdgpu_irq_src *source,
 			    struct amdgpu_iv_entry *entry)
 {
-	uint32_t disp_int, mask, int_control, tmp;
+	uint32_t disp_int, mask, tmp;
 	unsigned hpd;

 	if (entry->src_data >= adev->mode_info.num_hpd) {
@@ -3417,12 +3288,11 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
 	hpd = entry->src_data;
 	disp_int = RREG32(interrupt_status_offsets[hpd].reg);
 	mask = interrupt_status_offsets[hpd].hpd;
-	int_control = hpd_int_control_offsets[hpd];

 	if (disp_int & mask) {
-		tmp = RREG32(int_control);
+		tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
 		tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
-		WREG32(int_control, tmp);
+		WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
 		schedule_work(&adev->hotplug_work);
 		DRM_DEBUG("IH: HPD%d\n", hpd + 1);
 	}
@@ -3443,7 +3313,7 @@ static int dce_v8_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs dce_v8_0_ip_funcs = {
+static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
 	.name = "dce_v8_0",
 	.early_init = dce_v8_0_early_init,
 	.late_init = NULL,
@@ -3727,7 +3597,6 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
 	.bandwidth_update = &dce_v8_0_bandwidth_update,
 	.vblank_get_counter = &dce_v8_0_vblank_get_counter,
 	.vblank_wait = &dce_v8_0_vblank_wait,
-	.is_display_hung = &dce_v8_0_is_display_hung,
 	.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
 	.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
 	.hpd_sense = &dce_v8_0_hpd_sense,
@@ -3773,3 +3642,48 @@ static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->hpd_irq.num_types = AMDGPU_HPD_LAST;
 	adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version dce_v8_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 8,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_1_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 8,
+	.minor = 1,
+	.rev = 0,
+	.funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_2_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 8,
+	.minor = 2,
+	.rev = 0,
+	.funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_3_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 8,
+	.minor = 3,
+	.rev = 0,
+	.funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_5_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 8,
+	.minor = 5,
+	.rev = 0,
+	.funcs = &dce_v8_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
@@ -24,7 +24,11 @@
 #ifndef __DCE_V8_0_H__
 #define __DCE_V8_0_H__

-extern const struct amd_ip_funcs dce_v8_0_ip_funcs;
+extern const struct amdgpu_ip_block_version dce_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_1_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_2_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_3_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_5_ip_block;

 void dce_v8_0_disable_dce(struct amdgpu_device *adev);

--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -27,6 +27,9 @@
 #include "atom.h"
 #include "amdgpu_pll.h"
 #include "amdgpu_connectors.h"
+#ifdef CONFIG_DRM_AMDGPU_SI
+#include "dce_v6_0.h"
+#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
 #include "dce_v8_0.h"
 #endif
@@ -34,11 +37,13 @@
 #include "dce_v11_0.h"
 #include "dce_virtual.h"

+#define DCE_VIRTUAL_VBLANK_PERIOD 16666666
+
+
 static void dce_virtual_set_display_funcs(struct amdgpu_device *adev);
 static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev);
-static int dce_virtual_pageflip_irq(struct amdgpu_device *adev,
-				  struct amdgpu_irq_src *source,
-				  struct amdgpu_iv_entry *entry);
+static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
+					      int index);

 /**
 * dce_virtual_vblank_wait - vblank wait asic callback.
@@ -90,15 +95,18 @@ static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev)
 	return 0;
 }

-static bool dce_virtual_is_display_hung(struct amdgpu_device *adev)
-{
-	return false;
-}
-
 static void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
 			      struct amdgpu_mode_mc_save *save)
 {
 	switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+	case CHIP_OLAND:
+		dce_v6_0_disable_dce(adev);
+		break;
+#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
@@ -119,6 +127,9 @@ static void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
 		dce_v11_0_disable_dce(adev);
 		break;
 	case CHIP_TOPAZ:
+#ifdef CONFIG_DRM_AMDGPU_SI
+	case CHIP_HAINAN:
+#endif
 		/* no DCE */
 		return;
 	default:
@@ -195,16 +206,15 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 		amdgpu_crtc->enabled = true;
-		/* Make sure VBLANK and PFLIP interrupts are still enabled */
+		/* Make sure VBLANK interrupts are still enabled */
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
-		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_on(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_off(crtc);
 		amdgpu_crtc->enabled = false;
 		break;
 	}
@@ -264,24 +274,6 @@ static bool dce_virtual_crtc_mode_fixup(struct drm_crtc *crtc,
 				     const struct drm_display_mode *mode,
 				     struct drm_display_mode *adjusted_mode)
 {
-	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-	struct drm_encoder *encoder;
-
-	/* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		if (encoder->crtc == crtc) {
-			amdgpu_crtc->encoder = encoder;
-			amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
-			break;
-		}
-	}
-	if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
-		amdgpu_crtc->encoder = NULL;
-		amdgpu_crtc->connector = NULL;
-		return false;
-	}
-
 	return true;
 }

@@ -341,6 +333,7 @@ static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index)
 	amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
 	amdgpu_crtc->encoder = NULL;
 	amdgpu_crtc->connector = NULL;
+	amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
 	drm_crtc_helper_add(&amdgpu_crtc->base, &dce_virtual_crtc_helper_funcs);

 	return 0;
@@ -350,48 +343,121 @@ static int dce_virtual_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-	adev->mode_info.vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
 	dce_virtual_set_display_funcs(adev);
 	dce_virtual_set_irq_funcs(adev);

-	adev->mode_info.num_crtc = 1;
 	adev->mode_info.num_hpd = 1;
 	adev->mode_info.num_dig = 1;
 	return 0;
 }

-static bool dce_virtual_get_connector_info(struct amdgpu_device *adev)
+static struct drm_encoder *
+dce_virtual_encoder(struct drm_connector *connector)
 {
-	struct amdgpu_i2c_bus_rec ddc_bus;
-	struct amdgpu_router router;
-	struct amdgpu_hpd hpd;
+	int enc_id = connector->encoder_ids[0];
+	struct drm_encoder *encoder;
+	int i;

-	/* look up gpio for ddc, hpd */
-	ddc_bus.valid = false;
-	hpd.hpd = AMDGPU_HPD_NONE;
-	/* needed for aux chan transactions */
-	ddc_bus.hpd = hpd.hpd;
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0)
+			break;

-	memset(&router, 0, sizeof(router));
-	router.ddc_valid = false;
-	router.cd_valid = false;
-	amdgpu_display_add_connector(adev,
-				      0,
-				      ATOM_DEVICE_CRT1_SUPPORT,
-				      DRM_MODE_CONNECTOR_VIRTUAL, &ddc_bus,
-				      CONNECTOR_OBJECT_ID_VIRTUAL,
-				      &hpd,
-				      &router);
+		encoder = drm_encoder_find(connector->dev, connector->encoder_ids[i]);
+		if (!encoder)
+			continue;

-	amdgpu_display_add_encoder(adev, ENCODER_VIRTUAL_ENUM_VIRTUAL,
-							ATOM_DEVICE_CRT1_SUPPORT,
-							0);
+		if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
+			return encoder;
+	}

-	amdgpu_link_encoder_connector(adev->ddev);
-
-	return true;
+	/* pick the first one */
+	if (enc_id)
+		return drm_encoder_find(connector->dev, enc_id);
+	return NULL;
 }

+static int dce_virtual_get_modes(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_display_mode *mode = NULL;
+	unsigned i;
+	static const struct mode_size {
+		int w;
+		int h;
+	} common_modes[17] = {
+		{ 640,  480},
+		{ 720,  480},
+		{ 800,  600},
+		{ 848,  480},
+		{1024,  768},
+		{1152,  768},
+		{1280,  720},
+		{1280,  800},
+		{1280,  854},
+		{1280,  960},
+		{1280, 1024},
+		{1440,  900},
+		{1400, 1050},
+		{1680, 1050},
+		{1600, 1200},
+		{1920, 1080},
+		{1920, 1200}
+	};
+
+	for (i = 0; i < 17; i++) {
+		mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+		drm_mode_probed_add(connector, mode);
+	}
+
+	return 0;
+}
+
+static int dce_virtual_mode_valid(struct drm_connector *connector,
+				  struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static int
+dce_virtual_dpms(struct drm_connector *connector, int mode)
+{
+	return 0;
+}
+
+static int
+dce_virtual_set_property(struct drm_connector *connector,
+			 struct drm_property *property,
+			 uint64_t val)
+{
+	return 0;
+}
+
+static void dce_virtual_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+	kfree(connector);
+}
+
+static void dce_virtual_force(struct drm_connector *connector)
+{
+	return;
+}
+
+static const struct drm_connector_helper_funcs dce_virtual_connector_helper_funcs = {
+	.get_modes = dce_virtual_get_modes,
+	.mode_valid = dce_virtual_mode_valid,
+	.best_encoder = dce_virtual_encoder,
+};
+
+static const struct drm_connector_funcs dce_virtual_connector_funcs = {
+	.dpms = dce_virtual_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.set_property = dce_virtual_set_property,
+	.destroy = dce_virtual_destroy,
+	.force = dce_virtual_force,
+};
+
 static int dce_virtual_sw_init(void *handle)
 {
 	int r, i;
@@ -420,16 +486,16 @@ static int dce_virtual_sw_init(void *handle)
 	adev->ddev->mode_config.max_width = 16384;
 	adev->ddev->mode_config.max_height = 16384;

-	/* allocate crtcs */
+	/* allocate crtcs, encoders, connectors */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		r = dce_virtual_crtc_init(adev, i);
 		if (r)
 			return r;
+		r = dce_virtual_connector_encoder_init(adev, i);
+		if (r)
+			return r;
 	}

-	dce_virtual_get_connector_info(adev);
-	amdgpu_print_display_setup(adev->ddev);
-
 	drm_kms_helper_poll_init(adev->ddev);

 	adev->mode_info.mode_config_initialized = true;
@@ -496,7 +562,7 @@ static int dce_virtual_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs dce_virtual_ip_funcs = {
+static const struct amd_ip_funcs dce_virtual_ip_funcs = {
 	.name = "dce_virtual",
 	.early_init = dce_virtual_early_init,
 	.late_init = NULL,
@@ -526,8 +592,8 @@ static void dce_virtual_encoder_commit(struct drm_encoder *encoder)

 static void
 dce_virtual_encoder_mode_set(struct drm_encoder *encoder,
-		      struct drm_display_mode *mode,
-		      struct drm_display_mode *adjusted_mode)
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adjusted_mode)
 {
 	return;
 }
@@ -547,10 +613,6 @@ static bool dce_virtual_encoder_mode_fixup(struct drm_encoder *encoder,
 				    const struct drm_display_mode *mode,
 				    struct drm_display_mode *adjusted_mode)
 {
-
-	/* set the active encoder to connector routing */
-	amdgpu_encoder_set_active_device(encoder);
-
 	return true;
 }

@@ -576,45 +638,40 @@ static const struct drm_encoder_funcs dce_virtual_encoder_funcs = {
 	.destroy = dce_virtual_encoder_destroy,
 };

-static void dce_virtual_encoder_add(struct amdgpu_device *adev,
-				 uint32_t encoder_enum,
-				 uint32_t supported_device,
-				 u16 caps)
+static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
+					      int index)
 {
-	struct drm_device *dev = adev->ddev;
 	struct drm_encoder *encoder;
-	struct amdgpu_encoder *amdgpu_encoder;
+	struct drm_connector *connector;

-	/* see if we already added it */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		amdgpu_encoder = to_amdgpu_encoder(encoder);
-		if (amdgpu_encoder->encoder_enum == encoder_enum) {
-			amdgpu_encoder->devices |= supported_device;
-			return;
-		}
+	/* add a new encoder */
+	encoder = kzalloc(sizeof(struct drm_encoder), GFP_KERNEL);
+	if (!encoder)
+		return -ENOMEM;
+	encoder->possible_crtcs = 1 << index;
+	drm_encoder_init(adev->ddev, encoder, &dce_virtual_encoder_funcs,
+			 DRM_MODE_ENCODER_VIRTUAL, NULL);
+	drm_encoder_helper_add(encoder, &dce_virtual_encoder_helper_funcs);

+	connector = kzalloc(sizeof(struct drm_connector), GFP_KERNEL);
+	if (!connector) {
+		kfree(encoder);
+		return -ENOMEM;
 	}

-	/* add a new one */
-	amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
-	if (!amdgpu_encoder)
-		return;
+	/* add a new connector */
+	drm_connector_init(adev->ddev, connector, &dce_virtual_connector_funcs,
+			   DRM_MODE_CONNECTOR_VIRTUAL);
+	drm_connector_helper_add(connector, &dce_virtual_connector_helper_funcs);
+	connector->display_info.subpixel_order = SubPixelHorizontalRGB;
+	connector->interlace_allowed = false;
+	connector->doublescan_allowed = false;
+	drm_connector_register(connector);

-	encoder = &amdgpu_encoder->base;
-	encoder->possible_crtcs = 0x1;
-	amdgpu_encoder->enc_priv = NULL;
-	amdgpu_encoder->encoder_enum = encoder_enum;
-	amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
-	amdgpu_encoder->devices = supported_device;
-	amdgpu_encoder->rmx_type = RMX_OFF;
-	amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
-	amdgpu_encoder->is_ext_encoder = false;
-	amdgpu_encoder->caps = caps;
+	/* link them */
+	drm_mode_connector_attach_encoder(connector, encoder);

-	drm_encoder_init(dev, encoder, &dce_virtual_encoder_funcs,
-					 DRM_MODE_ENCODER_VIRTUAL, NULL);
-	drm_encoder_helper_add(encoder, &dce_virtual_encoder_helper_funcs);
-	DRM_INFO("[FM]encoder: %d is VIRTUAL\n", amdgpu_encoder->encoder_id);
+	return 0;
 }

 static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
@@ -622,7 +679,6 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
 	.bandwidth_update = &dce_virtual_bandwidth_update,
 	.vblank_get_counter = &dce_virtual_vblank_get_counter,
 	.vblank_wait = &dce_virtual_vblank_wait,
-	.is_display_hung = &dce_virtual_is_display_hung,
 	.backlight_set_level = NULL,
 	.backlight_get_level = NULL,
 	.hpd_sense = &dce_virtual_hpd_sense,
@@ -630,8 +686,8 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
 	.hpd_get_gpio_reg = &dce_virtual_hpd_get_gpio_reg,
 	.page_flip = &dce_virtual_page_flip,
 	.page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos,
-	.add_encoder = &dce_virtual_encoder_add,
-	.add_connector = &amdgpu_connector_add,
+	.add_encoder = NULL,
+	.add_connector = NULL,
 	.stop_mc_access = &dce_virtual_stop_mc_access,
 	.resume_mc_access = &dce_virtual_resume_mc_access,
 };
@@ -642,107 +698,13 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev)
 		adev->mode_info.funcs = &dce_virtual_display_funcs;
 }

-static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer)
-{
-	struct amdgpu_mode_info *mode_info = container_of(vblank_timer, struct amdgpu_mode_info ,vblank_timer);
-	struct amdgpu_device *adev = container_of(mode_info, struct amdgpu_device ,mode_info);
-	unsigned crtc = 0;
-	drm_handle_vblank(adev->ddev, crtc);
-	dce_virtual_pageflip_irq(adev, NULL, NULL);
-	hrtimer_start(vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL);
-	return HRTIMER_NORESTART;
-}
-
-static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
-						     int crtc,
-						     enum amdgpu_interrupt_state state)
-{
-	if (crtc >= adev->mode_info.num_crtc) {
-		DRM_DEBUG("invalid crtc %d\n", crtc);
-		return;
-	}
-
-	if (state && !adev->mode_info.vsync_timer_enabled) {
-		DRM_DEBUG("Enable software vsync timer\n");
-		hrtimer_init(&adev->mode_info.vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-		hrtimer_set_expires(&adev->mode_info.vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD));
-		adev->mode_info.vblank_timer.function = dce_virtual_vblank_timer_handle;
-		hrtimer_start(&adev->mode_info.vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL);
-	} else if (!state && adev->mode_info.vsync_timer_enabled) {
-		DRM_DEBUG("Disable software vsync timer\n");
-		hrtimer_cancel(&adev->mode_info.vblank_timer);
-	}
-
-	adev->mode_info.vsync_timer_enabled = state;
-	DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state);
-}
-
-
-static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev,
-                                       struct amdgpu_irq_src *source,
-                                       unsigned type,
-                                       enum amdgpu_interrupt_state state)
-{
-	switch (type) {
-	case AMDGPU_CRTC_IRQ_VBLANK1:
-		dce_virtual_set_crtc_vblank_interrupt_state(adev, 0, state);
-		break;
-	default:
-		break;
-	}
-	return 0;
-}
-
-static void dce_virtual_crtc_vblank_int_ack(struct amdgpu_device *adev,
-					  int crtc)
-{
-	if (crtc >= adev->mode_info.num_crtc) {
-		DRM_DEBUG("invalid crtc %d\n", crtc);
-		return;
-	}
-}
-
-static int dce_virtual_crtc_irq(struct amdgpu_device *adev,
-			      struct amdgpu_irq_src *source,
-			      struct amdgpu_iv_entry *entry)
-{
-	unsigned crtc = 0;
-	unsigned irq_type = AMDGPU_CRTC_IRQ_VBLANK1;
-
-	dce_virtual_crtc_vblank_int_ack(adev, crtc);
-
-	if (amdgpu_irq_enabled(adev, source, irq_type)) {
-		drm_handle_vblank(adev->ddev, crtc);
-	}
-	dce_virtual_pageflip_irq(adev, NULL, NULL);
-	DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
-	return 0;
-}
-
-static int dce_virtual_set_pageflip_irq_state(struct amdgpu_device *adev,
-					    struct amdgpu_irq_src *src,
-					    unsigned type,
-					    enum amdgpu_interrupt_state state)
-{
-	if (type >= adev->mode_info.num_crtc) {
-		DRM_ERROR("invalid pageflip crtc %d\n", type);
-		return -EINVAL;
-	}
-	DRM_DEBUG("[FM]set pageflip irq type %d state %d\n", type, state);
-
-	return 0;
-}
-
-static int dce_virtual_pageflip_irq(struct amdgpu_device *adev,
-				  struct amdgpu_irq_src *source,
-				  struct amdgpu_iv_entry *entry)
+static int dce_virtual_pageflip(struct amdgpu_device *adev,
+				unsigned crtc_id)
 {
 	unsigned long flags;
-	unsigned crtc_id = 0;
 	struct amdgpu_crtc *amdgpu_crtc;
 	struct amdgpu_flip_work *works;

-	crtc_id = 0;
 	amdgpu_crtc = adev->mode_info.crtcs[crtc_id];

 	if (crtc_id >= adev->mode_info.num_crtc) {
@@ -781,22 +743,79 @@ static int dce_virtual_pageflip_irq(struct amdgpu_device *adev,
 	return 0;
 }

+static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer)
+{
+	struct amdgpu_crtc *amdgpu_crtc = container_of(vblank_timer,
+						       struct amdgpu_crtc, vblank_timer);
+	struct drm_device *ddev = amdgpu_crtc->base.dev;
+	struct amdgpu_device *adev = ddev->dev_private;
+
+	drm_handle_vblank(ddev, amdgpu_crtc->crtc_id);
+	dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id);
+	hrtimer_start(vblank_timer, ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD),
+		      HRTIMER_MODE_REL);
+
+	return HRTIMER_NORESTART;
+}
+
+static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+							int crtc,
+							enum amdgpu_interrupt_state state)
+{
+	if (crtc >= adev->mode_info.num_crtc) {
+		DRM_DEBUG("invalid crtc %d\n", crtc);
+		return;
+	}
+
+	if (state && !adev->mode_info.crtcs[crtc]->vsync_timer_enabled) {
+		DRM_DEBUG("Enable software vsync timer\n");
+		hrtimer_init(&adev->mode_info.crtcs[crtc]->vblank_timer,
+			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hrtimer_set_expires(&adev->mode_info.crtcs[crtc]->vblank_timer,
+				    ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD));
+		adev->mode_info.crtcs[crtc]->vblank_timer.function =
+			dce_virtual_vblank_timer_handle;
+		hrtimer_start(&adev->mode_info.crtcs[crtc]->vblank_timer,
+			      ktime_set(0, DCE_VIRTUAL_VBLANK_PERIOD), HRTIMER_MODE_REL);
+	} else if (!state && adev->mode_info.crtcs[crtc]->vsync_timer_enabled) {
+		DRM_DEBUG("Disable software vsync timer\n");
+		hrtimer_cancel(&adev->mode_info.crtcs[crtc]->vblank_timer);
+	}
+
+	adev->mode_info.crtcs[crtc]->vsync_timer_enabled = state;
+	DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state);
+}
+
+
+static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev,
+					  struct amdgpu_irq_src *source,
+					  unsigned type,
+					  enum amdgpu_interrupt_state state)
+{
+	if (type > AMDGPU_CRTC_IRQ_VBLANK6)
+		return -EINVAL;
+
+	dce_virtual_set_crtc_vblank_interrupt_state(adev, type, state);
+
+	return 0;
+}
+
 static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = {
 	.set = dce_virtual_set_crtc_irq_state,
-	.process = dce_virtual_crtc_irq,
-};
-
-static const struct amdgpu_irq_src_funcs dce_virtual_pageflip_irq_funcs = {
-	.set = dce_virtual_set_pageflip_irq_state,
-	.process = dce_virtual_pageflip_irq,
+	.process = NULL,
 };

 static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev)
 {
 	adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST;
 	adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs;
-
-	adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST;
-	adev->pageflip_irq.funcs = &dce_virtual_pageflip_irq_funcs;
 }

+const struct amdgpu_ip_block_version dce_virtual_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_DCE,
+	.major = 1,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &dce_virtual_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.h
@@ -24,8 +24,7 @@
 #ifndef __DCE_VIRTUAL_H__
 #define __DCE_VIRTUAL_H__

-extern const struct amd_ip_funcs dce_virtual_ip_funcs;
-#define DCE_VIRTUAL_VBLANK_PERIOD 16666666
+extern const struct amdgpu_ip_block_version dce_virtual_ip_block;

 #endif

--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.h
@@ -24,6 +24,6 @@
 #ifndef __GFX_V6_0_H__
 #define __GFX_V6_0_H__

-extern const struct amd_ip_funcs gfx_v6_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gfx_v6_0_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2077,9 +2077,9 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	u32 ref_and_mask;
-	int usepfp = ring->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;
+	int usepfp = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ? 0 : 1;

-	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
 		switch (ring->me) {
 		case 1:
 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
@@ -2105,6 +2105,18 @@ static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0x20); /* poll interval */
 }

+static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
+		EVENT_INDEX(4));
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
+		EVENT_INDEX(0));
+}
+
+
 /**
 * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
 *
@@ -2260,6 +2272,7 @@ static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)

 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		gfx_v7_0_ring_emit_vgt_flush(ring);
 		/* set load_global_config & load_global_uconfig */
 		dw2 |= 0x8001;
 		/* set load_cs_sh_regs */
@@ -2286,7 +2299,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
 	long r;
@@ -2312,7 +2325,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err2;

-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -2333,7 +2346,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)

 err2:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -3222,7 +3235,7 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
 */
 static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
-	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t seq = ring->fence_drv.sync_seq;
 	uint64_t addr = ring->fence_drv.gpu_addr;

@@ -3262,7 +3275,7 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
-	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -3391,7 +3404,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 		if (adev->gfx.rlc.save_restore_obj == NULL) {
 			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 					     NULL, NULL,
 					     &adev->gfx.rlc.save_restore_obj);
 			if (r) {
@@ -3435,7 +3449,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 		if (adev->gfx.rlc.clear_state_obj == NULL) {
 			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 					     NULL, NULL,
 					     &adev->gfx.rlc.clear_state_obj);
 			if (r) {
@@ -3475,7 +3490,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 		if (adev->gfx.rlc.cp_table_obj == NULL) {
 			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 					     NULL, NULL,
 					     &adev->gfx.rlc.cp_table_obj);
 			if (r) {
@@ -4354,44 +4370,69 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }

-static unsigned gfx_v7_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 {
-	return
-		4; /* gfx_v7_0_ring_emit_ib_gfx */
+	WREG32(mmSQ_IND_INDEX,
+		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+		(address << SQ_IND_INDEX__INDEX__SHIFT) |
+		(SQ_IND_INDEX__FORCE_READ_MASK));
+	return RREG32(mmSQ_IND_DATA);
 }

-static unsigned gfx_v7_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+			   uint32_t wave, uint32_t thread,
+			   uint32_t regno, uint32_t num, uint32_t *out)
 {
-	return
-		20 + /* gfx_v7_0_ring_emit_gds_switch */
-		7 + /* gfx_v7_0_ring_emit_hdp_flush */
-		5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
-		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
-		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
-		17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
-		3; /* gfx_v7_ring_emit_cntxcntl */
+	WREG32(mmSQ_IND_INDEX,
+		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
+		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+		(SQ_IND_INDEX__FORCE_READ_MASK) |
+		(SQ_IND_INDEX__AUTO_INCR_MASK));
+	while (num--)
+		*(out++) = RREG32(mmSQ_IND_DATA);
 }

-static unsigned gfx_v7_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
+static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
 {
-	return
-		4; /* gfx_v7_0_ring_emit_ib_compute */
+	/* type 0 wave data */
+	dst[(*no_fields)++] = 0;
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
 }

-static unsigned gfx_v7_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
+static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+				     uint32_t wave, uint32_t start,
+				     uint32_t size, uint32_t *dst)
 {
-	return
-		20 + /* gfx_v7_0_ring_emit_gds_switch */
-		7 + /* gfx_v7_0_ring_emit_hdp_flush */
-		5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
-		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
-		17 + /* gfx_v7_0_ring_emit_vm_flush */
-		7 + 7 + 7; /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+	wave_read_regs(
+		adev, simd, wave, 0,
+		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }

 static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v7_0_select_se_sh,
+	.read_wave_data = &gfx_v7_0_read_wave_data,
+	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
 };

 static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
@@ -4643,9 +4684,7 @@ static int gfx_v7_0_sw_init(void *handle)
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		r = amdgpu_ring_init(adev, ring, 1024,
-				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
-				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
-				     AMDGPU_RING_TYPE_GFX);
+				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
@@ -4670,9 +4709,7 @@ static int gfx_v7_0_sw_init(void *handle)
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
-				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
-				     &adev->gfx.eop_irq, irq_type,
-				     AMDGPU_RING_TYPE_COMPUTE);
+				     &adev->gfx.eop_irq, irq_type);
 		if (r)
 			return r;
 	}
@@ -5123,7 +5160,7 @@ static int gfx_v7_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
+static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
 	.name = "gfx_v7_0",
 	.early_init = gfx_v7_0_early_init,
 	.late_init = gfx_v7_0_late_init,
@@ -5141,10 +5178,21 @@ const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
 };

 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
+	.type = AMDGPU_RING_TYPE_GFX,
+	.align_mask = 0xff,
+	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.get_rptr = gfx_v7_0_ring_get_rptr,
 	.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		20 + /* gfx_v7_0_ring_emit_gds_switch */
+		7 + /* gfx_v7_0_ring_emit_hdp_flush */
+		5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+		12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
+		7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
+		17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
+		3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+	.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
 	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
@@ -5157,15 +5205,23 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
-	.get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_gfx,
-	.get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_gfx,
 };

 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
+	.type = AMDGPU_RING_TYPE_COMPUTE,
+	.align_mask = 0xff,
+	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.get_rptr = gfx_v7_0_ring_get_rptr,
 	.get_wptr = gfx_v7_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v7_0_ring_set_wptr_compute,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		20 + /* gfx_v7_0_ring_emit_gds_switch */
+		7 + /* gfx_v7_0_ring_emit_hdp_flush */
+		5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+		7 + /* gfx_v7_0_ring_emit_pipeline_sync */
+		17 + /* gfx_v7_0_ring_emit_vm_flush */
+		7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+	.emit_ib_size =	4, /* gfx_v7_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
@@ -5177,8 +5233,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_compute,
-	.get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_compute,
 };

 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5289,3 +5343,39 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
 }
+
+const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GFX,
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &gfx_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GFX,
+	.major = 7,
+	.minor = 1,
+	.rev = 0,
+	.funcs = &gfx_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GFX,
+	.major = 7,
+	.minor = 2,
+	.rev = 0,
+	.funcs = &gfx_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GFX,
+	.major = 7,
+	.minor = 3,
+	.rev = 0,
+	.funcs = &gfx_v7_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -24,6 +24,9 @@
 #ifndef __GFX_V7_0_H__
 #define __GFX_V7_0_H__

-extern const struct amd_ip_funcs gfx_v7_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -25,6 +25,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "vi.h"
+#include "vi_structs.h"
 #include "vid.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_atombios.h"
@@ -167,6 +168,7 @@ static const u32 golden_settings_tonga_a11[] =
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
@@ -797,7 +799,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
 	long r;
@@ -823,7 +825,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err2;

-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out.\n");
 		r = -ETIMEDOUT;
@@ -843,7 +845,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	}
 err2:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -1057,6 +1059,19 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 		adev->firmware.fw_size +=
 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

+		/* we need account JT in */
+		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
+
+		if (amdgpu_sriov_vf(adev)) {
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
+			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
+			info->fw = adev->gfx.mec_fw;
+			adev->firmware.fw_size +=
+				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
+		}
+
 		if (adev->gfx.mec2_fw) {
 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
@@ -1126,34 +1141,8 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
 	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
 			PACKET3_SET_CONTEXT_REG_START);
-	switch (adev->asic_type) {
-	case CHIP_TONGA:
-	case CHIP_POLARIS10:
-		buffer[count++] = cpu_to_le32(0x16000012);
-		buffer[count++] = cpu_to_le32(0x0000002A);
-		break;
-	case CHIP_POLARIS11:
-		buffer[count++] = cpu_to_le32(0x16000012);
-		buffer[count++] = cpu_to_le32(0x00000000);
-		break;
-	case CHIP_FIJI:
-		buffer[count++] = cpu_to_le32(0x3a00161a);
-		buffer[count++] = cpu_to_le32(0x0000002e);
-		break;
-	case CHIP_TOPAZ:
-	case CHIP_CARRIZO:
-		buffer[count++] = cpu_to_le32(0x00000002);
-		buffer[count++] = cpu_to_le32(0x00000000);
-		break;
-	case CHIP_STONEY:
-		buffer[count++] = cpu_to_le32(0x00000000);
-		buffer[count++] = cpu_to_le32(0x00000000);
-		break;
-	default:
-		buffer[count++] = cpu_to_le32(0x00000000);
-		buffer[count++] = cpu_to_le32(0x00000000);
-		break;
-	}
+	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
@@ -1272,7 +1261,8 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 		if (adev->gfx.rlc.clear_state_obj == NULL) {
 			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 					     NULL, NULL,
 					     &adev->gfx.rlc.clear_state_obj);
 			if (r) {
@@ -1314,7 +1304,8 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 		if (adev->gfx.rlc.cp_table_obj == NULL) {
 			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_VRAM,
-					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
 					     NULL, NULL,
 					     &adev->gfx.rlc.cp_table_obj);
 			if (r) {
@@ -1382,7 +1373,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)

 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -1411,7 +1402,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 		return r;
 	}

-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -1574,7 +1565,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	int r, i;
 	u32 tmp;
 	unsigned total_size, vgpr_offset, sgpr_offset;
@@ -1707,7 +1698,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	}

 	/* wait for the GPU to finish processing the IB */
-	r = fence_wait(f, false);
+	r = dma_fence_wait(f, false);
 	if (r) {
 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
 		goto fail;
@@ -1728,7 +1719,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)

 fail:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);

 	return r;
 }
@@ -2044,10 +2035,8 @@ static int gfx_v8_0_sw_init(void *handle)
 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
 		}

-		r = amdgpu_ring_init(adev, ring, 1024,
-				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
-				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
-				     AMDGPU_RING_TYPE_GFX);
+		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+				     AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
@@ -2071,10 +2060,8 @@ static int gfx_v8_0_sw_init(void *handle)
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
-				     &adev->gfx.eop_irq, irq_type,
-				     AMDGPU_RING_TYPE_COMPUTE);
+		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+				     irq_type);
 		if (r)
 			return r;
 	}
@@ -3678,6 +3665,21 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 							num_rb_pipes);
 	}

+	/* cache the values for userspace */
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+			adev->gfx.config.rb_config[i][j].rb_backend_disable =
+				RREG32(mmCC_RB_BACKEND_DISABLE);
+			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+			adev->gfx.config.rb_config[i][j].raster_config =
+				RREG32(mmPA_SC_RASTER_CONFIG);
+			adev->gfx.config.rb_config[i][j].raster_config_1 =
+				RREG32(mmPA_SC_RASTER_CONFIG_1);
+		}
+	}
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 }

@@ -3904,7 +3906,7 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
 	int list_size;
 	unsigned int *register_list_format =
 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
-	if (register_list_format == NULL)
+	if (!register_list_format)
 		return -ENOMEM;
 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
 			adev->gfx.rlc.reg_list_format_size_bytes);
@@ -4330,7 +4332,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	u32 tmp;
 	u32 rb_bufsz;
-	u64 rb_addr, rptr_addr;
+	u64 rb_addr, rptr_addr, wptr_gpu_addr;
 	int r;

 	/* Set the write pointer delay */
@@ -4361,6 +4363,9 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

+	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
+	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
 	mdelay(1);
 	WREG32(mmCP_RB0_CNTL, tmp);

@@ -4466,267 +4471,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	return 0;
 }

-struct vi_mqd {
-	uint32_t header;  /* ordinal0 */
-	uint32_t compute_dispatch_initiator;  /* ordinal1 */
-	uint32_t compute_dim_x;  /* ordinal2 */
-	uint32_t compute_dim_y;  /* ordinal3 */
-	uint32_t compute_dim_z;  /* ordinal4 */
-	uint32_t compute_start_x;  /* ordinal5 */
-	uint32_t compute_start_y;  /* ordinal6 */
-	uint32_t compute_start_z;  /* ordinal7 */
-	uint32_t compute_num_thread_x;  /* ordinal8 */
-	uint32_t compute_num_thread_y;  /* ordinal9 */
-	uint32_t compute_num_thread_z;  /* ordinal10 */
-	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
-	uint32_t compute_perfcount_enable;  /* ordinal12 */
-	uint32_t compute_pgm_lo;  /* ordinal13 */
-	uint32_t compute_pgm_hi;  /* ordinal14 */
-	uint32_t compute_tba_lo;  /* ordinal15 */
-	uint32_t compute_tba_hi;  /* ordinal16 */
-	uint32_t compute_tma_lo;  /* ordinal17 */
-	uint32_t compute_tma_hi;  /* ordinal18 */
-	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
-	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
-	uint32_t compute_vmid;  /* ordinal21 */
-	uint32_t compute_resource_limits;  /* ordinal22 */
-	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
-	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
-	uint32_t compute_tmpring_size;  /* ordinal25 */
-	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
-	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
-	uint32_t compute_restart_x;  /* ordinal28 */
-	uint32_t compute_restart_y;  /* ordinal29 */
-	uint32_t compute_restart_z;  /* ordinal30 */
-	uint32_t compute_thread_trace_enable;  /* ordinal31 */
-	uint32_t compute_misc_reserved;  /* ordinal32 */
-	uint32_t compute_dispatch_id;  /* ordinal33 */
-	uint32_t compute_threadgroup_id;  /* ordinal34 */
-	uint32_t compute_relaunch;  /* ordinal35 */
-	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
-	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
-	uint32_t compute_wave_restore_control;  /* ordinal38 */
-	uint32_t reserved9;  /* ordinal39 */
-	uint32_t reserved10;  /* ordinal40 */
-	uint32_t reserved11;  /* ordinal41 */
-	uint32_t reserved12;  /* ordinal42 */
-	uint32_t reserved13;  /* ordinal43 */
-	uint32_t reserved14;  /* ordinal44 */
-	uint32_t reserved15;  /* ordinal45 */
-	uint32_t reserved16;  /* ordinal46 */
-	uint32_t reserved17;  /* ordinal47 */
-	uint32_t reserved18;  /* ordinal48 */
-	uint32_t reserved19;  /* ordinal49 */
-	uint32_t reserved20;  /* ordinal50 */
-	uint32_t reserved21;  /* ordinal51 */
-	uint32_t reserved22;  /* ordinal52 */
-	uint32_t reserved23;  /* ordinal53 */
-	uint32_t reserved24;  /* ordinal54 */
-	uint32_t reserved25;  /* ordinal55 */
-	uint32_t reserved26;  /* ordinal56 */
-	uint32_t reserved27;  /* ordinal57 */
-	uint32_t reserved28;  /* ordinal58 */
-	uint32_t reserved29;  /* ordinal59 */
-	uint32_t reserved30;  /* ordinal60 */
-	uint32_t reserved31;  /* ordinal61 */
-	uint32_t reserved32;  /* ordinal62 */
-	uint32_t reserved33;  /* ordinal63 */
-	uint32_t reserved34;  /* ordinal64 */
-	uint32_t compute_user_data_0;  /* ordinal65 */
-	uint32_t compute_user_data_1;  /* ordinal66 */
-	uint32_t compute_user_data_2;  /* ordinal67 */
-	uint32_t compute_user_data_3;  /* ordinal68 */
-	uint32_t compute_user_data_4;  /* ordinal69 */
-	uint32_t compute_user_data_5;  /* ordinal70 */
-	uint32_t compute_user_data_6;  /* ordinal71 */
-	uint32_t compute_user_data_7;  /* ordinal72 */
-	uint32_t compute_user_data_8;  /* ordinal73 */
-	uint32_t compute_user_data_9;  /* ordinal74 */
-	uint32_t compute_user_data_10;  /* ordinal75 */
-	uint32_t compute_user_data_11;  /* ordinal76 */
-	uint32_t compute_user_data_12;  /* ordinal77 */
-	uint32_t compute_user_data_13;  /* ordinal78 */
-	uint32_t compute_user_data_14;  /* ordinal79 */
-	uint32_t compute_user_data_15;  /* ordinal80 */
-	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
-	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
-	uint32_t reserved35;  /* ordinal83 */
-	uint32_t reserved36;  /* ordinal84 */
-	uint32_t reserved37;  /* ordinal85 */
-	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
-	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
-	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
-	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
-	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
-	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
-	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
-	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
-	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
-	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
-	uint32_t reserved38;  /* ordinal96 */
-	uint32_t reserved39;  /* ordinal97 */
-	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
-	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
-	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
-	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
-	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
-	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
-	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
-	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
-	uint32_t reserved40;  /* ordinal106 */
-	uint32_t reserved41;  /* ordinal107 */
-	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
-	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
-	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
-	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
-	uint32_t reserved42;  /* ordinal112 */
-	uint32_t reserved43;  /* ordinal113 */
-	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
-	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
-	uint32_t cp_packet_id_lo;  /* ordinal116 */
-	uint32_t cp_packet_id_hi;  /* ordinal117 */
-	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
-	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
-	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
-	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
-	uint32_t gds_save_mask_lo;  /* ordinal122 */
-	uint32_t gds_save_mask_hi;  /* ordinal123 */
-	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
-	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
-	uint32_t reserved44;  /* ordinal126 */
-	uint32_t reserved45;  /* ordinal127 */
-	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
-	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
-	uint32_t cp_hqd_active;  /* ordinal130 */
-	uint32_t cp_hqd_vmid;  /* ordinal131 */
-	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
-	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
-	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
-	uint32_t cp_hqd_quantum;  /* ordinal135 */
-	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
-	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
-	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
-	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
-	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
-	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
-	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
-	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
-	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
-	uint32_t cp_hqd_pq_control;  /* ordinal145 */
-	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
-	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
-	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
-	uint32_t cp_hqd_ib_control;  /* ordinal149 */
-	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
-	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
-	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
-	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
-	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
-	uint32_t cp_hqd_msg_type;  /* ordinal155 */
-	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
-	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
-	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
-	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
-	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
-	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
-	uint32_t cp_mqd_control;  /* ordinal162 */
-	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
-	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
-	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
-	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
-	uint32_t cp_hqd_eop_control;  /* ordinal167 */
-	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
-	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
-	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
-	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
-	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
-	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
-	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
-	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
-	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
-	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
-	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
-	uint32_t cp_hqd_error;  /* ordinal179 */
-	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
-	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
-	uint32_t reserved46;  /* ordinal182 */
-	uint32_t reserved47;  /* ordinal183 */
-	uint32_t reserved48;  /* ordinal184 */
-	uint32_t reserved49;  /* ordinal185 */
-	uint32_t reserved50;  /* ordinal186 */
-	uint32_t reserved51;  /* ordinal187 */
-	uint32_t reserved52;  /* ordinal188 */
-	uint32_t reserved53;  /* ordinal189 */
-	uint32_t reserved54;  /* ordinal190 */
-	uint32_t reserved55;  /* ordinal191 */
-	uint32_t iqtimer_pkt_header;  /* ordinal192 */
-	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
-	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
-	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
-	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
-	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
-	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
-	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
-	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
-	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
-	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
-	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
-	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
-	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
-	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
-	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
-	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
-	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
-	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
-	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
-	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
-	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
-	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
-	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
-	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
-	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
-	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
-	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
-	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
-	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
-	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
-	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
-	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
-	uint32_t reserved56;  /* ordinal225 */
-	uint32_t reserved57;  /* ordinal226 */
-	uint32_t reserved58;  /* ordinal227 */
-	uint32_t set_resources_header;  /* ordinal228 */
-	uint32_t set_resources_dw1;  /* ordinal229 */
-	uint32_t set_resources_dw2;  /* ordinal230 */
-	uint32_t set_resources_dw3;  /* ordinal231 */
-	uint32_t set_resources_dw4;  /* ordinal232 */
-	uint32_t set_resources_dw5;  /* ordinal233 */
-	uint32_t set_resources_dw6;  /* ordinal234 */
-	uint32_t set_resources_dw7;  /* ordinal235 */
-	uint32_t reserved59;  /* ordinal236 */
-	uint32_t reserved60;  /* ordinal237 */
-	uint32_t reserved61;  /* ordinal238 */
-	uint32_t reserved62;  /* ordinal239 */
-	uint32_t reserved63;  /* ordinal240 */
-	uint32_t reserved64;  /* ordinal241 */
-	uint32_t reserved65;  /* ordinal242 */
-	uint32_t reserved66;  /* ordinal243 */
-	uint32_t reserved67;  /* ordinal244 */
-	uint32_t reserved68;  /* ordinal245 */
-	uint32_t reserved69;  /* ordinal246 */
-	uint32_t reserved70;  /* ordinal247 */
-	uint32_t reserved71;  /* ordinal248 */
-	uint32_t reserved72;  /* ordinal249 */
-	uint32_t reserved73;  /* ordinal250 */
-	uint32_t reserved74;  /* ordinal251 */
-	uint32_t reserved75;  /* ordinal252 */
-	uint32_t reserved76;  /* ordinal253 */
-	uint32_t reserved77;  /* ordinal254 */
-	uint32_t reserved78;  /* ordinal255 */
-
-	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
-};
-
 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
 {
 	int i, r;
@@ -4760,34 +4504,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 	u32 *buf;
 	struct vi_mqd *mqd;

-	/* init the pipes */
-	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-		int me = (i < 4) ? 1 : 2;
-		int pipe = (i < 4) ? i : (i - 4);
-
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		eop_gpu_addr >>= 8;
-
-		vi_srbm_select(adev, me, pipe, 0, 0);
-
-		/* write the EOP addr */
-		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
-		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-
-		/* set the VMID assigned */
-		WREG32(mmCP_HQD_VMID, 0);
-
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
-	}
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-
-	/* init the queues.  Just two for now. */
+	/* init the queues.  */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

@@ -4839,6 +4556,22 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 			       ring->pipe,
 			       ring->queue, 0);

+		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
+		eop_gpu_addr >>= 8;
+
+		/* write the EOP addr */
+		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
+		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
+
+		/* set the VMID assigned */
+		WREG32(mmCP_HQD_VMID, 0);
+
+		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
+		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
+
 		/* disable wptr polling */
 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
@@ -4922,9 +4655,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)

 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
+		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
+		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

@@ -5095,6 +4828,10 @@ static int gfx_v8_0_hw_fini(void *handle)

 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+	if (amdgpu_sriov_vf(adev)) {
+		pr_debug("For SRIOV client, shouldn't do anything.\n");
+		return 0;
+	}
 	gfx_v8_0_cp_enable(adev, false);
 	gfx_v8_0_rlc_stop(adev);
 	gfx_v8_0_cp_compute_fini(adev);
@@ -5437,9 +5174,70 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }

+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+{
+	WREG32(mmSQ_IND_INDEX,
+		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+		(address << SQ_IND_INDEX__INDEX__SHIFT) |
+		(SQ_IND_INDEX__FORCE_READ_MASK));
+	return RREG32(mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+			   uint32_t wave, uint32_t thread,
+			   uint32_t regno, uint32_t num, uint32_t *out)
+{
+	WREG32(mmSQ_IND_INDEX,
+		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
+		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
+		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
+		(SQ_IND_INDEX__FORCE_READ_MASK) |
+		(SQ_IND_INDEX__AUTO_INCR_MASK));
+	while (num--)
+		*(out++) = RREG32(mmSQ_IND_DATA);
+}
+
+static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+	/* type 0 wave data */
+	dst[(*no_fields)++] = 0;
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
+	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+}
+
+static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+				     uint32_t wave, uint32_t start,
+				     uint32_t size, uint32_t *dst)
+{
+	wave_read_regs(
+		adev, simd, wave, 0,
+		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
+}
+
+
 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v8_0_select_se_sh,
+	.read_wave_data = &gfx_v8_0_read_wave_data,
+	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
 };

 static int gfx_v8_0_early_init(void *handle)
@@ -5891,29 +5689,24 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 	adev->gfx.rlc.funcs->enter_safe_mode(adev);

 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
-		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
-		 * Cmp_busy/GFX_Idle interrupts
-		 */
-		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
-
 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
 		if (temp1 != data1)
 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

-		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
+		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
 		gfx_v8_0_wait_for_rlc_serdes(adev);

-		/* 3 - clear cgcg override */
+		/* 2 - clear cgcg override */
 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
 		gfx_v8_0_wait_for_rlc_serdes(adev);

-		/* 4 - write cmd to set CGLS */
+		/* 3 - write cmd to set CGLS */
 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

-		/* 5 - enable cgcg */
+		/* 4 - enable cgcg */
 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
@@ -5931,6 +5724,11 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev

 		if (temp != data)
 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
+		 * Cmp_busy/GFX_Idle interrupts
+		 */
+		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
 	} else {
 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
@@ -6119,7 +5917,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	u32 ref_and_mask, reg_mem_engine;

-	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
 		switch (ring->me) {
 		case 1:
 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
@@ -6147,6 +5945,18 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0x20); /* poll interval */
 }

+static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
+		EVENT_INDEX(4));
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
+	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
+		EVENT_INDEX(0));
+}
+
+
 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
 {
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -6221,7 +6031,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,

 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
-	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t seq = ring->fence_drv.sync_seq;
 	uint64_t addr = ring->fence_drv.gpu_addr;

@@ -6239,11 +6049,7 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
-	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
-
-	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
-	if (usepfp)
-		amdgpu_ring_insert_nop(ring, 128);
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -6336,6 +6142,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)

 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		gfx_v8_0_ring_emit_vgt_flush(ring);
 		/* set load_global_config & load_global_uconfig */
 		dw2 |= 0x8001;
 		/* set load_cs_sh_regs */
@@ -6359,42 +6166,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	amdgpu_ring_write(ring, 0);
 }

-static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
-{
-	return
-		4; /* gfx_v8_0_ring_emit_ib_gfx */
-}
-
-static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
-{
-	return
-		20 + /* gfx_v8_0_ring_emit_gds_switch */
-		7 + /* gfx_v8_0_ring_emit_hdp_flush */
-		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
-		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
-		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
-		2 + /* gfx_v8_ring_emit_sb */
-		3; /* gfx_v8_ring_emit_cntxcntl */
-}
-
-static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
-{
-	return
-		4; /* gfx_v8_0_ring_emit_ib_compute */
-}
-
-static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
-{
-	return
-		20 + /* gfx_v8_0_ring_emit_gds_switch */
-		7 + /* gfx_v8_0_ring_emit_hdp_flush */
-		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
-		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-		17 + /* gfx_v8_0_ring_emit_vm_flush */
-		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
-}
-
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6540,7 +6311,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
 	return 0;
 }

-const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
+static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 	.name = "gfx_v8_0",
 	.early_init = gfx_v8_0_early_init,
 	.late_init = gfx_v8_0_late_init,
@@ -6561,10 +6332,22 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 };

 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
+	.type = AMDGPU_RING_TYPE_GFX,
+	.align_mask = 0xff,
+	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		20 + /* gfx_v8_0_ring_emit_gds_switch */
+		7 + /* gfx_v8_0_ring_emit_hdp_flush */
+		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
+		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
+		2 + /* gfx_v8_ring_emit_sb */
+		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
+	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@@ -6578,15 +6361,23 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
-	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
-	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
 };

 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
+	.type = AMDGPU_RING_TYPE_COMPUTE,
+	.align_mask = 0xff,
+	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		20 + /* gfx_v8_0_ring_emit_gds_switch */
+		7 + /* gfx_v8_0_ring_emit_hdp_flush */
+		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
+		17 + /* gfx_v8_0_ring_emit_vm_flush */
+		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
+	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@@ -6598,8 +6389,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
-	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
 };

 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -6752,3 +6541,21 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
 }
+
+const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GFX,
+	.major = 8,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &gfx_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GFX,
+	.major = 8,
+	.minor = 1,
+	.rev = 0,
+	.funcs = &gfx_v8_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -24,6 +24,7 @@
 #ifndef __GFX_V8_0_H__
 #define __GFX_V8_0_H__

-extern const struct amd_ip_funcs gfx_v8_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -1,4 +1,3 @@
-
 /*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
@@ -26,7 +25,16 @@
 #include "amdgpu.h"
 #include "gmc_v6_0.h"
 #include "amdgpu_ucode.h"
-#include "si/sid.h"
+
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+#include "dce/dce_6_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+#include "si_enums.h"

 static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -37,6 +45,16 @@ MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
 MODULE_FIRMWARE("radeon/verde_mc.bin");
 MODULE_FIRMWARE("radeon/oland_mc.bin");

+#define MC_SEQ_MISC0__MT__MASK   0xf0000000
+#define MC_SEQ_MISC0__MT__GDDR1  0x10000000
+#define MC_SEQ_MISC0__MT__DDR2   0x20000000
+#define MC_SEQ_MISC0__MT__GDDR3  0x30000000
+#define MC_SEQ_MISC0__MT__GDDR4  0x40000000
+#define MC_SEQ_MISC0__MT__GDDR5  0x50000000
+#define MC_SEQ_MISC0__MT__HBM    0x60000000
+#define MC_SEQ_MISC0__MT__DDR3   0xB0000000
+
+
 static const u32 crtc_offsets[6] =
 {
 	SI_CRTC0_REGISTER_OFFSET,
@@ -57,14 +75,14 @@ static void gmc_v6_0_mc_stop(struct amdgpu_device *adev,

 	gmc_v6_0_wait_for_idle((void *)adev);

-	blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
-	if (REG_GET_FIELD(blackout, mmMC_SHARED_BLACKOUT_CNTL, xxBLACKOUT_MODE) != 1) {
+	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
 		/* Block CPU access */
-		WREG32(BIF_FB_EN, 0);
+		WREG32(mmBIF_FB_EN, 0);
 		/* blackout the MC */
 		blackout = REG_SET_FIELD(blackout,
-					 mmMC_SHARED_BLACKOUT_CNTL, xxBLACKOUT_MODE, 0);
-		WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
+					 MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+		WREG32(mmMC_SHARED_BLACKOUT_CNTL, blackout | 1);
 	}
 	/* wait for the MC to settle */
 	udelay(100);
@@ -77,13 +95,13 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev,
 	u32 tmp;

 	/* unblackout the MC */
-	tmp = RREG32(MC_SHARED_BLACKOUT_CNTL);
-	tmp = REG_SET_FIELD(tmp, mmMC_SHARED_BLACKOUT_CNTL, xxBLACKOUT_MODE, 0);
-	WREG32(MC_SHARED_BLACKOUT_CNTL, tmp);
+	tmp = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
+	tmp = REG_SET_FIELD(tmp, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE, 0);
+	WREG32(mmMC_SHARED_BLACKOUT_CNTL, tmp);
 	/* allow CPU access */
-	tmp = REG_SET_FIELD(0, mmBIF_FB_EN, xxFB_READ_EN, 1);
-	tmp = REG_SET_FIELD(tmp, mmBIF_FB_EN, xxFB_WRITE_EN, 1);
-	WREG32(BIF_FB_EN, tmp);
+	tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1);
+	tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1);
+	WREG32(mmBIF_FB_EN, tmp);

 	if (adev->mode_info.num_crtc)
 		amdgpu_display_resume_mc_access(adev, save);
@@ -158,37 +176,37 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
 	new_fw_data = (const __le32 *)
 		(adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

-	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
+	running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK;

 	if (running == 0) {

 		/* reset the engine and set to writable */
-		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
-		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
+		WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+		WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010);

 		/* load mc io regs */
 		for (i = 0; i < regs_size; i++) {
-			WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
-			WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
+			WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
+			WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
 		}
 		/* load the MC ucode */
 		for (i = 0; i < ucode_size; i++) {
-			WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
+			WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
 		}

 		/* put the engine back into the active state */
-		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
-		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
-		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
+		WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008);
+		WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004);
+		WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001);

 		/* wait for training to complete */
 		for (i = 0; i < adev->usec_timeout; i++) {
-			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
+			if (RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL) & MC_SEQ_TRAIN_WAKEUP_CNTL__TRAIN_DONE_D0_MASK)
 				break;
 			udelay(1);
 		}
 		for (i = 0; i < adev->usec_timeout; i++) {
-			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
+			if (RREG32(mmMC_SEQ_TRAIN_WAKEUP_CNTL) & MC_SEQ_TRAIN_WAKEUP_CNTL__TRAIN_DONE_D1_MASK)
 				break;
 			udelay(1);
 		}
@@ -225,7 +243,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 		WREG32((0xb08 + j), 0x00000000);
 		WREG32((0xb09 + j), 0x00000000);
 	}
-	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+	WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);

 	gmc_v6_0_mc_stop(adev, &save);

@@ -233,24 +251,24 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}

-	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+	WREG32(mmVGA_HDP_CONTROL, VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK);
 	/* Update configuration */
-	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+	WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 	       adev->mc.vram_start >> 12);
-	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+	WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 	       adev->mc.vram_end >> 12);
-	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
+	WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
 	       adev->vram_scratch.gpu_addr >> 12);
 	tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
 	tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
-	WREG32(MC_VM_FB_LOCATION, tmp);
+	WREG32(mmMC_VM_FB_LOCATION, tmp);
 	/* XXX double check these! */
-	WREG32(HDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
-	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
-	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
-	WREG32(MC_VM_AGP_BASE, 0);
-	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
-	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
+	WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
+	WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
+	WREG32(mmMC_VM_AGP_BASE, 0);
+	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
+	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);

 	if (gmc_v6_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
@@ -265,16 +283,16 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 	u32 tmp;
 	int chansize, numchan;

-	tmp = RREG32(MC_ARB_RAMCFG);
-	if (tmp & CHANSIZE_OVERRIDE) {
+	tmp = RREG32(mmMC_ARB_RAMCFG);
+	if (tmp & (1 << 11)) {
 		chansize = 16;
-	} else if (tmp & CHANSIZE_MASK) {
+	} else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK) {
 		chansize = 64;
 	} else {
 		chansize = 32;
 	}
-	tmp = RREG32(MC_SHARED_CHMAP);
-	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+	tmp = RREG32(mmMC_SHARED_CHMAP);
+	switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
 	case 0:
 	default:
 		numchan = 1;
@@ -309,15 +327,15 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
 	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
 	/* size in MB on si */
-	adev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
-	adev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+	adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
 	adev->mc.visible_vram_size = adev->mc.aper_size;

 	/* unless the user had overridden it, set the gart
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = amdgpu_ttm_get_gtt_mem_size(adev);
+		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;

@@ -329,9 +347,9 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
 static void gmc_v6_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 					uint32_t vmid)
 {
-	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+	WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);

-	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
+	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }

 static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev,
@@ -355,20 +373,20 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
 {
 	u32 tmp;

-	tmp = RREG32(VM_CONTEXT1_CNTL);
-	tmp = REG_SET_FIELD(tmp, mmVM_CONTEXT1_CNTL,
-			    xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	tmp = REG_SET_FIELD(tmp, mmVM_CONTEXT1_CNTL,
-			    xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	tmp = REG_SET_FIELD(tmp, mmVM_CONTEXT1_CNTL,
-			    xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	tmp = REG_SET_FIELD(tmp, mmVM_CONTEXT1_CNTL,
-			    xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	tmp = REG_SET_FIELD(tmp, mmVM_CONTEXT1_CNTL,
-			    xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	tmp = REG_SET_FIELD(tmp, mmVM_CONTEXT1_CNTL,
-			    xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
-	WREG32(VM_CONTEXT1_CNTL, tmp);
+	tmp = RREG32(mmVM_CONTEXT1_CNTL);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+			    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+			    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+			    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+			    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+			    READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
+			    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+	WREG32(mmVM_CONTEXT1_CNTL, tmp);
 }

 static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
@@ -383,33 +401,39 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 	if (r)
 		return r;
 	/* Setup TLB control */
-	WREG32(MC_VM_MX_L1_TLB_CNTL,
+	WREG32(mmMC_VM_MX_L1_TLB_CNTL,
 	       (0xA << 7) |
-	       ENABLE_L1_TLB |
-	       ENABLE_L1_FRAGMENT_PROCESSING |
-	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
-	       ENABLE_ADVANCED_DRIVER_MODEL |
-	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+	       MC_VM_MX_L1_TLB_CNTL__ENABLE_L1_TLB_MASK |
+	       MC_VM_MX_L1_TLB_CNTL__ENABLE_L1_FRAGMENT_PROCESSING_MASK |
+	       MC_VM_MX_L1_TLB_CNTL__SYSTEM_ACCESS_MODE_MASK |
+	       MC_VM_MX_L1_TLB_CNTL__ENABLE_ADVANCED_DRIVER_MODEL_MASK |
+	       (0UL << MC_VM_MX_L1_TLB_CNTL__SYSTEM_APERTURE_UNMAPPED_ACCESS__SHIFT));
 	/* Setup L2 cache */
-	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
-	       ENABLE_L2_FRAGMENT_PROCESSING |
-	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
-	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
-	       EFFECTIVE_L2_QUEUE_SIZE(7) |
-	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
-	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
-	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-	       BANK_SELECT(4) |
-	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
+	WREG32(mmVM_L2_CNTL,
+	       VM_L2_CNTL__ENABLE_L2_CACHE_MASK |
+	       VM_L2_CNTL__ENABLE_L2_FRAGMENT_PROCESSING_MASK |
+	       VM_L2_CNTL__ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+	       VM_L2_CNTL__ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+	       (7UL << VM_L2_CNTL__EFFECTIVE_L2_QUEUE_SIZE__SHIFT) |
+	       (1UL << VM_L2_CNTL__CONTEXT1_IDENTITY_ACCESS_MODE__SHIFT));
+	WREG32(mmVM_L2_CNTL2,
+	       VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK |
+	       VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK);
+	WREG32(mmVM_L2_CNTL3,
+	       VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
+	       (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
+	       (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
 	/* setup context0 */
-	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
-	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
-	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
-	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
+	WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
 			(u32)(adev->dummy_page.addr >> 12));
-	WREG32(VM_CONTEXT0_CNTL2, 0);
-	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
-				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
+	WREG32(mmVM_CONTEXT0_CNTL2, 0);
+	WREG32(mmVM_CONTEXT0_CNTL,
+	       VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK |
+	       (0UL << VM_CONTEXT0_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
+	       VM_CONTEXT0_CNTL__RANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK);

 	WREG32(0x575, 0);
 	WREG32(0x576, 0);
@@ -417,39 +441,41 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)

 	/* empty context1-15 */
 	/* set vm size, must be a multiple of 4 */
-	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
-	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1);
+	WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
+	WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1);
 	/* Assign the pt base to something valid for now; the pts used for
 	 * the VMs are determined by the application and setup and assigned
 	 * on the fly in the vm part of radeon_gart.c
 	 */
 	for (i = 1; i < 16; i++) {
 		if (i < 8)
-			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
+			WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
 			       adev->gart.table_addr >> 12);
 		else
-			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
+			WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
 			       adev->gart.table_addr >> 12);
 	}

 	/* enable context1-15 */
-	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
+	WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
 	       (u32)(adev->dummy_page.addr >> 12));
-	WREG32(VM_CONTEXT1_CNTL2, 4);
-	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-				PAGE_TABLE_BLOCK_SIZE(amdgpu_vm_block_size - 9) |
-				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
-				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
-				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
-				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
-				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
-				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
-				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
-				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
-				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
-				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
+	WREG32(mmVM_CONTEXT1_CNTL2, 4);
+	WREG32(mmVM_CONTEXT1_CNTL,
+	       VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
+	       (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
+	       ((amdgpu_vm_block_size - 9) << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT) |
+	       VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+	       VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK |
+	       VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+	       VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK |
+	       VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+	       VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK |
+	       VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+	       VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK |
+	       VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+	       VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_DEFAULT_MASK |
+	       VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+	       VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK);

 	gmc_v6_0_gart_flush_gpu_tlb(adev, 0);
 	dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -488,19 +514,22 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
 	}*/

 	/* Disable all tables */
-	WREG32(VM_CONTEXT0_CNTL, 0);
-	WREG32(VM_CONTEXT1_CNTL, 0);
+	WREG32(mmVM_CONTEXT0_CNTL, 0);
+	WREG32(mmVM_CONTEXT1_CNTL, 0);
 	/* Setup TLB control */
-	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
-	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
+	WREG32(mmMC_VM_MX_L1_TLB_CNTL,
+	       MC_VM_MX_L1_TLB_CNTL__SYSTEM_ACCESS_MODE_MASK |
+	       (0UL << MC_VM_MX_L1_TLB_CNTL__SYSTEM_APERTURE_UNMAPPED_ACCESS__SHIFT));
 	/* Setup L2 cache */
-	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
-	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
-	       EFFECTIVE_L2_QUEUE_SIZE(7) |
-	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
-	WREG32(VM_L2_CNTL2, 0);
-	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
+	WREG32(mmVM_L2_CNTL,
+	       VM_L2_CNTL__ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+	       VM_L2_CNTL__ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE_MASK |
+	       (7UL << VM_L2_CNTL__EFFECTIVE_L2_QUEUE_SIZE__SHIFT) |
+	       (1UL << VM_L2_CNTL__CONTEXT1_IDENTITY_ACCESS_MODE__SHIFT));
+	WREG32(mmVM_L2_CNTL2, 0);
+	WREG32(mmVM_L2_CNTL3,
+	       VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK |
+	       (0UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
 	amdgpu_gart_table_vram_unpin(adev);
 }

@@ -523,7 +552,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev)

 	/* base offset of vram pages */
 	if (adev->flags & AMD_IS_APU) {
-		u64 tmp = RREG32(MC_VM_FB_OFFSET);
+		u64 tmp = RREG32(mmMC_VM_FB_OFFSET);
 		tmp <<= 22;
 		adev->vm_manager.vram_base_offset = tmp;
 	} else
@@ -540,19 +569,19 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
 				     u32 status, u32 addr, u32 mc_client)
 {
 	u32 mc_id;
-	u32 vmid = REG_GET_FIELD(status, mmVM_CONTEXT1_PROTECTION_FAULT_STATUS, xxVMID);
-	u32 protections = REG_GET_FIELD(status, mmVM_CONTEXT1_PROTECTION_FAULT_STATUS,
-					xxPROTECTIONS);
+	u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+	u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+					PROTECTIONS);
 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

-	mc_id = REG_GET_FIELD(status, mmVM_CONTEXT1_PROTECTION_FAULT_STATUS,
-			      xxMEMORY_CLIENT_ID);
+	mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+			      MEMORY_CLIENT_ID);

 	dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
 	       protections, vmid, addr,
-	       REG_GET_FIELD(status, mmVM_CONTEXT1_PROTECTION_FAULT_STATUS,
-			     xxMEMORY_CLIENT_RW) ?
+	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+			     MEMORY_CLIENT_RW) ?
 	       "write" : "read", block, mc_client, mc_id);
 }

@@ -655,7 +684,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
 {
 	u32 orig, data;

-	orig = data = RREG32(HDP_HOST_PATH_CNTL);
+	orig = data = RREG32(mmHDP_HOST_PATH_CNTL);

 	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
 		data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
@@ -663,7 +692,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
 		data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);

 	if (orig != data)
-		WREG32(HDP_HOST_PATH_CNTL, data);
+		WREG32(mmHDP_HOST_PATH_CNTL, data);
 }

 static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
@@ -671,7 +700,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
 {
 	u32 orig, data;

-	orig = data = RREG32(HDP_MEM_POWER_LS);
+	orig = data = RREG32(mmHDP_MEM_POWER_LS);

 	if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
 		data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
@@ -679,7 +708,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
 		data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);

 	if (orig != data)
-		WREG32(HDP_MEM_POWER_LS, data);
+		WREG32(mmHDP_MEM_POWER_LS, data);
 }
 */

@@ -713,7 +742,7 @@ static int gmc_v6_0_early_init(void *handle)
 	if (adev->flags & AMD_IS_APU) {
 		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
 	} else {
-		u32 tmp = RREG32(MC_SEQ_MISC0);
+		u32 tmp = RREG32(mmMC_SEQ_MISC0);
 		tmp &= MC_SEQ_MISC0__MT__MASK;
 		adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
 	}
@@ -766,11 +795,6 @@ static int gmc_v6_0_sw_init(void *handle)
 		return r;
 	}

-	r = amdgpu_ttm_global_init(adev);
-	if (r) {
-		return r;
-	}
-
 	r = gmc_v6_0_mc_init(adev);
 	if (r)
 		return r;
@@ -879,7 +903,7 @@ static int gmc_v6_0_resume(void *handle)
 static bool gmc_v6_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	u32 tmp = RREG32(SRBM_STATUS);
+	u32 tmp = RREG32(mmSRBM_STATUS);

 	if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
 		   SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK | SRBM_STATUS__VMC_BUSY_MASK))
@@ -895,7 +919,7 @@ static int gmc_v6_0_wait_for_idle(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(SRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
+		tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
 					       SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
 					       SRBM_STATUS__MCC_BUSY_MASK |
 					       SRBM_STATUS__MCD_BUSY_MASK |
@@ -913,17 +937,17 @@ static int gmc_v6_0_soft_reset(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_mode_mc_save save;
 	u32 srbm_soft_reset = 0;
-	u32 tmp = RREG32(SRBM_STATUS);
+	u32 tmp = RREG32(mmSRBM_STATUS);

 	if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
-						mmSRBM_SOFT_RESET, xxSOFT_RESET_VMC, 1);
+						SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);

 	if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
 		   SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) {
 		if (!(adev->flags & AMD_IS_APU))
 			srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
-							mmSRBM_SOFT_RESET, xxSOFT_RESET_MC, 1);
+							SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
 	}

 	if (srbm_soft_reset) {
@@ -933,17 +957,17 @@ static int gmc_v6_0_soft_reset(void *handle)
 		}


-		tmp = RREG32(SRBM_SOFT_RESET);
+		tmp = RREG32(mmSRBM_SOFT_RESET);
 		tmp |= srbm_soft_reset;
 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
-		WREG32(SRBM_SOFT_RESET, tmp);
-		tmp = RREG32(SRBM_SOFT_RESET);
+		WREG32(mmSRBM_SOFT_RESET, tmp);
+		tmp = RREG32(mmSRBM_SOFT_RESET);

 		udelay(50);

 		tmp &= ~srbm_soft_reset;
-		WREG32(SRBM_SOFT_RESET, tmp);
-		tmp = RREG32(SRBM_SOFT_RESET);
+		WREG32(mmSRBM_SOFT_RESET, tmp);
+		tmp = RREG32(mmSRBM_SOFT_RESET);

 		udelay(50);

@@ -969,20 +993,20 @@ static int gmc_v6_0_vm_fault_interrupt_state(struct amdgpu_device *adev,

 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
-		tmp = RREG32(VM_CONTEXT0_CNTL);
+		tmp = RREG32(mmVM_CONTEXT0_CNTL);
 		tmp &= ~bits;
-		WREG32(VM_CONTEXT0_CNTL, tmp);
-		tmp = RREG32(VM_CONTEXT1_CNTL);
+		WREG32(mmVM_CONTEXT0_CNTL, tmp);
+		tmp = RREG32(mmVM_CONTEXT1_CNTL);
 		tmp &= ~bits;
-		WREG32(VM_CONTEXT1_CNTL, tmp);
+		WREG32(mmVM_CONTEXT1_CNTL, tmp);
 		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
-		tmp = RREG32(VM_CONTEXT0_CNTL);
+		tmp = RREG32(mmVM_CONTEXT0_CNTL);
 		tmp |= bits;
-		WREG32(VM_CONTEXT0_CNTL, tmp);
-		tmp = RREG32(VM_CONTEXT1_CNTL);
+		WREG32(mmVM_CONTEXT0_CNTL, tmp);
+		tmp = RREG32(mmVM_CONTEXT1_CNTL);
 		tmp |= bits;
-		WREG32(VM_CONTEXT1_CNTL, tmp);
+		WREG32(mmVM_CONTEXT1_CNTL, tmp);
 		break;
 	default:
 		break;
@@ -997,9 +1021,9 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
 {
 	u32 addr, status;

-	addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
-	status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
-	WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+	addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
+	status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+	WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1);

 	if (!addr && !status)
 		return 0;
@@ -1007,13 +1031,15 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
 		gmc_v6_0_set_fault_enable_default(adev, false);

-	dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
-		entry->src_id, entry->src_data);
-	dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-		addr);
-	dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-		status);
-	gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+	if (printk_ratelimit()) {
+		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+			entry->src_id, entry->src_data);
+		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+			addr);
+		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+			status);
+		gmc_v6_0_vm_decode_fault(adev, status, addr, 0);
+	}

 	return 0;
 }
@@ -1030,7 +1056,7 @@ static int gmc_v6_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
+static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
 	.name = "gmc_v6_0",
 	.early_init = gmc_v6_0_early_init,
 	.late_init = gmc_v6_0_late_init,
@@ -1069,3 +1095,11 @@ static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->mc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
 }

+const struct amdgpu_ip_block_version gmc_v6_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GMC,
+	.major = 6,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &gmc_v6_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.h
@@ -24,6 +24,6 @@
 #ifndef __GMC_V6_0_H__
 #define __GMC_V6_0_H__

-extern const struct amd_ip_funcs gmc_v6_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v6_0_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -385,7 +385,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = amdgpu_ttm_get_gtt_mem_size(adev);
+		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;

@@ -711,7 +711,7 @@ static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev,
 	mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			      MEMORY_CLIENT_ID);

-	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+	dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
 	       protections, vmid, addr,
 	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			     MEMORY_CLIENT_RW) ?
@@ -945,11 +945,6 @@ static int gmc_v7_0_sw_init(void *handle)
 		return r;
 	}

-	r = amdgpu_ttm_global_init(adev);
-	if (r) {
-		return r;
-	}
-
 	r = gmc_v7_0_mc_init(adev);
 	if (r)
 		return r;
@@ -1198,13 +1193,15 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
 		gmc_v7_0_set_fault_enable_default(adev, false);

-	dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
-		entry->src_id, entry->src_data);
-	dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-		addr);
-	dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-		status);
-	gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client);
+	if (printk_ratelimit()) {
+		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+			entry->src_id, entry->src_data);
+		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+			addr);
+		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+			status);
+		gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client);
+	}

 	return 0;
 }
@@ -1235,7 +1232,7 @@ static int gmc_v7_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
+static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
 	.name = "gmc_v7_0",
 	.early_init = gmc_v7_0_early_init,
 	.late_init = gmc_v7_0_late_init,
@@ -1273,3 +1270,21 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->mc.vm_fault.num_types = 1;
 	adev->mc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GMC,
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &gmc_v7_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gmc_v7_4_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GMC,
+	.major = 7,
+	.minor = 4,
+	.rev = 0,
+	.funcs = &gmc_v7_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
@@ -24,6 +24,7 @@
 #ifndef __GMC_V7_0_H__
 #define __GMC_V7_0_H__

-extern const struct amd_ip_funcs gmc_v7_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v7_0_ip_block;
+extern const struct amdgpu_ip_block_version gmc_v7_4_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -472,7 +472,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 	 * size equal to the 1024 or vram, whichever is larger.
 	 */
 	if (amdgpu_gart_size == -1)
-		adev->mc.gtt_size = amdgpu_ttm_get_gtt_mem_size(adev);
+		adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size);
 	else
 		adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20;

@@ -837,7 +837,7 @@ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev,
 	mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			      MEMORY_CLIENT_ID);

-	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+	dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
 	       protections, vmid, addr,
 	       REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
 			     MEMORY_CLIENT_RW) ?
@@ -952,11 +952,6 @@ static int gmc_v8_0_sw_init(void *handle)
 		return r;
 	}

-	r = amdgpu_ttm_global_init(adev);
-	if (r) {
-		return r;
-	}
-
 	r = gmc_v8_0_mc_init(adev);
 	if (r)
 		return r;
@@ -1242,13 +1237,15 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST)
 		gmc_v8_0_set_fault_enable_default(adev, false);

-	dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
-		entry->src_id, entry->src_data);
-	dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-		addr);
-	dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-		status);
-	gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client);
+	if (printk_ratelimit()) {
+		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
+			entry->src_id, entry->src_data);
+		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
+			addr);
+		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+			status);
+		gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client);
+	}

 	return 0;
 }
@@ -1437,7 +1434,7 @@ static int gmc_v8_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
+static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
 	.name = "gmc_v8_0",
 	.early_init = gmc_v8_0_early_init,
 	.late_init = gmc_v8_0_late_init,
@@ -1478,3 +1475,30 @@ static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->mc.vm_fault.num_types = 1;
 	adev->mc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GMC,
+	.major = 8,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &gmc_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gmc_v8_1_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GMC,
+	.major = 8,
+	.minor = 1,
+	.rev = 0,
+	.funcs = &gmc_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version gmc_v8_5_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_GMC,
+	.major = 8,
+	.minor = 5,
+	.rev = 0,
+	.funcs = &gmc_v8_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
@@ -24,6 +24,8 @@
 #ifndef __GMC_V8_0_H__
 #define __GMC_V8_0_H__

-extern const struct amd_ip_funcs gmc_v8_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version gmc_v8_1_ip_block;
+extern const struct amdgpu_ip_block_version gmc_v8_5_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -392,7 +392,7 @@ static int iceland_ih_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs iceland_ih_ip_funcs = {
+static const struct amd_ip_funcs iceland_ih_ip_funcs = {
 	.name = "iceland_ih",
 	.early_init = iceland_ih_early_init,
 	.late_init = NULL,
@@ -421,3 +421,11 @@ static void iceland_ih_set_interrupt_funcs(struct amdgpu_device *adev)
 		adev->irq.ih_funcs = &iceland_ih_funcs;
 }

+const struct amdgpu_ip_block_version iceland_ih_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_IH,
+	.major = 2,
+	.minor = 4,
+	.rev = 0,
+	.funcs = &iceland_ih_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.h
@@ -24,6 +24,6 @@
 #ifndef __ICELAND_IH_H__
 #define __ICELAND_IH_H__

-extern const struct amd_ip_funcs iceland_ih_ip_funcs;
+extern const struct amdgpu_ip_block_version iceland_ih_ip_block;

 #endif /* __ICELAND_IH_H__ */
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2796,7 +2796,7 @@ static int kv_parse_power_table(struct amdgpu_device *adev)
 	adev->pm.dpm.num_ps = state_array->ucNumEntries;

 	/* fill in the vce power states */
-	for (i = 0; i < AMDGPU_MAX_VCE_LEVELS; i++) {
+	for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) {
 		u32 sclk;
 		clock_array_index = adev->pm.dpm.vce_states[i].clk_idx;
 		clock_info = (union pplib_clock_info *)
@@ -2845,7 +2845,7 @@ static int kv_dpm_init(struct amdgpu_device *adev)
 		pi->caps_tcp_ramping = true;
 	}

-	if (amdgpu_sclk_deep_sleep_en)
+	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -3245,6 +3245,18 @@ static int kv_dpm_set_powergating_state(void *handle,
 	return 0;
 }

+static int kv_check_state_equal(struct amdgpu_device *adev,
+				struct amdgpu_ps *cps,
+				struct amdgpu_ps *rps,
+				bool *equal)
+{
+	if (equal == NULL)
+		return -EINVAL;
+
+	*equal = false;
+	return 0;
+}
+
 const struct amd_ip_funcs kv_dpm_ip_funcs = {
 	.name = "kv_dpm",
 	.early_init = kv_dpm_early_init,
@@ -3275,6 +3287,8 @@ static const struct amdgpu_dpm_funcs kv_dpm_funcs = {
 	.force_performance_level = &kv_dpm_force_performance_level,
 	.powergate_uvd = &kv_dpm_powergate_uvd,
 	.enable_bapm = &kv_dpm_enable_bapm,
+	.get_vce_clock_state = amdgpu_get_vce_clock_state,
+	.check_state_equal = kv_check_state_equal,
 };

 static void kv_dpm_set_dpm_funcs(struct amdgpu_device *adev)
@@ -3293,3 +3307,12 @@ static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev)
 	adev->pm.dpm.thermal.irq.num_types = AMDGPU_THERMAL_IRQ_LAST;
 	adev->pm.dpm.thermal.irq.funcs = &kv_dpm_irq_funcs;
 }
+
+const struct amdgpu_ip_block_version kv_dpm_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 7,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &kv_dpm_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -232,10 +232,10 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)

 	for (i = 0; i < count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
-			amdgpu_ring_write(ring, ring->nop |
+			amdgpu_ring_write(ring, ring->funcs->nop |
 				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
 		else
-			amdgpu_ring_write(ring, ring->nop);
+			amdgpu_ring_write(ring, ring->funcs->nop);
 }

 /**
@@ -668,7 +668,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -705,7 +705,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;

-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -725,7 +725,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)

 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -775,11 +775,11 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
 	unsigned ndw = count * 2;

 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
 	ib->ptr[ib->length_dw++] = pe;
 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 	ib->ptr[ib->length_dw++] = ndw;
-	for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+	for (; ndw > 0; ndw -= 2) {
 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
 		value += incr;
@@ -902,22 +902,6 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }

-static unsigned sdma_v2_4_ring_get_emit_ib_size(struct amdgpu_ring *ring)
-{
-	return
-		7 + 6; /* sdma_v2_4_ring_emit_ib */
-}
-
-static unsigned sdma_v2_4_ring_get_dma_frame_size(struct amdgpu_ring *ring)
-{
-	return
-		6 + /* sdma_v2_4_ring_emit_hdp_flush */
-		3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
-		6 + /* sdma_v2_4_ring_emit_pipeline_sync */
-		12 + /* sdma_v2_4_ring_emit_vm_flush */
-		10 + 10 + 10; /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
-}
-
 static int sdma_v2_4_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -965,11 +949,10 @@ static int sdma_v2_4_sw_init(void *handle)
 		ring->use_doorbell = false;
 		sprintf(ring->name, "sdma%d", i);
 		r = amdgpu_ring_init(adev, ring, 1024,
-				     SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
-				     AMDGPU_RING_TYPE_SDMA);
+				     AMDGPU_SDMA_IRQ_TRAP0 :
+				     AMDGPU_SDMA_IRQ_TRAP1);
 		if (r)
 			return r;
 	}
@@ -1204,7 +1187,7 @@ static int sdma_v2_4_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
+static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
 	.name = "sdma_v2_4",
 	.early_init = sdma_v2_4_early_init,
 	.late_init = NULL,
@@ -1222,10 +1205,19 @@ const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
 };

 static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_SDMA,
+	.align_mask = 0xf,
+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.get_rptr = sdma_v2_4_ring_get_rptr,
 	.get_wptr = sdma_v2_4_ring_get_wptr,
 	.set_wptr = sdma_v2_4_ring_set_wptr,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		6 + /* sdma_v2_4_ring_emit_hdp_flush */
+		3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
+		6 + /* sdma_v2_4_ring_emit_pipeline_sync */
+		12 + /* sdma_v2_4_ring_emit_vm_flush */
+		10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
+	.emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
 	.emit_ib = sdma_v2_4_ring_emit_ib,
 	.emit_fence = sdma_v2_4_ring_emit_fence,
 	.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
@@ -1236,8 +1228,6 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
 	.test_ib = sdma_v2_4_ring_test_ib,
 	.insert_nop = sdma_v2_4_ring_insert_nop,
 	.pad_ib = sdma_v2_4_ring_pad_ib,
-	.get_emit_ib_size = sdma_v2_4_ring_get_emit_ib_size,
-	.get_dma_frame_size = sdma_v2_4_ring_get_dma_frame_size,
 };

 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1350,3 +1340,12 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
 		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
+
+const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SDMA,
+	.major = 2,
+	.minor = 4,
+	.rev = 0,
+	.funcs = &sdma_v2_4_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.h
@@ -24,6 +24,6 @@
 #ifndef __SDMA_V2_4_H__
 #define __SDMA_V2_4_H__

-extern const struct amd_ip_funcs sdma_v2_4_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v2_4_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -392,10 +392,10 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)

 	for (i = 0; i < count; i++)
 		if (sdma && sdma->burst_nop && (i == 0))
-			amdgpu_ring_write(ring, ring->nop |
+			amdgpu_ring_write(ring, ring->funcs->nop |
 				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
 		else
-			amdgpu_ring_write(ring, ring->nop);
+			amdgpu_ring_write(ring, ring->funcs->nop);
 }

 /**
@@ -871,7 +871,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
-	struct fence *f = NULL;
+	struct dma_fence *f = NULL;
 	unsigned index;
 	u32 tmp = 0;
 	u64 gpu_addr;
@@ -908,7 +908,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	if (r)
 		goto err1;

-	r = fence_wait_timeout(f, false, timeout);
+	r = dma_fence_wait_timeout(f, false, timeout);
 	if (r == 0) {
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
@@ -927,7 +927,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	}
 err1:
 	amdgpu_ib_free(adev, &ib, NULL);
-	fence_put(f);
+	dma_fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -977,11 +977,11 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
 	unsigned ndw = count * 2;

 	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 	ib->ptr[ib->length_dw++] = ndw;
-	for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+	for (; ndw > 0; ndw -= 2) {
 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
 		value += incr;
@@ -1104,22 +1104,6 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }

-static unsigned sdma_v3_0_ring_get_emit_ib_size(struct amdgpu_ring *ring)
-{
-	return
-		7 + 6; /* sdma_v3_0_ring_emit_ib */
-}
-
-static unsigned sdma_v3_0_ring_get_dma_frame_size(struct amdgpu_ring *ring)
-{
-	return
-		6 + /* sdma_v3_0_ring_emit_hdp_flush */
-		3 + /* sdma_v3_0_ring_emit_hdp_invalidate */
-		6 + /* sdma_v3_0_ring_emit_pipeline_sync */
-		12 + /* sdma_v3_0_ring_emit_vm_flush */
-		10 + 10 + 10; /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
-}
-
 static int sdma_v3_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1177,11 +1161,10 @@ static int sdma_v3_0_sw_init(void *handle)

 		sprintf(ring->name, "sdma%d", i);
 		r = amdgpu_ring_init(adev, ring, 1024,
-				     SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
 				     &adev->sdma.trap_irq,
 				     (i == 0) ?
-				     AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP1,
-				     AMDGPU_RING_TYPE_SDMA);
+				     AMDGPU_SDMA_IRQ_TRAP0 :
+				     AMDGPU_SDMA_IRQ_TRAP1);
 		if (r)
 			return r;
 	}
@@ -1544,7 +1527,7 @@ static int sdma_v3_0_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
+static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
 	.name = "sdma_v3_0",
 	.early_init = sdma_v3_0_early_init,
 	.late_init = NULL,
@@ -1565,10 +1548,19 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
 };

 static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_SDMA,
+	.align_mask = 0xf,
+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.get_rptr = sdma_v3_0_ring_get_rptr,
 	.get_wptr = sdma_v3_0_ring_get_wptr,
 	.set_wptr = sdma_v3_0_ring_set_wptr,
-	.parse_cs = NULL,
+	.emit_frame_size =
+		6 + /* sdma_v3_0_ring_emit_hdp_flush */
+		3 + /* sdma_v3_0_ring_emit_hdp_invalidate */
+		6 + /* sdma_v3_0_ring_emit_pipeline_sync */
+		12 + /* sdma_v3_0_ring_emit_vm_flush */
+		10 + 10 + 10, /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
+	.emit_ib_size = 7 + 6, /* sdma_v3_0_ring_emit_ib */
 	.emit_ib = sdma_v3_0_ring_emit_ib,
 	.emit_fence = sdma_v3_0_ring_emit_fence,
 	.emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
@@ -1579,8 +1571,6 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
 	.test_ib = sdma_v3_0_ring_test_ib,
 	.insert_nop = sdma_v3_0_ring_insert_nop,
 	.pad_ib = sdma_v3_0_ring_pad_ib,
-	.get_emit_ib_size = sdma_v3_0_ring_get_emit_ib_size,
-	.get_dma_frame_size = sdma_v3_0_ring_get_dma_frame_size,
 };

 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1693,3 +1683,21 @@ static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
+
+const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SDMA,
+	.major = 3,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &sdma_v3_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version sdma_v3_1_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SDMA,
+	.major = 3,
+	.minor = 1,
+	.rev = 0,
+	.funcs = &sdma_v3_0_ip_funcs,
+};
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.h
@@ -24,6 +24,7 @@
 #ifndef __SDMA_V3_0_H__
 #define __SDMA_V3_0_H__

-extern const struct amd_ip_funcs sdma_v3_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v3_0_ip_block;
+extern const struct amdgpu_ip_block_version sdma_v3_1_ip_block;

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -39,6 +39,7 @@
 #include "si_dma.h"
 #include "dce_v6_0.h"
 #include "si.h"
+#include "dce_virtual.h"

 static const u32 tahiti_golden_registers[] =
 {
@@ -905,7 +906,7 @@ static void si_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 }

-u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg)
+static u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg)
 {
 	unsigned long flags;
 	u32 r;
@@ -918,7 +919,7 @@ u32 si_pciep_rreg(struct amdgpu_device *adev, u32 reg)
 	return r;
 }

-void si_pciep_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+static void si_pciep_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
 	unsigned long flags;

@@ -1811,7 +1812,7 @@ static int si_common_set_powergating_state(void *handle,
 	return 0;
 }

-const struct amd_ip_funcs si_common_ip_funcs = {
+static const struct amd_ip_funcs si_common_ip_funcs = {
 	.name = "si_common",
 	.early_init = si_common_early_init,
 	.late_init = NULL,
@@ -1828,119 +1829,13 @@ const struct amd_ip_funcs si_common_ip_funcs = {
 	.set_powergating_state = si_common_set_powergating_state,
 };

-static const struct amdgpu_ip_block_version verde_ip_blocks[] =
+static const struct amdgpu_ip_block_version si_common_ip_block =
 {
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v6_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_DCE,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &dce_v6_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gfx_v6_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_dma_ip_funcs,
-	},
-/*	{
-		.type = AMD_IP_BLOCK_TYPE_UVD,
-		.major = 3,
-		.minor = 1,
-		.rev = 0,
-		.funcs = &si_null_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_VCE,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_null_ip_funcs,
-	},
-	*/
-};
-
-
-static const struct amdgpu_ip_block_version hainan_ip_blocks[] =
-{
-	{
-		.type = AMD_IP_BLOCK_TYPE_COMMON,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_common_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GMC,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gmc_v6_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_IH,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_ih_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SMC,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &amdgpu_pp_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_GFX,
-		.major = 6,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &gfx_v6_0_ip_funcs,
-	},
-	{
-		.type = AMD_IP_BLOCK_TYPE_SDMA,
-		.major = 1,
-		.minor = 0,
-		.rev = 0,
-		.funcs = &si_dma_ip_funcs,
-	},
+	.type = AMD_IP_BLOCK_TYPE_COMMON,
+	.major = 1,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &si_common_ip_funcs,
 };

 int si_set_ip_blocks(struct amdgpu_device *adev)
@@ -1949,13 +1844,42 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
 	case CHIP_VERDE:
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
+		amdgpu_ip_block_add(adev, &si_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &si_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		else
+			amdgpu_ip_block_add(adev, &dce_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &si_dma_ip_block);
+		/* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */
+		/* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */
+		break;
 	case CHIP_OLAND:
-		adev->ip_blocks = verde_ip_blocks;
-		adev->num_ip_blocks = ARRAY_SIZE(verde_ip_blocks);
+		amdgpu_ip_block_add(adev, &si_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &si_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		else
+			amdgpu_ip_block_add(adev, &dce_v6_4_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &si_dma_ip_block);
+		/* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */
+		/* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */
 		break;
 	case CHIP_HAINAN:
-		adev->ip_blocks = hainan_ip_blocks;
-		adev->num_ip_blocks = ARRAY_SIZE(hainan_ip_blocks);
+		amdgpu_ip_block_add(adev, &si_common_ip_block);
+		amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &si_ih_ip_block);
+		amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
+		if (adev->enable_virtual_display)
+			amdgpu_ip_block_add(adev, &dce_virtual_ip_block);
+		amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block);
+		amdgpu_ip_block_add(adev, &si_dma_ip_block);
 		break;
 	default:
 		BUG();
--- a/drivers/gpu/drm/amd/amdgpu/si.h
+++ b/drivers/gpu/drm/amd/amdgpu/si.h
@@ -24,8 +24,6 @@
 #ifndef __SI_H__
 #define __SI_H__

-extern const struct amd_ip_funcs si_common_ip_funcs;
-
 void si_srbm_select(struct amdgpu_device *adev,
 		     u32 me, u32 pipe, u32 queue, u32 vmid);
 int si_set_ip_blocks(struct amdgpu_device *adev);
--- a/Show More
+++ b/Show More