Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux

Pull drm updates from Dave Airlie:
 "I Was Almost Tempted To Capitalise Every Word, but then I decided I
  couldn't read it myself!

  I've also got one pull request for the sti driver outstanding.  It
  relied on a commit in Greg's tree and I didn't find out in time, that
  commit is in your tree now so I might send that along once this is
  merged.

  I also had the accidental misfortune to have access to a Skylake on my
  desk for a few days, and I've had to encourage Intel to try harder,
  which seems to be happening now.

  Here is the main drm-next pull request for 4.4.

  Highlights:

  New driver:
        vc4 driver for the Rasberry Pi VPU.
        (From Eric Anholt at Broadcom.)

  Core:
        Atomic fbdev support
        Atomic helpers for runtime pm
        dp/aux i2c STATUS_UPDATE handling
        struct_mutex usage cleanups.
        Generic of probing support.

  Documentation:
        Kerneldoc for VGA switcheroo code.
        Rename to gpu instead of drm to reflect scope.

  i915:
        Skylake GuC firmware fixes
        HPD A support
        VBT backlight fallbacks
        Fastboot by default for some systems
        FBC work
        BXT/SKL workarounds
        Skylake deeper sleep state fixes

  amdgpu:
        Enable GPU scheduler by default
        New atombios opcodes
        GPUVM debugging options
        Stoney support.
        Fencing cleanups.

  radeon:
        More efficient CS checking

  nouveau:
        gk20a instance memory handling improvements.
        Improved PGOB detection and GK107 support
        Kepler GDDR5 PLL statbility improvement
        G8x/GT2xx reclock improvements
        new userspace API compatiblity fixes.

  virtio-gpu:
        Add 3D support - qemu 2.5 has it merged for it's gtk backend.

  msm:
        Initial msm88896 (snapdragon 8200)

  exynos:
        HDMI cleanups
        Enable mixer driver byt default
        Add DECON-TV support

  vmwgfx:
        Move to using memremap + fixes.

  rcar-du:
        Add support for R8A7793/4 DU

  armada:
        Remove support for non-component mode
        Improved plane handling
        Power savings while in DPMS off.

  tda998x:
        Remove unused slave encoder support
        Use more HDMI helpers
        Fix EDID read handling

  dwhdmi:
        Interlace video mode support for ipu-v3/dw_hdmi
        Hotplug state fixes
        Audio driver integration

  imx:
        More color formats support.

  tegra:
        Minor fixes/improvements"

[ Merge fixup: remove unused variable 'dev' that had all uses removed in
  commit 4e270f0880: "drm/gem: Drop struct_mutex requirement from
  drm_gem_mmap_obj" ]

* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (764 commits)
  drm/vmwgfx: Relax irq locking somewhat
  drm/vmwgfx: Properly flush cursor updates and page-flips
  drm/i915/skl: disable display side power well support for now
  drm/i915: Extend DSL readout fix to BDW and SKL.
  drm/i915: Do graphics device reset under forcewake
  drm/i915: Skip fence installation for objects with rotated views (v4)
  vga_switcheroo: Drop client power state VGA_SWITCHEROO_INIT
  drm/amdgpu: group together common fence implementation
  drm/amdgpu: remove AMDGPU_FENCE_OWNER_MOVE
  drm/amdgpu: remove now unused fence functions
  drm/amdgpu: fix fence fallback check
  drm/amdgpu: fix stoping the scheduler timeout
  drm/amdgpu: cleanup on error in amdgpu_cs_ioctl()
  drm/i915: Fix locking around GuC firmware load
  drm/amdgpu: update Fiji's Golden setting
  drm/amdgpu: update Fiji's rev id
  drm/amdgpu: extract common code in vi_common_early_init
  drm/amd/scheduler: don't oops on failure to load
  drm/amdgpu: don't oops on failure to load (v2)
  drm/amdgpu: don't VT switch on suspend
  ...
This commit is contained in:
Linus Torvalds
2015-11-10 09:33:06 -08:00
468 changed files with 53369 additions and 9828 deletions

View File

@@ -1005,12 +1005,14 @@ out:
if (!needs_clflush_after &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
if (i915_gem_clflush_object(obj, obj->pin_display))
i915_gem_chipset_flush(dev);
needs_clflush_after = true;
}
}
if (needs_clflush_after)
i915_gem_chipset_flush(dev);
else
obj->cache_dirty = true;
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
return ret;
@@ -1711,8 +1713,8 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
/**
* i915_gem_fault - fault a page into the GTT
* vma: VMA in question
* vmf: fault info
* @vma: VMA in question
* @vmf: fault info
*
* The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
* from userspace. The fault handler takes care of binding the object to
@@ -3205,7 +3207,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
old_write_domain);
}
int i915_vma_unbind(struct i915_vma *vma)
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -3224,13 +3226,11 @@ int i915_vma_unbind(struct i915_vma *vma)
BUG_ON(obj->pages == NULL);
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
/* Continue on if we fail due to EIO, the GPU is hung so we
* should be safe and we need to cleanup or else we might
* cause memory corruption through use-after-free.
*/
if (wait) {
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
}
if (i915_is_ggtt(vma->vm) &&
vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
@@ -3275,6 +3275,16 @@ int i915_vma_unbind(struct i915_vma *vma)
return 0;
}
int i915_vma_unbind(struct i915_vma *vma)
{
return __i915_vma_unbind(vma, true);
}
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
{
return __i915_vma_unbind(vma, false);
}
int i915_gpu_idle(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3354,11 +3364,10 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
u32 size, fence_size, fence_alignment, unfenced_alignment;
u64 start =
flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
u64 end =
flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
u32 fence_alignment, unfenced_alignment;
u32 search_flag, alloc_flag;
u64 start, end;
u64 size, fence_size;
struct i915_vma *vma;
int ret;
@@ -3398,6 +3407,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
}
start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
end = vm->total;
if (flags & PIN_MAPPABLE)
end = min_t(u64, end, dev_priv->gtt.mappable_end);
if (flags & PIN_ZONE_4G)
end = min_t(u64, end, (1ULL << 32));
if (alignment == 0)
alignment = flags & PIN_MAPPABLE ? fence_alignment :
unfenced_alignment;
@@ -3413,7 +3429,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
* attempt to find space.
*/
if (size > end) {
DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%llu\n",
DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
ggtt_view ? ggtt_view->type : 0,
size,
flags & PIN_MAPPABLE ? "mappable" : "total",
@@ -3433,13 +3449,21 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
goto err_unpin;
if (flags & PIN_HIGH) {
search_flag = DRM_MM_SEARCH_BELOW;
alloc_flag = DRM_MM_CREATE_TOP;
} else {
search_flag = DRM_MM_SEARCH_DEFAULT;
alloc_flag = DRM_MM_CREATE_DEFAULT;
}
search_free:
ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
size, alignment,
obj->cache_level,
start, end,
DRM_MM_SEARCH_DEFAULT,
DRM_MM_CREATE_DEFAULT);
search_flag,
alloc_flag);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
obj->cache_level,
@@ -3632,59 +3656,117 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
/**
* Changes the cache-level of an object across all VMA.
*
* After this function returns, the object will be in the new cache-level
* across all GTT and the contents of the backing storage will be coherent,
* with respect to the new cache-level. In order to keep the backing storage
* coherent for all users, we only allow a single cache level to be set
* globally on the object and prevent it from being changed whilst the
* hardware is reading from the object. That is if the object is currently
* on the scanout it will be set to uncached (or equivalent display
* cache coherency) and all non-MOCS GPU access will also be uncached so
* that all direct access to the scanout remains coherent.
*/
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
{
struct drm_device *dev = obj->base.dev;
struct i915_vma *vma, *next;
int ret;
bool bound = false;
int ret = 0;
if (obj->cache_level == cache_level)
return 0;
if (i915_gem_obj_is_pinned(obj)) {
DRM_DEBUG("can not change the cache level of pinned objects\n");
return -EBUSY;
}
goto out;
/* Inspect the list of currently bound VMA and unbind any that would
* be invalid given the new cache-level. This is principally to
* catch the issue of the CS prefetch crossing page boundaries and
* reading an invalid PTE on older architectures.
*/
list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
if (vma->pin_count) {
DRM_DEBUG("can not change the cache level of pinned objects\n");
return -EBUSY;
}
if (!i915_gem_valid_gtt_space(vma, cache_level)) {
ret = i915_vma_unbind(vma);
if (ret)
return ret;
}
} else
bound = true;
}
if (i915_gem_obj_bound_any(obj)) {
/* We can reuse the existing drm_mm nodes but need to change the
* cache-level on the PTE. We could simply unbind them all and
* rebind with the correct cache-level on next use. However since
* we already have a valid slot, dma mapping, pages etc, we may as
* rewrite the PTE in the belief that doing so tramples upon less
* state and so involves less work.
*/
if (bound) {
/* Before we change the PTE, the GPU must not be accessing it.
* If we wait upon the object, we know that all the bound
* VMA are no longer active.
*/
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
i915_gem_object_finish_gtt(obj);
if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
/* Access to snoopable pages through the GTT is
* incoherent and on some machines causes a hard
* lockup. Relinquish the CPU mmaping to force
* userspace to refault in the pages and we can
* then double check if the GTT mapping is still
* valid for that pointer access.
*/
i915_gem_release_mmap(obj);
/* Before SandyBridge, you could not use tiling or fence
* registers with snooped memory, so relinquish any fences
* currently pointing to our region in the aperture.
*/
if (INTEL_INFO(dev)->gen < 6) {
/* As we no longer need a fence for GTT access,
* we can relinquish it now (and so prevent having
* to steal a fence from someone else on the next
* fence request). Note GPU activity would have
* dropped the fence as all snoopable access is
* supposed to be linear.
*/
ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
} else {
/* We either have incoherent backing store and
* so no GTT access or the architecture is fully
* coherent. In such cases, existing GTT mmaps
* ignore the cache bit in the PTE and we can
* rewrite it without confusing the GPU or having
* to force userspace to fault back in its mmaps.
*/
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
if (drm_mm_node_allocated(&vma->node)) {
ret = i915_vma_bind(vma, cache_level,
PIN_UPDATE);
if (ret)
return ret;
}
list_for_each_entry(vma, &obj->vma_list, vma_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
if (ret)
return ret;
}
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
out:
/* Flush the dirty CPU caches to the backing storage so that the
* object is now coherent at its new cache level (with respect
* to the access domain).
*/
if (obj->cache_dirty &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj)) {
@@ -3737,6 +3819,15 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
level = I915_CACHE_NONE;
break;
case I915_CACHING_CACHED:
/*
* Due to a HW issue on BXT A stepping, GPU stores via a
* snooped mapping may leave stale data in a corresponding CPU
* cacheline, whereas normally such cachelines would get
* invalidated.
*/
if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
return -ENODEV;
level = I915_CACHE_LLC;
break;
case I915_CACHING_DISPLAY:
@@ -4010,15 +4101,13 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
return -EBUSY;
if (i915_vma_misplaced(vma, alignment, flags)) {
unsigned long offset;
offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) :
i915_gem_obj_offset(obj, vm);
WARN(vma->pin_count,
"bo is already pinned in %s with incorrect alignment:"
" offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
" offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
" obj->map_and_fenceable=%d\n",
ggtt_view ? "ggtt" : "ppgtt",
offset,
upper_32_bits(vma->node.start),
lower_32_bits(vma->node.start),
alignment,
!!(flags & PIN_MAPPABLE),
obj->map_and_fenceable);
@@ -4525,22 +4614,6 @@ void i915_gem_init_swizzling(struct drm_device *dev)
BUG();
}
static bool
intel_enable_blt(struct drm_device *dev)
{
if (!HAS_BLT(dev))
return false;
/* The blitter was dysfunctional on early prototypes */
if (IS_GEN6(dev) && dev->pdev->revision < 8) {
DRM_INFO("BLT not supported on this pre-production hardware;"
" graphics performance will be degraded.\n");
return false;
}
return true;
}
static void init_unused_ring(struct drm_device *dev, u32 base)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4583,7 +4656,7 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_render_ring;
}
if (intel_enable_blt(dev)) {
if (HAS_BLT(dev)) {
ret = intel_init_blt_ring_buffer(dev);
if (ret)
goto cleanup_bsd_ring;
@@ -4601,14 +4674,8 @@ int i915_gem_init_rings(struct drm_device *dev)
goto cleanup_vebox_ring;
}
ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
if (ret)
goto cleanup_bsd2_ring;
return 0;
cleanup_bsd2_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]);
cleanup_vebox_ring:
intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
cleanup_blt_ring:
@@ -4678,6 +4745,33 @@ i915_gem_init_hw(struct drm_device *dev)
goto out;
}
/* We can't enable contexts until all firmware is loaded */
if (HAS_GUC_UCODE(dev)) {
ret = intel_guc_ucode_load(dev);
if (ret) {
/*
* If we got an error and GuC submission is enabled, map
* the error to -EIO so the GPU will be declared wedged.
* OTOH, if we didn't intend to use the GuC anyway, just
* discard the error and carry on.
*/
DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
i915.enable_guc_submission ? "" :
" (ignored)");
ret = i915.enable_guc_submission ? -EIO : 0;
if (ret)
goto out;
}
}
/*
* Increment the next seqno by 0x100 so we have a visible break
* on re-initialisation
*/
ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
if (ret)
goto out;
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
struct drm_i915_gem_request *req;
@@ -4815,18 +4909,6 @@ init_ring_lists(struct intel_engine_cs *ring)
INIT_LIST_HEAD(&ring->request_list);
}
void i915_init_vm(struct drm_i915_private *dev_priv,
struct i915_address_space *vm)
{
if (!i915_is_ggtt(vm))
drm_mm_init(&vm->mm, vm->start, vm->total);
vm->dev = dev_priv->dev;
INIT_LIST_HEAD(&vm->active_list);
INIT_LIST_HEAD(&vm->inactive_list);
INIT_LIST_HEAD(&vm->global_link);
list_add_tail(&vm->global_link, &dev_priv->vm_list);
}
void
i915_gem_load(struct drm_device *dev)
{
@@ -4850,8 +4932,6 @@ i915_gem_load(struct drm_device *dev)
NULL);
INIT_LIST_HEAD(&dev_priv->vm_list);
i915_init_vm(dev_priv, &dev_priv->gtt.base);
INIT_LIST_HEAD(&dev_priv->context_list);
INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
INIT_LIST_HEAD(&dev_priv->mm.bound_list);
@@ -4879,6 +4959,14 @@ i915_gem_load(struct drm_device *dev)
dev_priv->num_fence_regs =
I915_READ(vgtif_reg(avail_rs.fence_num));
/*
* Set initial sequence number for requests.
* Using this number allows the wraparound to happen early,
* catching any obvious problems.
*/
dev_priv->next_seqno = ((u32)~0 - 0x1100);
dev_priv->last_seqno = ((u32)~0 - 0x1101);
/* Initialize fence registers to zero */
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
i915_gem_restore_fences(dev);
@@ -4948,9 +5036,9 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
/**
* i915_gem_track_fb - update frontbuffer tracking
* old: current GEM buffer for the frontbuffer slots
* new: new GEM buffer for the frontbuffer slots
* frontbuffer_bits: bitmask of frontbuffer slots
* @old: current GEM buffer for the frontbuffer slots
* @new: new GEM buffer for the frontbuffer slots
* @frontbuffer_bits: bitmask of frontbuffer slots
*
* This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
* from @old and setting them in @new. Both @old and @new can be NULL.
@@ -4973,9 +5061,8 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
}
/* All the new VM stuff */
unsigned long
i915_gem_obj_offset(struct drm_i915_gem_object *o,
struct i915_address_space *vm)
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
struct i915_address_space *vm)
{
struct drm_i915_private *dev_priv = o->base.dev->dev_private;
struct i915_vma *vma;
@@ -4995,9 +5082,8 @@ i915_gem_obj_offset(struct drm_i915_gem_object *o,
return -1;
}
unsigned long
i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
const struct i915_ggtt_view *view)
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
const struct i915_ggtt_view *view)
{
struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
struct i915_vma *vma;