summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c266
1 files changed, 202 insertions, 64 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85f713746a1f..d1cd8b89f47d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1374,25 +1374,24 @@ i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
}
static uint32_t
-i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
- struct drm_device *dev = obj->base.dev;
- uint32_t size;
+ uint32_t gtt_size;
if (INTEL_INFO(dev)->gen >= 4 ||
- obj->tiling_mode == I915_TILING_NONE)
- return obj->base.size;
+ tiling_mode == I915_TILING_NONE)
+ return size;
/* Previous chips need a power-of-two fence region when tiling */
if (INTEL_INFO(dev)->gen == 3)
- size = 1024*1024;
+ gtt_size = 1024*1024;
else
- size = 512*1024;
+ gtt_size = 512*1024;
- while (size < obj->base.size)
- size <<= 1;
+ while (gtt_size < size)
+ gtt_size <<= 1;
- return size;
+ return gtt_size;
}
/**
@@ -1403,59 +1402,52 @@ i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
* potential fence register mapping.
*/
static uint32_t
-i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_alignment(struct drm_device *dev,
+ uint32_t size,
+ int tiling_mode)
{
- struct drm_device *dev = obj->base.dev;
-
/*
* Minimum alignment is 4k (GTT page size), but might be greater
* if a fence register is needed for the object.
*/
if (INTEL_INFO(dev)->gen >= 4 ||
- obj->tiling_mode == I915_TILING_NONE)
+ tiling_mode == I915_TILING_NONE)
return 4096;
/*
* Previous chips need to be aligned to the size of the smallest
* fence register that can contain the object.
*/
- return i915_gem_get_gtt_size(obj);
+ return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
/**
* i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
* unfenced object
- * @obj: object to check
+ * @dev: the device
+ * @size: size of the object
+ * @tiling_mode: tiling mode of the object
*
* Return the required GTT alignment for an object, only taking into account
* unfenced tiled surface requirements.
*/
uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+ uint32_t size,
+ int tiling_mode)
{
- struct drm_device *dev = obj->base.dev;
- int tile_height;
-
/*
* Minimum alignment is 4k (GTT page size) for sane hw.
*/
if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
- obj->tiling_mode == I915_TILING_NONE)
+ tiling_mode == I915_TILING_NONE)
return 4096;
- /*
- * Older chips need unfenced tiled buffers to be aligned to the left
- * edge of an even tile row (where tile rows are counted as if the bo is
- * placed in a fenced gtt region).
+ /* Previous hardware however needs to be aligned to a power-of-two
+ * tile height. The simplest method for determining this is to reuse
+ * the power-of-tile object size.
*/
- if (IS_GEN2(dev))
- tile_height = 16;
- else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
- tile_height = 32;
- else
- tile_height = 8;
-
- return tile_height * obj->stride * 2;
+ return i915_gem_get_gtt_size(dev, size, tiling_mode);
}
int
@@ -1771,8 +1763,11 @@ i915_add_request(struct intel_ring_buffer *ring,
ring->outstanding_lazy_request = false;
if (!dev_priv->mm.suspended) {
- mod_timer(&dev_priv->hangcheck_timer,
- jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ if (i915_enable_hangcheck) {
+ mod_timer(&dev_priv->hangcheck_timer,
+ jiffies +
+ msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ }
if (was_empty)
queue_delayed_work(dev_priv->wq,
&dev_priv->mm.retire_work, HZ);
@@ -2072,8 +2067,8 @@ i915_wait_request(struct intel_ring_buffer *ring,
if (!ier) {
DRM_ERROR("something (likely vbetool) disabled "
"interrupts, re-enabling\n");
- i915_driver_irq_preinstall(ring->dev);
- i915_driver_irq_postinstall(ring->dev);
+ ring->dev->driver->irq_preinstall(ring->dev);
+ ring->dev->driver->irq_postinstall(ring->dev);
}
trace_i915_gem_request_wait_begin(ring, seqno);
@@ -2143,6 +2138,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
return 0;
}
+static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+{
+ u32 old_write_domain, old_read_domains;
+
+ /* Act a barrier for all accesses through the GTT */
+ mb();
+
+ /* Force a pagefault for domain tracking on next user access */
+ i915_gem_release_mmap(obj);
+
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+ return;
+
+ old_read_domains = obj->base.read_domains;
+ old_write_domain = obj->base.write_domain;
+
+ obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
+ obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+}
+
/**
* Unbinds an object from the GTT aperture.
*/
@@ -2159,23 +2178,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
return -EINVAL;
}
- /* blow away mappings if mapped through GTT */
- i915_gem_release_mmap(obj);
-
- /* Move the object to the CPU domain to ensure that
- * any possible CPU writes while it's not in the GTT
- * are flushed when we go to remap it. This will
- * also ensure that all pending GPU writes are finished
- * before we unbind.
- */
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_finish_gpu(obj);
if (ret == -ERESTARTSYS)
return ret;
/* Continue on if we fail due to EIO, the GPU is hung so we
* should be safe and we need to cleanup or else we might
* cause memory corruption through use-after-free.
*/
+
+ i915_gem_object_finish_gtt(obj);
+
+ /* Move the object to the CPU domain to ensure that
+ * any possible CPU writes while it's not in the GTT
+ * are flushed when we go to remap it.
+ */
+ if (ret == 0)
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ if (ret == -ERESTARTSYS)
+ return ret;
if (ret) {
+ /* In the event of a disaster, abandon all caches and
+ * hope for the best.
+ */
i915_gem_clflush_object(obj);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -2744,9 +2768,16 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
return -EINVAL;
}
- fence_size = i915_gem_get_gtt_size(obj);
- fence_alignment = i915_gem_get_gtt_alignment(obj);
- unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
+ fence_size = i915_gem_get_gtt_size(dev,
+ obj->base.size,
+ obj->tiling_mode);
+ fence_alignment = i915_gem_get_gtt_alignment(dev,
+ obj->base.size,
+ obj->tiling_mode);
+ unfenced_alignment =
+ i915_gem_get_unfenced_gtt_alignment(dev,
+ obj->base.size,
+ obj->tiling_mode);
if (alignment == 0)
alignment = map_and_fenceable ? fence_alignment :
@@ -2997,51 +3028,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+ enum i915_cache_level cache_level)
+{
+ int ret;
+
+ if (obj->cache_level == cache_level)
+ return 0;
+
+ if (obj->pin_count) {
+ DRM_DEBUG("can not change the cache level of pinned objects\n");
+ return -EBUSY;
+ }
+
+ if (obj->gtt_space) {
+ ret = i915_gem_object_finish_gpu(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_finish_gtt(obj);
+
+ /* Before SandyBridge, you could not use tiling or fence
+ * registers with snooped memory, so relinquish any fences
+ * currently pointing to our region in the aperture.
+ */
+ if (INTEL_INFO(obj->base.dev)->gen < 6) {
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ return ret;
+ }
+
+ i915_gem_gtt_rebind_object(obj, cache_level);
+ }
+
+ if (cache_level == I915_CACHE_NONE) {
+ u32 old_read_domains, old_write_domain;
+
+ /* If we're coming from LLC cached, then we haven't
+ * actually been tracking whether the data is in the
+ * CPU cache or not, since we only allow one bit set
+ * in obj->write_domain and have been skipping the clflushes.
+ * Just set it to the CPU cache for now.
+ */
+ WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+ WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+ old_read_domains = obj->base.read_domains;
+ old_write_domain = obj->base.write_domain;
+
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+ }
+
+ obj->cache_level = cache_level;
+ return 0;
+}
+
/*
- * Prepare buffer for display plane. Use uninterruptible for possible flush
- * wait, as in modesetting process we're not supposed to be interrupted.
+ * Prepare buffer for display plane (scanout, cursors, etc).
+ * Can be called from an uninterruptible phase (modesetting) and allows
+ * any flushes to be pipelined (for pageflips).
+ *
+ * For the display plane, we want to be in the GTT but out of any write
+ * domains. So in many ways this looks like set_to_gtt_domain() apart from the
+ * ability to pipeline the waits, pinning and any additional subtleties
+ * that may differentiate the display plane from ordinary buffers.
*/
int
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+ u32 alignment,
struct intel_ring_buffer *pipelined)
{
- uint32_t old_read_domains;
+ u32 old_read_domains, old_write_domain;
int ret;
- /* Not valid to be called on unbound objects. */
- if (obj->gtt_space == NULL)
- return -EINVAL;
-
ret = i915_gem_object_flush_gpu_write_domain(obj);
if (ret)
return ret;
-
- /* Currently, we are always called from an non-interruptible context. */
if (pipelined != obj->ring) {
ret = i915_gem_object_wait_rendering(obj);
if (ret)
return ret;
}
+ /* The display engine is not coherent with the LLC cache on gen6. As
+ * a result, we make sure that the pinning that is about to occur is
+ * done with uncached PTEs. This is lowest common denominator for all
+ * chipsets.
+ *
+ * However for gen6+, we could do better by using the GFDT bit instead
+ * of uncaching, which would allow us to flush all the LLC-cached data
+ * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+ */
+ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+ if (ret)
+ return ret;
+
+ /* As the user may map the buffer once pinned in the display plane
+ * (e.g. libkms for the bootup splash), we have to ensure that we
+ * always use map_and_fenceable for all scanout buffers.
+ */
+ ret = i915_gem_object_pin(obj, alignment, true);
+ if (ret)
+ return ret;
+
i915_gem_object_flush_cpu_write_domain(obj);
+ old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
trace_i915_gem_object_change_domain(obj,
old_read_domains,
- obj->base.write_domain);
+ old_write_domain);
return 0;
}
int
-i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
{
int ret;
- if (!obj->active)
+ if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
return 0;
if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
@@ -3050,6 +3169,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
return ret;
}
+ /* Ensure that we invalidate the GPU's caches and TLBs. */
+ obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
+
return i915_gem_object_wait_rendering(obj);
}
@@ -3576,7 +3698,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_NONE;
+ if (IS_GEN6(dev)) {
+ /* On Gen6, we can have the GPU use the LLC (the CPU
+ * cache) for about a 10% performance improvement
+ * compared to uncached. Graphics requests other than
+ * display scanout are coherent with the CPU in
+ * accessing this cache. This means in this mode we
+ * don't need to clflush on the CPU side, and on the
+ * GPU side we only need to flush internal caches to
+ * get data visible to the CPU.
+ *
+ * However, we maintain the display planes as UC, and so
+ * need to rebind when first used as such.
+ */
+ obj->cache_level = I915_CACHE_LLC;
+ } else
+ obj->cache_level = I915_CACHE_NONE;
+
obj->base.driver_private = NULL;
obj->fence_reg = I915_FENCE_REG_NONE;
INIT_LIST_HEAD(&obj->mm_list);