Skip to content

Instantly share code, notes, and snippets.

@mthierry
Created November 12, 2015 11:51
Show Gist options
  • Save mthierry/0acf466664b18fcdb971 to your computer and use it in GitHub Desktop.
Save mthierry/0acf466664b18fcdb971 to your computer and use it in GitHub Desktop.
commit c6c90dd2d7c4895a360b0ca11b440cdf7f82c143
Author: Chris Wilson <[email protected]>
Date: Fri Oct 16 11:59:00 2015 +0100
FROM_UPSTREAM [VPG]: drm/i915: Add soft-pinning API for execbuffer
Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.
Signed-off-by: Chris Wilson <[email protected]>
v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski.
(Not published externally)
v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction
to allow eviction of soft-pinned objects when another soft-pinned object used
by a subsequent execbuffer overlaps reported by Michal Winiarski.
(Not published externally)
v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
pinned first after an address conflict happens to avoid repeated conflicts in
rare cases (Suggested by Chris Wilson). Expanded comment on
drm_i915_gem_exec_object2.offset to cover this new API.
v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
(Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
buffers are not considered misplaced without the user specifying
EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any
addressing workarounds. Updated object2.offset field comment again to clarify
NO_RELOC case (Chris). checkpatch cleanup.
v6: Trivial rebase on latest drm-intel-nightly
Android note: This patch has not been merged in upstream yet; until this
happens, I915_PARAM_HAS_EXEC_SOFTPIN will be listed as private (not
upstreamed) to prevent any conflict. Once this patch lands upstream, the
softpin parameter number can be updated with a SQUASHME patch.
Change-Id: Ib88934c830db23016eea0cc36da50a69c3bdc9d5
Tracked-On: https://jira01.devtools.intel.com/browse/OAM-8400
Upstream-repo: http://patchwork.freedesktop.org/patch/62008/
Cc: Chris Wilson <[email protected]>
Cc: Akash Goel <[email protected]>
Cc: Vinay Belgaumkar <[email protected]>
Cc: Michal Winiarski <[email protected]>
Cc: Zou Nanhai <[email protected]>
Cc: Kristian Høgsberg <[email protected]>
Signed-off-by: Thomas Daniel <[email protected]>
Reviewed-by: "Akash Goel <[email protected]>"
Signed-off-by: Michel Thierry <[email protected]>
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 735eff0..22ff6e1 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -183,6 +183,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_GEM_FALLOCATE:
value = 1;
break;
+ case I915_PARAM_HAS_EXEC_SOFTPIN:
+ value = 1;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed5d709..7160e33 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3072,6 +3072,7 @@ int i915_gem_object_update(struct drm_i915_gem_object *obj,
#define PIN_UPDATE (1<<5)
#define PIN_ZONE_4G (1<<6)
#define PIN_HIGH (1<<7)
+#define PIN_OFFSET_FIXED (1<<8)
#define PIN_OFFSET_MASK (~4095)
int __must_check
i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3417,6 +3418,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
unsigned long start,
unsigned long end,
unsigned flags);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
int i915_gem_evict_everything(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 70da740..ae6ed7c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4440,30 +4440,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
goto err_unpin;
- if (flags & PIN_HIGH) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
+ if (flags & PIN_OFFSET_FIXED) {
+ uint64_t offset = flags & PIN_OFFSET_MASK;
+
+ if (offset & (alignment - 1)) {
+ ret = -EINVAL;
+ goto err_free_vma;
+ }
+ vma->node.start = offset;
+ vma->node.size = size;
+ vma->node.color = obj->cache_level;
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ if (ret) {
+ ret = i915_gem_evict_for_vma(vma);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ }
+ if (ret)
+ goto err_free_vma;
} else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
- }
+ if (flags & PIN_HIGH) {
+ search_flag = DRM_MM_SEARCH_BELOW;
+ alloc_flag = DRM_MM_CREATE_TOP;
+ } else {
+ search_flag = DRM_MM_SEARCH_DEFAULT;
+ alloc_flag = DRM_MM_CREATE_DEFAULT;
+ }
search_free:
- ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
- size, alignment,
- obj->cache_level,
- start, end,
- search_flag,
- alloc_flag);
- if (ret) {
- ret = i915_gem_evict_something(dev, vm, size, alignment,
- obj->cache_level,
- start, end,
- flags);
- if (ret == 0)
- goto search_free;
-
- goto err_free_vma;
+ ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+ size, alignment,
+ obj->cache_level,
+ start, end,
+ search_flag,
+ alloc_flag);
+ if (ret) {
+ ret = i915_gem_evict_something(dev, vm, size, alignment,
+ obj->cache_level,
+ start, end,
+ flags);
+ if (ret == 0)
+ goto search_free;
+
+ goto err_free_vma;
+ }
}
if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
ret = -EINVAL;
@@ -5017,6 +5037,10 @@ i915_vma_misplaced(struct i915_vma *vma,
vma->node.start < (flags & PIN_OFFSET_MASK))
return true;
+ if (flags & PIN_OFFSET_FIXED &&
+ vma->node.start != (flags & PIN_OFFSET_MASK))
+ return true;
+
return false;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d09e35e..bc5b91c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,45 @@ found:
return ret;
}
+int
+i915_gem_evict_for_vma(struct i915_vma *target)
+{
+ struct drm_mm_node *node, *next;
+
+ list_for_each_entry_safe(node, next,
+ &target->vm->mm.head_node.node_list,
+ node_list) {
+ struct i915_vma *vma;
+ int ret;
+
+ if (node->start + node->size <= target->node.start)
+ continue;
+ if (node->start >= target->node.start + target->node.size)
+ break;
+
+ vma = container_of(node, typeof(*vma), node);
+
+ if (vma->pin_count) {
+ if (!vma->exec_entry || (vma->pin_count > 1))
+ /* Object is pinned for some other use */
+ return -EBUSY;
+
+ /* We need to evict a buffer in the same batch */
+ if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+ /* Overlapping fixed objects in the same batch */
+ return -EINVAL;
+
+ return -ENOSPC;
+ }
+
+ ret = i915_vma_unbind(vma);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* i915_gem_evict_vm - Evict all idle vmas from a vm
* @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index eca5162..87b2209 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= PIN_GLOBAL | PIN_MAPPABLE;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ flags |= entry->offset | PIN_OFFSET_FIXED;
if ((flags & PIN_MAPPABLE) == 0)
flags |= PIN_HIGH;
}
@@ -681,6 +683,10 @@ eb_vma_misplaced(struct i915_vma *vma)
if (vma->node.size < entry->pad_to_size)
return true;
+ if (entry->flags & EXEC_OBJECT_PINNED &&
+ vma->node.start != entry->offset)
+ return true;
+
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
vma->node.start < BATCH_OFFSET_BIAS)
return true;
@@ -689,7 +695,8 @@ eb_vma_misplaced(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return !only_mappable_for_reloc(entry->flags);
- if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+ if (!(entry->flags &
+ (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
(vma->node.start + vma->node.size - 1) >> 32)
return true;
@@ -706,6 +713,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
struct i915_vma *vma;
struct i915_address_space *vm;
struct list_head ordered_vmas;
+ struct list_head pinned_vmas;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
int retry;
@@ -714,6 +722,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
INIT_LIST_HEAD(&ordered_vmas);
+ INIT_LIST_HEAD(&pinned_vmas);
while (!list_empty(vmas)) {
struct drm_i915_gem_exec_object2 *entry;
bool need_fence, need_mappable;
@@ -732,7 +741,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable) {
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ list_move_tail(&vma->exec_list, &pinned_vmas);
+ else if (need_mappable) {
entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
} else
@@ -742,6 +753,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj->base.pending_write_domain = 0;
}
list_splice(&ordered_vmas, vmas);
+ list_splice(&pinned_vmas, vmas);
/* Attempt to pin all of the buffers into the GTT.
* This is done in 3 phases:
@@ -1563,7 +1575,8 @@ eb_get_batch(struct eb_vmas *eb)
* Note that actual hangs have only been observed on gen7, but for
* paranoia do it everywhere.
*/
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
return vma->obj;
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8d2c6716..0910dfc 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -437,6 +437,7 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_DPST 0x800
#define I915_PARAM_HAS_GEM_FALLOCATE 0x802
+#define I915_PARAM_HAS_EXEC_SOFTPIN 0x803
/* Avoid breakage of userspace until name change is adopted */
#define I915_PARAM_EU_COUNT I915_PARAM_EU_TOTAL
@@ -787,8 +788,12 @@ struct drm_i915_gem_exec_object2 {
__u64 alignment;
/**
- * Returned value of the updated offset of the object, for future
- * presumed_offset writes.
+ * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+ * the user with the GTT offset at which this object will be pinned.
+ * When the I915_EXEC_NO_RELOC flag is specified this must contain the
+ * presumed_offset of the object.
+ * During execbuffer2 the kernel populates it with the value of the
+ * current GTT offset of the object, for future presumed_offset writes.
*/
__u64 offset;
@@ -797,7 +802,8 @@ struct drm_i915_gem_exec_object2 {
#define EXEC_OBJECT_WRITE (1<<2)
#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
#define EXEC_OBJECT_PAD_TO_SIZE (1<<4)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define EXEC_OBJECT_PINNED (1<<5)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
__u64 flags;
__u64 pad_to_size;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment