Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next

This is a new pull for drm-next on top of last weeks with the following changes: - Fixed 64 bit divide - Fixed vram type on vega20 - Misc vega20 fixes - Misc DC fixes - Fix GDS/GWS/OA domain handling Previous changes from last week: amdgpu/kfd: - Picasso (new APU) support - Raven2 (new APU) support - Vega20 enablement - ACP powergating improvements - Add ABGR/XBGR display support - VCN JPEG engine support - Initial xGMI support - Use load balancing for engine scheduling - Lots of new documentation - Rework and clean up i2c and aux handling in DC - Add DP YCbCr 4:2:0 support in DC - Add DMCU firmware loading for Raven (used for ABM and PSR) - New debugfs features in DC - LVDS support in DC - Implement wave kill for gfx/compute (light weight reset for shaders) - Use AGP aperture to avoid gart mappings when possible - GPUVM performance improvements - Bulk moves for more efficient GPUVM LRU handling - Merge amdgpu and amdkfd into one module - Enable gfxoff and stutter mode on Raven - Misc cleanups Scheduler: - Load balancing support - Bug fixes ttm: - Bulk move functionality - Bug fixes radeon: - Misc cleanups Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
2018-09-21 09:52:34 +10:00
parent 0320ac5188 846311ae68
commit 36c9c3c911
280 changed files with 81925 additions and 8278 deletions
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -50,7 +50,10 @@ enum drm_sched_priority {
 *
 * @list: used to append this struct to the list of entities in the
 *        runqueue.
- * @rq: runqueue to which this entity belongs.
+ * @rq: runqueue on which this entity is currently scheduled.
+ * @rq_list: a list of run queues on which jobs from this entity can
+ *           be scheduled
+ * @num_rq_list: number of run queues in the rq_list
 * @rq_lock: lock to modify the runqueue to which this entity belongs.
 * @job_queue: the list of jobs of this entity.
 * @fence_seq: a linearly increasing seqno incremented with each
@@ -67,6 +70,7 @@ enum drm_sched_priority {
 * @fini_status: contains the exit status in case the process was signalled.
 * @last_scheduled: points to the finished fence of the last scheduled job.
 * @last_user: last group leader pushing a job into the entity.
+ * @stopped: Marks the enity as removed from rq and destined for termination.
 *
 * Entities will emit jobs in order to their corresponding hardware
 * ring, and the scheduler will alternate between entities based on
@@ -75,6 +79,8 @@ enum drm_sched_priority {
 struct drm_sched_entity {
 	struct list_head		list;
 	struct drm_sched_rq		*rq;
+	struct drm_sched_rq		**rq_list;
+	unsigned int                    num_rq_list;
 	spinlock_t			rq_lock;

 	struct spsc_queue		job_queue;
@@ -87,6 +93,7 @@ struct drm_sched_entity {
 	atomic_t			*guilty;
 	struct dma_fence                *last_scheduled;
 	struct task_struct		*last_user;
+	bool 				stopped;
 };

 /**
@@ -257,6 +264,7 @@ struct drm_sched_backend_ops {
 * @job_list_lock: lock to protect the ring_mirror_list.
 * @hang_limit: once the hangs by a job crosses this limit then it is marked
 *              guilty and it will be considered for scheduling further.
+ * @num_jobs: the number of jobs in queue in the scheduler
 *
 * One scheduler is implemented for each hardware ring.
 */
@@ -274,6 +282,7 @@ struct drm_gpu_scheduler {
 	struct list_head		ring_mirror_list;
 	spinlock_t			job_list_lock;
 	int				hang_limit;
+	atomic_t                        num_jobs;
 };

 int drm_sched_init(struct drm_gpu_scheduler *sched,
@@ -281,6 +290,21 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   uint32_t hw_submission, unsigned hang_limit, long timeout,
 		   const char *name);
 void drm_sched_fini(struct drm_gpu_scheduler *sched);
+int drm_sched_job_init(struct drm_sched_job *job,
+		       struct drm_sched_entity *entity,
+		       void *owner);
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
+void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
+			    struct drm_sched_job *job);
+void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
+bool drm_sched_dependency_optimized(struct dma_fence* fence,
+				    struct drm_sched_entity *entity);
+void drm_sched_job_kickout(struct drm_sched_job *s_job);
+
+void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
+			     struct drm_sched_entity *entity);
+void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
+				struct drm_sched_entity *entity);

 int drm_sched_entity_init(struct drm_sched_entity *entity,
 			  struct drm_sched_rq **rq_list,
@@ -289,23 +313,17 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
 void drm_sched_entity_fini(struct drm_sched_entity *entity);
 void drm_sched_entity_destroy(struct drm_sched_entity *entity);
+void drm_sched_entity_select_rq(struct drm_sched_entity *entity);
+struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity);
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 			       struct drm_sched_entity *entity);
-void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
-			     struct drm_sched_rq *rq);
+void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
+				   enum drm_sched_priority priority);
+bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);

 struct drm_sched_fence *drm_sched_fence_create(
 	struct drm_sched_entity *s_entity, void *owner);
 void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
 void drm_sched_fence_finished(struct drm_sched_fence *fence);
-int drm_sched_job_init(struct drm_sched_job *job,
-		       struct drm_sched_entity *entity,
-		       void *owner);
-void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
-			    struct drm_sched_job *job);
-void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
-bool drm_sched_dependency_optimized(struct dma_fence* fence,
-				    struct drm_sched_entity *entity);
-void drm_sched_job_kickout(struct drm_sched_job *s_job);

 #endif
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -51,6 +51,8 @@ struct ttm_placement;

 struct ttm_place;

+struct ttm_lru_bulk_move;
+
 /**
 * struct ttm_bus_placement
 *
@@ -405,12 +407,24 @@ void ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
 * ttm_bo_move_to_lru_tail
 *
 * @bo: The buffer object.
+ * @bulk: optional bulk move structure to remember BO positions
 *
 * Move this BO to the tail of all lru lists used to lookup and reserve an
 * object. This function must be called with struct ttm_bo_global::lru_lock
 * held, and is used to make a BO less likely to be considered for eviction.
 */
-void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo);
+void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
+			     struct ttm_lru_bulk_move *bulk);
+
+/**
+ * ttm_bo_bulk_move_lru_tail
+ *
+ * @bulk: bulk move structure
+ *
+ * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that
+ * BO order never changes. Should be called with ttm_bo_global::lru_lock held.
+ */
+void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk);

 /**
 * ttm_bo_lock_delayed_workqueue
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -490,6 +490,34 @@ struct ttm_bo_device {
 	bool no_retry;
 };

+/**
+ * struct ttm_lru_bulk_move_pos
+ *
+ * @first: first BO in the bulk move range
+ * @last: last BO in the bulk move range
+ *
+ * Positions for a lru bulk move.
+ */
+struct ttm_lru_bulk_move_pos {
+	struct ttm_buffer_object *first;
+	struct ttm_buffer_object *last;
+};
+
+/**
+ * struct ttm_lru_bulk_move
+ *
+ * @tt: first/last lru entry for BOs in the TT domain
+ * @vram: first/last lru entry for BOs in the VRAM domain
+ * @swap: first/last lru entry for BOs on the swap list
+ *
+ * Helper structure for bulk moves on the LRU list.
+ */
+struct ttm_lru_bulk_move {
+	struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
+	struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
+	struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
+};
+
 /**
 * ttm_flag_masked
 *