i915_scheduler.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. /*
  2. * SPDX-License-Identifier: MIT
  3. *
  4. * Copyright © 2018 Intel Corporation
  5. */
  6. #include <linux/mutex.h>
  7. #include "i915_drv.h"
  8. #include "i915_request.h"
  9. #include "i915_scheduler.h"
  10. static struct kmem_cache *slab_dependencies;
  11. static struct kmem_cache *slab_priorities;
  12. static DEFINE_SPINLOCK(schedule_lock);
  13. static const struct i915_request *
  14. node_to_request(const struct i915_sched_node *node)
  15. {
  16. return container_of(node, const struct i915_request, sched);
  17. }
  18. static inline bool node_started(const struct i915_sched_node *node)
  19. {
  20. return i915_request_started(node_to_request(node));
  21. }
  22. static inline bool node_signaled(const struct i915_sched_node *node)
  23. {
  24. return i915_request_completed(node_to_request(node));
  25. }
  26. static inline struct i915_priolist *to_priolist(struct rb_node *rb)
  27. {
  28. return rb_entry(rb, struct i915_priolist, node);
  29. }
  30. static void assert_priolists(struct i915_sched_engine * const sched_engine)
  31. {
  32. struct rb_node *rb;
  33. long last_prio;
  34. if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
  35. return;
  36. GEM_BUG_ON(rb_first_cached(&sched_engine->queue) !=
  37. rb_first(&sched_engine->queue.rb_root));
  38. last_prio = INT_MAX;
  39. for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
  40. const struct i915_priolist *p = to_priolist(rb);
  41. GEM_BUG_ON(p->priority > last_prio);
  42. last_prio = p->priority;
  43. }
  44. }
  45. struct list_head *
  46. i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio)
  47. {
  48. struct i915_priolist *p;
  49. struct rb_node **parent, *rb;
  50. bool first = true;
  51. lockdep_assert_held(&sched_engine->lock);
  52. assert_priolists(sched_engine);
  53. if (unlikely(sched_engine->no_priolist))
  54. prio = I915_PRIORITY_NORMAL;
  55. find_priolist:
  56. /* most positive priority is scheduled first, equal priorities fifo */
  57. rb = NULL;
  58. parent = &sched_engine->queue.rb_root.rb_node;
  59. while (*parent) {
  60. rb = *parent;
  61. p = to_priolist(rb);
  62. if (prio > p->priority) {
  63. parent = &rb->rb_left;
  64. } else if (prio < p->priority) {
  65. parent = &rb->rb_right;
  66. first = false;
  67. } else {
  68. return &p->requests;
  69. }
  70. }
  71. if (prio == I915_PRIORITY_NORMAL) {
  72. p = &sched_engine->default_priolist;
  73. } else {
  74. p = kmem_cache_alloc(slab_priorities, GFP_ATOMIC);
  75. /* Convert an allocation failure to a priority bump */
  76. if (unlikely(!p)) {
  77. prio = I915_PRIORITY_NORMAL; /* recurses just once */
  78. /* To maintain ordering with all rendering, after an
  79. * allocation failure we have to disable all scheduling.
  80. * Requests will then be executed in fifo, and schedule
  81. * will ensure that dependencies are emitted in fifo.
  82. * There will be still some reordering with existing
  83. * requests, so if userspace lied about their
  84. * dependencies that reordering may be visible.
  85. */
  86. sched_engine->no_priolist = true;
  87. goto find_priolist;
  88. }
  89. }
  90. p->priority = prio;
  91. INIT_LIST_HEAD(&p->requests);
  92. rb_link_node(&p->node, rb, parent);
  93. rb_insert_color_cached(&p->node, &sched_engine->queue, first);
  94. return &p->requests;
  95. }
  96. void __i915_priolist_free(struct i915_priolist *p)
  97. {
  98. kmem_cache_free(slab_priorities, p);
  99. }
  100. struct sched_cache {
  101. struct list_head *priolist;
  102. };
  103. static struct i915_sched_engine *
  104. lock_sched_engine(struct i915_sched_node *node,
  105. struct i915_sched_engine *locked,
  106. struct sched_cache *cache)
  107. {
  108. const struct i915_request *rq = node_to_request(node);
  109. struct i915_sched_engine *sched_engine;
  110. GEM_BUG_ON(!locked);
  111. /*
  112. * Virtual engines complicate acquiring the engine timeline lock,
  113. * as their rq->engine pointer is not stable until under that
  114. * engine lock. The simple ploy we use is to take the lock then
  115. * check that the rq still belongs to the newly locked engine.
  116. */
  117. while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) {
  118. spin_unlock(&locked->lock);
  119. memset(cache, 0, sizeof(*cache));
  120. spin_lock(&sched_engine->lock);
  121. locked = sched_engine;
  122. }
  123. GEM_BUG_ON(locked != sched_engine);
  124. return locked;
  125. }
  126. static void __i915_schedule(struct i915_sched_node *node,
  127. const struct i915_sched_attr *attr)
  128. {
  129. const int prio = max(attr->priority, node->attr.priority);
  130. struct i915_sched_engine *sched_engine;
  131. struct i915_dependency *dep, *p;
  132. struct i915_dependency stack;
  133. struct sched_cache cache;
  134. LIST_HEAD(dfs);
  135. /* Needed in order to use the temporary link inside i915_dependency */
  136. lockdep_assert_held(&schedule_lock);
  137. GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
  138. if (node_signaled(node))
  139. return;
  140. stack.signaler = node;
  141. list_add(&stack.dfs_link, &dfs);
  142. /*
  143. * Recursively bump all dependent priorities to match the new request.
  144. *
  145. * A naive approach would be to use recursion:
  146. * static void update_priorities(struct i915_sched_node *node, prio) {
  147. * list_for_each_entry(dep, &node->signalers_list, signal_link)
  148. * update_priorities(dep->signal, prio)
  149. * queue_request(node);
  150. * }
  151. * but that may have unlimited recursion depth and so runs a very
  152. * real risk of overunning the kernel stack. Instead, we build
  153. * a flat list of all dependencies starting with the current request.
  154. * As we walk the list of dependencies, we add all of its dependencies
  155. * to the end of the list (this may include an already visited
  156. * request) and continue to walk onwards onto the new dependencies. The
  157. * end result is a topological list of requests in reverse order, the
  158. * last element in the list is the request we must execute first.
  159. */
  160. list_for_each_entry(dep, &dfs, dfs_link) {
  161. struct i915_sched_node *node = dep->signaler;
  162. /* If we are already flying, we know we have no signalers */
  163. if (node_started(node))
  164. continue;
  165. /*
  166. * Within an engine, there can be no cycle, but we may
  167. * refer to the same dependency chain multiple times
  168. * (redundant dependencies are not eliminated) and across
  169. * engines.
  170. */
  171. list_for_each_entry(p, &node->signalers_list, signal_link) {
  172. GEM_BUG_ON(p == dep); /* no cycles! */
  173. if (node_signaled(p->signaler))
  174. continue;
  175. if (prio > READ_ONCE(p->signaler->attr.priority))
  176. list_move_tail(&p->dfs_link, &dfs);
  177. }
  178. }
  179. /*
  180. * If we didn't need to bump any existing priorities, and we haven't
  181. * yet submitted this request (i.e. there is no potential race with
  182. * execlists_submit_request()), we can set our own priority and skip
  183. * acquiring the engine locks.
  184. */
  185. if (node->attr.priority == I915_PRIORITY_INVALID) {
  186. GEM_BUG_ON(!list_empty(&node->link));
  187. node->attr = *attr;
  188. if (stack.dfs_link.next == stack.dfs_link.prev)
  189. return;
  190. __list_del_entry(&stack.dfs_link);
  191. }
  192. memset(&cache, 0, sizeof(cache));
  193. sched_engine = node_to_request(node)->engine->sched_engine;
  194. spin_lock(&sched_engine->lock);
  195. /* Fifo and depth-first replacement ensure our deps execute before us */
  196. sched_engine = lock_sched_engine(node, sched_engine, &cache);
  197. list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
  198. struct i915_request *from = container_of(dep->signaler,
  199. struct i915_request,
  200. sched);
  201. INIT_LIST_HEAD(&dep->dfs_link);
  202. node = dep->signaler;
  203. sched_engine = lock_sched_engine(node, sched_engine, &cache);
  204. lockdep_assert_held(&sched_engine->lock);
  205. /* Recheck after acquiring the engine->timeline.lock */
  206. if (prio <= node->attr.priority || node_signaled(node))
  207. continue;
  208. GEM_BUG_ON(node_to_request(node)->engine->sched_engine !=
  209. sched_engine);
  210. /* Must be called before changing the nodes priority */
  211. if (sched_engine->bump_inflight_request_prio)
  212. sched_engine->bump_inflight_request_prio(from, prio);
  213. WRITE_ONCE(node->attr.priority, prio);
  214. /*
  215. * Once the request is ready, it will be placed into the
  216. * priority lists and then onto the HW runlist. Before the
  217. * request is ready, it does not contribute to our preemption
  218. * decisions and we can safely ignore it, as it will, and
  219. * any preemption required, be dealt with upon submission.
  220. * See engine->submit_request()
  221. */
  222. if (list_empty(&node->link))
  223. continue;
  224. if (i915_request_in_priority_queue(node_to_request(node))) {
  225. if (!cache.priolist)
  226. cache.priolist =
  227. i915_sched_lookup_priolist(sched_engine,
  228. prio);
  229. list_move_tail(&node->link, cache.priolist);
  230. }
  231. /* Defer (tasklet) submission until after all of our updates. */
  232. if (sched_engine->kick_backend)
  233. sched_engine->kick_backend(node_to_request(node), prio);
  234. }
  235. spin_unlock(&sched_engine->lock);
  236. }
  237. void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
  238. {
  239. spin_lock_irq(&schedule_lock);
  240. __i915_schedule(&rq->sched, attr);
  241. spin_unlock_irq(&schedule_lock);
  242. }
  243. void i915_sched_node_init(struct i915_sched_node *node)
  244. {
  245. INIT_LIST_HEAD(&node->signalers_list);
  246. INIT_LIST_HEAD(&node->waiters_list);
  247. INIT_LIST_HEAD(&node->link);
  248. i915_sched_node_reinit(node);
  249. }
  250. void i915_sched_node_reinit(struct i915_sched_node *node)
  251. {
  252. node->attr.priority = I915_PRIORITY_INVALID;
  253. node->semaphores = 0;
  254. node->flags = 0;
  255. GEM_BUG_ON(!list_empty(&node->signalers_list));
  256. GEM_BUG_ON(!list_empty(&node->waiters_list));
  257. GEM_BUG_ON(!list_empty(&node->link));
  258. }
  259. static struct i915_dependency *
  260. i915_dependency_alloc(void)
  261. {
  262. return kmem_cache_alloc(slab_dependencies, GFP_KERNEL);
  263. }
  264. static void
  265. i915_dependency_free(struct i915_dependency *dep)
  266. {
  267. kmem_cache_free(slab_dependencies, dep);
  268. }
  269. bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
  270. struct i915_sched_node *signal,
  271. struct i915_dependency *dep,
  272. unsigned long flags)
  273. {
  274. bool ret = false;
  275. spin_lock_irq(&schedule_lock);
  276. if (!node_signaled(signal)) {
  277. INIT_LIST_HEAD(&dep->dfs_link);
  278. dep->signaler = signal;
  279. dep->waiter = node;
  280. dep->flags = flags;
  281. /* All set, now publish. Beware the lockless walkers. */
  282. list_add_rcu(&dep->signal_link, &node->signalers_list);
  283. list_add_rcu(&dep->wait_link, &signal->waiters_list);
  284. /* Propagate the chains */
  285. node->flags |= signal->flags;
  286. ret = true;
  287. }
  288. spin_unlock_irq(&schedule_lock);
  289. return ret;
  290. }
  291. int i915_sched_node_add_dependency(struct i915_sched_node *node,
  292. struct i915_sched_node *signal,
  293. unsigned long flags)
  294. {
  295. struct i915_dependency *dep;
  296. dep = i915_dependency_alloc();
  297. if (!dep)
  298. return -ENOMEM;
  299. if (!__i915_sched_node_add_dependency(node, signal, dep,
  300. flags | I915_DEPENDENCY_ALLOC))
  301. i915_dependency_free(dep);
  302. return 0;
  303. }
  304. void i915_sched_node_fini(struct i915_sched_node *node)
  305. {
  306. struct i915_dependency *dep, *tmp;
  307. spin_lock_irq(&schedule_lock);
  308. /*
  309. * Everyone we depended upon (the fences we wait to be signaled)
  310. * should retire before us and remove themselves from our list.
  311. * However, retirement is run independently on each timeline and
  312. * so we may be called out-of-order.
  313. */
  314. list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
  315. GEM_BUG_ON(!list_empty(&dep->dfs_link));
  316. list_del_rcu(&dep->wait_link);
  317. if (dep->flags & I915_DEPENDENCY_ALLOC)
  318. i915_dependency_free(dep);
  319. }
  320. INIT_LIST_HEAD(&node->signalers_list);
  321. /* Remove ourselves from everyone who depends upon us */
  322. list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
  323. GEM_BUG_ON(dep->signaler != node);
  324. GEM_BUG_ON(!list_empty(&dep->dfs_link));
  325. list_del_rcu(&dep->signal_link);
  326. if (dep->flags & I915_DEPENDENCY_ALLOC)
  327. i915_dependency_free(dep);
  328. }
  329. INIT_LIST_HEAD(&node->waiters_list);
  330. spin_unlock_irq(&schedule_lock);
  331. }
  332. void i915_request_show_with_schedule(struct drm_printer *m,
  333. const struct i915_request *rq,
  334. const char *prefix,
  335. int indent)
  336. {
  337. struct i915_dependency *dep;
  338. i915_request_show(m, rq, prefix, indent);
  339. if (i915_request_completed(rq))
  340. return;
  341. rcu_read_lock();
  342. for_each_signaler(dep, rq) {
  343. const struct i915_request *signaler =
  344. node_to_request(dep->signaler);
  345. /* Dependencies along the same timeline are expected. */
  346. if (signaler->timeline == rq->timeline)
  347. continue;
  348. if (__i915_request_is_complete(signaler))
  349. continue;
  350. i915_request_show(m, signaler, prefix, indent + 2);
  351. }
  352. rcu_read_unlock();
  353. }
  354. static void default_destroy(struct kref *kref)
  355. {
  356. struct i915_sched_engine *sched_engine =
  357. container_of(kref, typeof(*sched_engine), ref);
  358. tasklet_kill(&sched_engine->tasklet); /* flush the callback */
  359. kfree(sched_engine);
  360. }
  361. static bool default_disabled(struct i915_sched_engine *sched_engine)
  362. {
  363. return false;
  364. }
  365. struct i915_sched_engine *
  366. i915_sched_engine_create(unsigned int subclass)
  367. {
  368. struct i915_sched_engine *sched_engine;
  369. sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL);
  370. if (!sched_engine)
  371. return NULL;
  372. kref_init(&sched_engine->ref);
  373. sched_engine->queue = RB_ROOT_CACHED;
  374. sched_engine->queue_priority_hint = INT_MIN;
  375. sched_engine->destroy = default_destroy;
  376. sched_engine->disabled = default_disabled;
  377. INIT_LIST_HEAD(&sched_engine->requests);
  378. INIT_LIST_HEAD(&sched_engine->hold);
  379. spin_lock_init(&sched_engine->lock);
  380. lockdep_set_subclass(&sched_engine->lock, subclass);
  381. /*
  382. * Due to an interesting quirk in lockdep's internal debug tracking,
  383. * after setting a subclass we must ensure the lock is used. Otherwise,
  384. * nr_unused_locks is incremented once too often.
  385. */
  386. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  387. local_irq_disable();
  388. lock_map_acquire(&sched_engine->lock.dep_map);
  389. lock_map_release(&sched_engine->lock.dep_map);
  390. local_irq_enable();
  391. #endif
  392. return sched_engine;
  393. }
  394. void i915_scheduler_module_exit(void)
  395. {
  396. kmem_cache_destroy(slab_dependencies);
  397. kmem_cache_destroy(slab_priorities);
  398. }
  399. int __init i915_scheduler_module_init(void)
  400. {
  401. slab_dependencies = KMEM_CACHE(i915_dependency,
  402. SLAB_HWCACHE_ALIGN |
  403. SLAB_TYPESAFE_BY_RCU);
  404. if (!slab_dependencies)
  405. return -ENOMEM;
  406. slab_priorities = KMEM_CACHE(i915_priolist, 0);
  407. if (!slab_priorities)
  408. goto err_priorities;
  409. return 0;
  410. err_priorities:
  411. kmem_cache_destroy(slab_priorities);
  412. return -ENOMEM;
  413. }