blk-mq-sched.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * blk-mq scheduling framework
  4. *
  5. * Copyright (C) 2016 Jens Axboe
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/module.h>
  9. #include <linux/blk-mq.h>
  10. #include <linux/list_sort.h>
  11. #include <trace/events/block.h>
  12. #include "blk.h"
  13. #include "blk-mq.h"
  14. #include "blk-mq-debugfs.h"
  15. #include "blk-mq-sched.h"
  16. #include "blk-mq-tag.h"
  17. #include "blk-wbt.h"
  18. /*
  19. * Mark a hardware queue as needing a restart.
  20. */
  21. void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
  22. {
  23. if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  24. return;
  25. set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  26. }
  27. EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);
/*
 * Clear the restart mark on @hctx and then run the hardware queue
 * asynchronously.
 */
void __blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	/*
	 * Order the clearing of SCHED_RESTART against the
	 * list_empty_careful(&hctx->dispatch) check in blk_mq_run_hw_queue().
	 * This pairs with the barrier in blk_mq_dispatch_rq_list(). Without
	 * the ordering, the dispatch code could fail to see SCHED_RESTART
	 * while, at the same time, blk_mq_run_hw_queue() fails to notice a
	 * request that was just added to hctx->dispatch.
	 */
	smp_mb();

	blk_mq_run_hw_queue(hctx, true);
}
  41. static int sched_rq_cmp(void *priv, const struct list_head *a,
  42. const struct list_head *b)
  43. {
  44. struct request *rqa = container_of(a, struct request, queuelist);
  45. struct request *rqb = container_of(b, struct request, queuelist);
  46. return rqa->mq_hctx > rqb->mq_hctx;
  47. }
  48. static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
  49. {
  50. struct blk_mq_hw_ctx *hctx =
  51. list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
  52. struct request *rq;
  53. LIST_HEAD(hctx_list);
  54. unsigned int count = 0;
  55. list_for_each_entry(rq, rq_list, queuelist) {
  56. if (rq->mq_hctx != hctx) {
  57. list_cut_before(&hctx_list, rq_list, &rq->queuelist);
  58. goto dispatch;
  59. }
  60. count++;
  61. }
  62. list_splice_tail_init(rq_list, &hctx_list);
  63. dispatch:
  64. return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
  65. }
/*
 * Delay passed to blk_mq_delay_run_hw_queues() when a dispatch budget was
 * released without a request having been dispatched.
 */
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
/*
 * Pull one batch of requests out of the I/O scheduler and dispatch it.
 *
 * The batch is limited to a single request while hctx->dispatch_busy is
 * set, otherwise to the queue's nr_requests.
 *
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() fails to get the budget.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again. This is necessary to avoid starving flushes. Otherwise
 * returns 1 if a dispatch call reported success and 0 if not.
 */
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	struct elevator_queue *e = q->elevator;
	bool multi_hctxs = false, run_queue = false;
	bool dispatched = false, busy = false;
	unsigned int max_dispatch;
	LIST_HEAD(rq_list);
	int count = 0;

	/* Collect only one request per pass while the queue is busy. */
	if (hctx->dispatch_busy)
		max_dispatch = 1;
	else
		max_dispatch = hctx->queue->nr_requests;

	do {
		struct request *rq;
		int budget_token;

		/* has_work is optional; stop when it reports no work. */
		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
			break;

		/*
		 * Requests are pending on hctx->dispatch: stop collecting
		 * and report -EAGAIN once what we hold has been dispatched.
		 */
		if (!list_empty_careful(&hctx->dispatch)) {
			busy = true;
			break;
		}

		budget_token = blk_mq_get_dispatch_budget(q);
		if (budget_token < 0)
			break;

		rq = e->type->ops.dispatch_request(hctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(q, budget_token);
			/*
			 * We're releasing without dispatching. Holding the
			 * budget could have blocked any "hctx"s with the
			 * same queue and if we didn't dispatch then there's
			 * no guarantee anyone will kick the queue. Kick it
			 * ourselves.
			 */
			run_queue = true;
			break;
		}

		blk_mq_set_rq_budget_token(rq, budget_token);

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add_tail(&rq->queuelist, &rq_list);
		count++;
		/* Remember that the batch spans more than one hctx. */
		if (rq->mq_hctx != hctx)
			multi_hctxs = true;

		/*
		 * If we cannot get tag for the request, stop dequeueing
		 * requests from the IO scheduler. We are unlikely to be able
		 * to submit them anyway and it creates false impression for
		 * scheduling heuristics that the device can take more IO.
		 */
		if (!blk_mq_get_driver_tag(rq))
			break;
	} while (count < max_dispatch);

	if (!count) {
		/*
		 * Nothing was collected; the delayed kick is only issued in
		 * this case, when a budget was released without a request.
		 */
		if (run_queue)
			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
	} else if (multi_hctxs) {
		/*
		 * Requests from different hctx may be dequeued from some
		 * schedulers, such as bfq and deadline.
		 *
		 * Sort the requests in the list according to their hctx,
		 * dispatch batching requests from same hctx at a time.
		 */
		list_sort(NULL, &rq_list, sched_rq_cmp);
		do {
			dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
		} while (!list_empty(&rq_list));
	} else {
		dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
	}

	/* A non-empty hctx->dispatch takes precedence over the result. */
	if (busy)
		return -EAGAIN;
	return !!dispatched;
}
  154. static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  155. {
  156. unsigned long end = jiffies + HZ;
  157. int ret;
  158. do {
  159. ret = __blk_mq_do_dispatch_sched(hctx);
  160. if (ret != 1)
  161. break;
  162. if (need_resched() || time_is_before_jiffies(end)) {
  163. blk_mq_delay_run_hw_queue(hctx, 0);
  164. break;
  165. }
  166. } while (1);
  167. return ret;
  168. }
  169. static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
  170. struct blk_mq_ctx *ctx)
  171. {
  172. unsigned short idx = ctx->index_hw[hctx->type];
  173. if (++idx == hctx->nr_ctx)
  174. idx = 0;
  175. return hctx->ctxs[idx];
  176. }
/*
 * Dequeue requests one at a time from the software queues of @hctx,
 * starting at hctx->dispatch_from, and dispatch each one individually.
 *
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() fails to get the budget.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again. This is necessary to avoid starving flushes. Returns 0
 * otherwise.
 */
static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
{
	struct request_queue *q = hctx->queue;
	LIST_HEAD(rq_list);
	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
	int ret = 0;
	struct request *rq;

	/*
	 * Every early exit below uses "break", which skips the loop
	 * condition. The condition is therefore only evaluated after rq has
	 * been set to a dequeued request in the same iteration.
	 */
	do {
		int budget_token;

		if (!list_empty_careful(&hctx->dispatch)) {
			ret = -EAGAIN;
			break;
		}

		/* No software queue has pending work. */
		if (!sbitmap_any_bit_set(&hctx->ctx_map))
			break;

		budget_token = blk_mq_get_dispatch_budget(q);
		if (budget_token < 0)
			break;

		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
		if (!rq) {
			blk_mq_put_dispatch_budget(q, budget_token);
			/*
			 * We're releasing without dispatching. Holding the
			 * budget could have blocked any "hctx"s with the
			 * same queue and if we didn't dispatch then there's
			 * no guarantee anyone will kick the queue. Kick it
			 * ourselves.
			 */
			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
			break;
		}

		blk_mq_set_rq_budget_token(rq, budget_token);

		/*
		 * Now this rq owns the budget which has to be released
		 * if this rq won't be queued to driver via .queue_rq()
		 * in blk_mq_dispatch_rq_list().
		 */
		list_add(&rq->queuelist, &rq_list);

		/* round robin for fair dispatch */
		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);

	} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));

	/* Remember where to resume on the next call. */
	WRITE_ONCE(hctx->dispatch_from, ctx);
	return ret;
}
  229. static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  230. {
  231. struct request_queue *q = hctx->queue;
  232. const bool has_sched = q->elevator;
  233. int ret = 0;
  234. LIST_HEAD(rq_list);
  235. /*
  236. * If we have previous entries on our dispatch list, grab them first for
  237. * more fair dispatch.
  238. */
  239. if (!list_empty_careful(&hctx->dispatch)) {
  240. spin_lock(&hctx->lock);
  241. if (!list_empty(&hctx->dispatch))
  242. list_splice_init(&hctx->dispatch, &rq_list);
  243. spin_unlock(&hctx->lock);
  244. }
  245. /*
  246. * Only ask the scheduler for requests, if we didn't have residual
  247. * requests from the dispatch list. This is to avoid the case where
  248. * we only ever dispatch a fraction of the requests available because
  249. * of low device queue depth. Once we pull requests out of the IO
  250. * scheduler, we can no longer merge or sort them. So it's best to
  251. * leave them there for as long as we can. Mark the hw queue as
  252. * needing a restart in that case.
  253. *
  254. * We want to dispatch from the scheduler if there was nothing
  255. * on the dispatch list or we were able to dispatch from the
  256. * dispatch list.
  257. */
  258. if (!list_empty(&rq_list)) {
  259. blk_mq_sched_mark_restart_hctx(hctx);
  260. if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
  261. if (has_sched)
  262. ret = blk_mq_do_dispatch_sched(hctx);
  263. else
  264. ret = blk_mq_do_dispatch_ctx(hctx);
  265. }
  266. } else if (has_sched) {
  267. ret = blk_mq_do_dispatch_sched(hctx);
  268. } else if (hctx->dispatch_busy) {
  269. /* dequeue request one by one from sw queue if queue is busy */
  270. ret = blk_mq_do_dispatch_ctx(hctx);
  271. } else {
  272. blk_mq_flush_busy_ctxs(hctx, &rq_list);
  273. blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
  274. }
  275. return ret;
  276. }
  277. void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  278. {
  279. struct request_queue *q = hctx->queue;
  280. /* RCU or SRCU read lock is needed before checking quiesced flag */
  281. if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
  282. return;
  283. hctx->run++;
  284. /*
  285. * A return of -EAGAIN is an indication that hctx->dispatch is not
  286. * empty and we must run again in order to avoid starving flushes.
  287. */
  288. if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
  289. if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
  290. blk_mq_run_hw_queue(hctx, true);
  291. }
  292. }
  293. bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
  294. unsigned int nr_segs)
  295. {
  296. struct elevator_queue *e = q->elevator;
  297. struct blk_mq_ctx *ctx;
  298. struct blk_mq_hw_ctx *hctx;
  299. bool ret = false;
  300. enum hctx_type type;
  301. if (e && e->type->ops.bio_merge) {
  302. ret = e->type->ops.bio_merge(q, bio, nr_segs);
  303. goto out_put;
  304. }
  305. ctx = blk_mq_get_ctx(q);
  306. hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
  307. type = hctx->type;
  308. if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
  309. list_empty_careful(&ctx->rq_lists[type]))
  310. goto out_put;
  311. /* default per sw-queue merge */
  312. spin_lock(&ctx->lock);
  313. /*
  314. * Reverse check our software queue for entries that we could
  315. * potentially merge with. Currently includes a hand-wavy stop
  316. * count of 8, to not spend too much time checking for merges.
  317. */
  318. if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs))
  319. ret = true;
  320. spin_unlock(&ctx->lock);
  321. out_put:
  322. return ret;
  323. }
  324. bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
  325. struct list_head *free)
  326. {
  327. return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
  328. }
  329. EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
  330. static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
  331. struct request *rq)
  332. {
  333. /*
  334. * dispatch flush and passthrough rq directly
  335. *
  336. * passthrough request has to be added to hctx->dispatch directly.
  337. * For some reason, device may be in one situation which can't
  338. * handle FS request, so STS_RESOURCE is always returned and the
  339. * FS request will be added to hctx->dispatch. However passthrough
  340. * request may be required at that time for fixing the problem. If
  341. * passthrough request is added to scheduler queue, there isn't any
  342. * chance to dispatch it given we prioritize requests in hctx->dispatch.
  343. */
  344. if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
  345. return true;
  346. return false;
  347. }
  348. void blk_mq_sched_insert_request(struct request *rq, bool at_head,
  349. bool run_queue, bool async)
  350. {
  351. struct request_queue *q = rq->q;
  352. struct elevator_queue *e = q->elevator;
  353. struct blk_mq_ctx *ctx = rq->mq_ctx;
  354. struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
  355. WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
  356. if (blk_mq_sched_bypass_insert(hctx, rq)) {
  357. /*
  358. * Firstly normal IO request is inserted to scheduler queue or
  359. * sw queue, meantime we add flush request to dispatch queue(
  360. * hctx->dispatch) directly and there is at most one in-flight
  361. * flush request for each hw queue, so it doesn't matter to add
  362. * flush request to tail or front of the dispatch queue.
  363. *
  364. * Secondly in case of NCQ, flush request belongs to non-NCQ
  365. * command, and queueing it will fail when there is any
  366. * in-flight normal IO request(NCQ command). When adding flush
  367. * rq to the front of hctx->dispatch, it is easier to introduce
  368. * extra time to flush rq's latency because of S_SCHED_RESTART
  369. * compared with adding to the tail of dispatch queue, then
  370. * chance of flush merge is increased, and less flush requests
  371. * will be issued to controller. It is observed that ~10% time
  372. * is saved in blktests block/004 on disk attached to AHCI/NCQ
  373. * drive when adding flush rq to the front of hctx->dispatch.
  374. *
  375. * Simply queue flush rq to the front of hctx->dispatch so that
  376. * intensive flush workloads can benefit in case of NCQ HW.
  377. */
  378. at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
  379. blk_mq_request_bypass_insert(rq, at_head, false);
  380. goto run;
  381. }
  382. if (e) {
  383. LIST_HEAD(list);
  384. list_add(&rq->queuelist, &list);
  385. e->type->ops.insert_requests(hctx, &list, at_head);
  386. } else {
  387. spin_lock(&ctx->lock);
  388. __blk_mq_insert_request(hctx, rq, at_head);
  389. spin_unlock(&ctx->lock);
  390. }
  391. run:
  392. if (run_queue)
  393. blk_mq_run_hw_queue(hctx, async);
  394. }
  395. void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
  396. struct blk_mq_ctx *ctx,
  397. struct list_head *list, bool run_queue_async)
  398. {
  399. struct elevator_queue *e;
  400. struct request_queue *q = hctx->queue;
  401. /*
  402. * blk_mq_sched_insert_requests() is called from flush plug
  403. * context only, and hold one usage counter to prevent queue
  404. * from being released.
  405. */
  406. percpu_ref_get(&q->q_usage_counter);
  407. e = hctx->queue->elevator;
  408. if (e) {
  409. e->type->ops.insert_requests(hctx, list, false);
  410. } else {
  411. /*
  412. * try to issue requests directly if the hw queue isn't
  413. * busy in case of 'none' scheduler, and this way may save
  414. * us one extra enqueue & dequeue to sw queue.
  415. */
  416. if (!hctx->dispatch_busy && !run_queue_async) {
  417. blk_mq_run_dispatch_ops(hctx->queue,
  418. blk_mq_try_issue_list_directly(hctx, list));
  419. if (list_empty(list))
  420. goto out;
  421. }
  422. blk_mq_insert_requests(hctx, ctx, list);
  423. }
  424. blk_mq_run_hw_queue(hctx, run_queue_async);
  425. out:
  426. percpu_ref_put(&q->q_usage_counter);
  427. }
  428. static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
  429. struct blk_mq_hw_ctx *hctx,
  430. unsigned int hctx_idx)
  431. {
  432. if (blk_mq_is_shared_tags(q->tag_set->flags)) {
  433. hctx->sched_tags = q->sched_shared_tags;
  434. return 0;
  435. }
  436. hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
  437. q->nr_requests);
  438. if (!hctx->sched_tags)
  439. return -ENOMEM;
  440. return 0;
  441. }
  442. static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
  443. {
  444. blk_mq_free_rq_map(queue->sched_shared_tags);
  445. queue->sched_shared_tags = NULL;
  446. }
  447. /* called in queue's release handler, tagset has gone away */
  448. static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
  449. {
  450. struct blk_mq_hw_ctx *hctx;
  451. unsigned long i;
  452. queue_for_each_hw_ctx(q, hctx, i) {
  453. if (hctx->sched_tags) {
  454. if (!blk_mq_is_shared_tags(flags))
  455. blk_mq_free_rq_map(hctx->sched_tags);
  456. hctx->sched_tags = NULL;
  457. }
  458. }
  459. if (blk_mq_is_shared_tags(flags))
  460. blk_mq_exit_sched_shared_tags(q);
  461. }
  462. static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
  463. {
  464. struct blk_mq_tag_set *set = queue->tag_set;
  465. /*
  466. * Set initial depth at max so that we don't need to reallocate for
  467. * updating nr_requests.
  468. */
  469. queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
  470. BLK_MQ_NO_HCTX_IDX,
  471. MAX_SCHED_RQ);
  472. if (!queue->sched_shared_tags)
  473. return -ENOMEM;
  474. blk_mq_tag_update_sched_shared_tags(queue);
  475. return 0;
  476. }
/*
 * Attach and initialise the elevator type @e on queue @q.
 *
 * With @e == NULL no scheduler is used: QUEUE_FLAG_SQ_SCHED is cleared,
 * q->elevator is set to NULL and q->nr_requests is set to the tag set's
 * queue depth.
 *
 * Otherwise scheduler tags are allocated (shared or per hctx), the
 * elevator's init_sched hook is called, and its optional init_hctx hook
 * is called for every hardware context. Debugfs entries are registered
 * along the way.
 *
 * Returns 0 on success or the error code of the step that failed.
 */
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	unsigned int flags = q->tag_set->flags;
	struct blk_mq_hw_ctx *hctx;
	struct elevator_queue *eq;
	unsigned long i;
	int ret;

	if (!e) {
		blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
		q->elevator = NULL;
		q->nr_requests = q->tag_set->queue_depth;
		return 0;
	}

	/*
	 * Default to double of smaller one between hw queue_depth and 128,
	 * since we don't split into sync/async like the old code did.
	 * Additionally, this is a per-hw queue depth.
	 */
	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
				   BLKDEV_DEFAULT_RQ);

	/* Nothing else has been set up yet, so a failure just returns. */
	if (blk_mq_is_shared_tags(flags)) {
		ret = blk_mq_init_sched_shared_tags(q);
		if (ret)
			return ret;
	}

	queue_for_each_hw_ctx(q, hctx, i) {
		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
		if (ret)
			goto err_free_map_and_rqs;
	}

	ret = e->ops.init_sched(q, e);
	if (ret)
		goto err_free_map_and_rqs;

	mutex_lock(&q->debugfs_mutex);
	blk_mq_debugfs_register_sched(q);
	mutex_unlock(&q->debugfs_mutex);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (e->ops.init_hctx) {
			ret = e->ops.init_hctx(hctx, i);
			if (ret) {
				/*
				 * The elevator is already attached at this
				 * point, so unwind via the regular exit path
				 * and drop a reference on its kobject.
				 * NOTE(review): q->elevator is presumably set
				 * by ->init_sched(); confirm.
				 */
				eq = q->elevator;
				blk_mq_sched_free_rqs(q);
				blk_mq_exit_sched(q, eq);
				kobject_put(&eq->kobj);
				return ret;
			}
		}
		mutex_lock(&q->debugfs_mutex);
		blk_mq_debugfs_register_sched_hctx(q, hctx);
		mutex_unlock(&q->debugfs_mutex);
	}

	return 0;

err_free_map_and_rqs:
	/* Free the requests first, then the tag maps themselves. */
	blk_mq_sched_free_rqs(q);
	blk_mq_sched_tags_teardown(q, flags);

	q->elevator = NULL;
	return ret;
}
  535. /*
  536. * called in either blk_queue_cleanup or elevator_switch, tagset
  537. * is required for freeing requests
  538. */
  539. void blk_mq_sched_free_rqs(struct request_queue *q)
  540. {
  541. struct blk_mq_hw_ctx *hctx;
  542. unsigned long i;
  543. if (blk_mq_is_shared_tags(q->tag_set->flags)) {
  544. blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
  545. BLK_MQ_NO_HCTX_IDX);
  546. } else {
  547. queue_for_each_hw_ctx(q, hctx, i) {
  548. if (hctx->sched_tags)
  549. blk_mq_free_rqs(q->tag_set,
  550. hctx->sched_tags, i);
  551. }
  552. }
  553. }
/*
 * Tear down elevator @e on queue @q.
 *
 * For every hardware context the debugfs entries are unregistered and the
 * elevator's optional exit_hctx hook is called when the hctx has scheduler
 * data. Afterwards the queue-level debugfs entries are unregistered, the
 * optional exit_sched hook is called, the scheduler tag maps are released
 * and q->elevator is cleared.
 */
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;
	unsigned int flags = 0;

	queue_for_each_hw_ctx(q, hctx, i) {
		mutex_lock(&q->debugfs_mutex);
		blk_mq_debugfs_unregister_sched_hctx(hctx);
		mutex_unlock(&q->debugfs_mutex);

		/* Only call exit_hctx for contexts that have scheduler data. */
		if (e->type->ops.exit_hctx && hctx->sched_data) {
			e->type->ops.exit_hctx(hctx, i);
			hctx->sched_data = NULL;
		}
		/*
		 * Keeps the flags of the last hctx visited; flags stays 0 if
		 * the queue has no hardware contexts.
		 */
		flags = hctx->flags;
	}

	mutex_lock(&q->debugfs_mutex);
	blk_mq_debugfs_unregister_sched(q);
	mutex_unlock(&q->debugfs_mutex);

	if (e->type->ops.exit_sched)
		e->type->ops.exit_sched(e);
	blk_mq_sched_tags_teardown(q, flags);
	q->elevator = NULL;
}