blk.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef BLK_INTERNAL_H
  3. #define BLK_INTERNAL_H
  4. #include <linux/blk-crypto.h>
  5. #include <linux/memblock.h> /* for max_pfn/max_low_pfn */
  6. #include <xen/xen.h>
  7. #include "blk-crypto-internal.h"
  /* Forward declaration; this header only uses the type through a pointer. */
  8. struct elevator_type;
  9. /* Max future timer expiry for timeouts */
  /* Expressed as a multiple of HZ (5 * HZ). */
  10. #define BLK_MAX_TIMEOUT (5 * HZ)
  /* Declared here only; the object itself is defined outside this header. */
  11. extern struct dentry *blk_debugfs_root;
  /*
   * Flush state object handed out by blk_alloc_flush_queue() and released
   * by blk_free_flush_queue() (both declared in this header).
   *
   * NOTE(review): the per-field notes below describe only what the
   * declarations show; the exact roles are inferred from the names and
   * should be confirmed against the flush implementation.
   */
  12. struct blk_flush_queue {
  /* Two one-bit fields; presumably indices into flush_queue[2] -- TODO confirm. */
  13. unsigned int flush_pending_idx:1;
  14. unsigned int flush_running_idx:1;
  /* A block-layer status value (blk_status_t). */
  15. blk_status_t rq_status;
  /* presumably a jiffies timestamp -- TODO confirm. */
  16. unsigned long flush_pending_since;
  /* Two list heads, matching the two possible values of the one-bit indices. */
  17. struct list_head flush_queue[2];
  18. struct list_head flush_data_in_flight;
  /* Pointer to a request; also see is_flush_rq() declared in this header. */
  19. struct request *flush_rq;
  /* Spinlock; NOTE(review): presumably protects the fields above -- confirm. */
  20. spinlock_t mq_flush_lock;
  21. };
  /*
   * Objects defined outside this header. The two kmem caches are the ones
   * returned by blk_get_queue_kmem_cache() further down.
   */
  22. extern struct kmem_cache *blk_requestq_cachep;
  23. extern struct kmem_cache *blk_requestq_srcu_cachep;
  24. extern struct kobj_type blk_queue_ktype;
  25. extern struct ida blk_queue_ida;
  /* Prototypes only; the definitions are not part of this header. */
  26. bool is_flush_rq(struct request *req);
  27. struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
  28. gfp_t flags);
  29. void blk_free_flush_queue(struct blk_flush_queue *q);
  30. void blk_freeze_queue(struct request_queue *q);
  31. void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
  32. void blk_queue_start_drain(struct request_queue *q);
  /* Fallback called by bio_queue_enter() when blk_try_enter_queue() fails. */
  33. int __bio_queue_enter(struct request_queue *q, struct bio *bio);
  34. void submit_bio_noacct_nocheck(struct bio *bio);
  35. static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
  36. {
  37. rcu_read_lock();
  38. if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
  39. goto fail;
  40. /*
  41. * The code that increments the pm_only counter must ensure that the
  42. * counter is globally visible before the queue is unfrozen.
  43. */
  44. if (blk_queue_pm_only(q) &&
  45. (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
  46. goto fail_put;
  47. rcu_read_unlock();
  48. return true;
  49. fail_put:
  50. blk_queue_exit(q);
  51. fail:
  52. rcu_read_unlock();
  53. return false;
  54. }
  55. static inline int bio_queue_enter(struct bio *bio)
  56. {
  57. struct request_queue *q = bdev_get_queue(bio->bi_bdev);
  58. if (blk_try_enter_queue(q, false))
  59. return 0;
  60. return __bio_queue_enter(q, bio);
  61. }
  /* NOTE(review): presumably the number of bio_vecs that can live inline in a bio -- confirm against bio.c. */
  62. #define BIO_INLINE_VECS 4
  /*
   * bio_vec array allocation and release; prototypes only. @nr_vecs is
   * passed by pointer to bvec_alloc() and by value to bvec_free().
   */
  63. struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
  64. gfp_t gfp_mask);
  65. void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs);
  66. static inline bool biovec_phys_mergeable(struct request_queue *q,
  67. struct bio_vec *vec1, struct bio_vec *vec2)
  68. {
  69. unsigned long mask = queue_segment_boundary(q);
  70. phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
  71. phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
  72. /*
  73. * Merging adjacent physical pages may not work correctly under KMSAN
  74. * if their metadata pages aren't adjacent. Just disable merging.
  75. */
  76. if (IS_ENABLED(CONFIG_KMSAN))
  77. return false;
  78. if (addr1 + vec1->bv_len != addr2)
  79. return false;
  80. if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page))
  81. return false;
  82. if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask))
  83. return false;
  84. return true;
  85. }
  86. static inline bool __bvec_gap_to_prev(struct queue_limits *lim,
  87. struct bio_vec *bprv, unsigned int offset)
  88. {
  89. return (offset & lim->virt_boundary_mask) ||
  90. ((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask);
  91. }
  92. /*
  93. * Check if adding a bio_vec after bprv with offset would create a gap in
  94. * the SG list. Most drivers don't care about this, but some do.
  95. */
  96. static inline bool bvec_gap_to_prev(struct queue_limits *lim,
  97. struct bio_vec *bprv, unsigned int offset)
  98. {
  99. if (!lim->virt_boundary_mask)
  100. return false;
  101. return __bvec_gap_to_prev(lim, bprv, offset);
  102. }
  103. static inline bool rq_mergeable(struct request *rq)
  104. {
  105. if (blk_rq_is_passthrough(rq))
  106. return false;
  107. if (req_op(rq) == REQ_OP_FLUSH)
  108. return false;
  109. if (req_op(rq) == REQ_OP_WRITE_ZEROES)
  110. return false;
  111. if (req_op(rq) == REQ_OP_ZONE_APPEND)
  112. return false;
  113. if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
  114. return false;
  115. if (rq->rq_flags & RQF_NOMERGE_FLAGS)
  116. return false;
  117. return true;
  118. }
  119. /*
  120. * There are two different ways to handle DISCARD merges:
  121. * 1) If max_discard_segments > 1, the driver treats every bio as a range and
  122. * send the bios to controller together. The ranges don't need to be
  123. * contiguous.
  124. * 2) Otherwise, the request will be normal read/write requests. The ranges
  125. * need to be contiguous.
  126. */
  127. static inline bool blk_discard_mergable(struct request *req)
  128. {
  129. if (req_op(req) == REQ_OP_DISCARD &&
  130. queue_max_discard_segments(req->q) > 1)
  131. return true;
  132. return false;
  133. }
  134. static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
  135. enum req_op op)
  136. {
  137. if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
  138. return min(q->limits.max_discard_sectors,
  139. UINT_MAX >> SECTOR_SHIFT);
  140. if (unlikely(op == REQ_OP_WRITE_ZEROES))
  141. return q->limits.max_write_zeroes_sectors;
  142. return q->limits.max_sectors;
  143. }
  #ifdef CONFIG_BLK_DEV_INTEGRITY
  void blk_flush_integrity(void);
  bool __bio_integrity_endio(struct bio *);
  void bio_integrity_free(struct bio *bio);

  /*
   * Returns true for a bio without an integrity payload; otherwise returns
   * the result of __bio_integrity_endio().
   */
  static inline bool bio_integrity_endio(struct bio *bio)
  {
      return !bio_integrity(bio) || __bio_integrity_endio(bio);
  }

  bool blk_integrity_merge_rq(struct request_queue *, struct request *,
                  struct request *);
  bool blk_integrity_merge_bio(struct request_queue *, struct request *,
                  struct bio *);

  /*
   * Gap check on the integrity vectors for appending @next to @req: the
   * last integrity vec of req->bio is tested against the offset of the
   * first integrity vec of @next using bvec_gap_to_prev().
   */
  static inline bool integrity_req_gap_back_merge(struct request *req,
                  struct bio *next)
  {
      struct bio_integrity_payload *req_bip = bio_integrity(req->bio);
      struct bio_integrity_payload *next_bip = bio_integrity(next);
      struct bio_vec *last_vec = &req_bip->bip_vec[req_bip->bip_vcnt - 1];

      return bvec_gap_to_prev(&req->q->limits, last_vec,
                              next_bip->bip_vec[0].bv_offset);
  }

  /*
   * Gap check on the integrity vectors for placing @bio in front of @req:
   * the last integrity vec of @bio is tested against the offset of the
   * first integrity vec of req->bio using bvec_gap_to_prev().
   */
  static inline bool integrity_req_gap_front_merge(struct request *req,
                  struct bio *bio)
  {
      struct bio_integrity_payload *bio_bip = bio_integrity(bio);
      struct bio_integrity_payload *req_bip = bio_integrity(req->bio);
      struct bio_vec *last_vec = &bio_bip->bip_vec[bio_bip->bip_vcnt - 1];

      return bvec_gap_to_prev(&req->q->limits, last_vec,
                              req_bip->bip_vec[0].bv_offset);
  }

  int blk_integrity_add(struct gendisk *disk);
  void blk_integrity_del(struct gendisk *);
  #else /* CONFIG_BLK_DEV_INTEGRITY */
  /*
   * Stubs used when CONFIG_BLK_DEV_INTEGRITY is not set: merges are always
   * allowed, no gap is ever reported, end-io always returns true, and the
   * add/del/free/flush hooks do nothing.
   */
  static inline bool blk_integrity_merge_rq(struct request_queue *rq,
                  struct request *r1, struct request *r2)
  {
      return true;
  }

  static inline bool blk_integrity_merge_bio(struct request_queue *rq,
                  struct request *r, struct bio *b)
  {
      return true;
  }

  static inline bool integrity_req_gap_back_merge(struct request *req,
                  struct bio *next)
  {
      return false;
  }

  static inline bool integrity_req_gap_front_merge(struct request *req,
                  struct bio *bio)
  {
      return false;
  }

  static inline void blk_flush_integrity(void) { }

  static inline bool bio_integrity_endio(struct bio *bio)
  {
      return true;
  }

  static inline void bio_integrity_free(struct bio *bio) { }

  static inline int blk_integrity_add(struct gendisk *disk)
  {
      return 0;
  }

  static inline void blk_integrity_del(struct gendisk *disk) { }
  #endif /* CONFIG_BLK_DEV_INTEGRITY */
  /* Prototypes only; none of these functions is defined in this header. */
  217. unsigned long blk_rq_timeout(unsigned long timeout);
  218. void blk_add_timer(struct request *req);
  219. const char *blk_status_to_str(blk_status_t status);
  220. bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
  221. unsigned int nr_segs);
  222. bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
  223. struct bio *bio, unsigned int nr_segs);
  224. /*
  225. * Plug flush limits
  226. */
  /* A request count (32) and a size of 128 * 1024; NOTE(review): the size is presumably in bytes -- confirm. */
  227. #define BLK_MAX_REQUEST_COUNT 32
  228. #define BLK_PLUG_FLUSH_SIZE (128 * 1024)
  229. /*
  230. * Internal elevator interface
  231. */
  /* Non-zero when RQF_HASHED is set in (rq)->rq_flags. */
  232. #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
  233. void blk_insert_flush(struct request *rq);
  234. int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
  235. void elevator_exit(struct request_queue *q);
  236. int elv_register_queue(struct request_queue *q, bool uevent);
  237. void elv_unregister_queue(struct request_queue *q);
  /*
   * The part_*_show()/part_*_store() prototypes below all take a
   * struct device, a struct device_attribute and a character buffer
   * (plus a size_t count for the store variants) and return ssize_t.
   */
  238. ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
  239. char *buf);
  240. ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
  241. char *buf);
  242. ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
  243. char *buf);
  244. ssize_t part_fail_show(struct device *dev, struct device_attribute *attr,
  245. char *buf);
  246. ssize_t part_fail_store(struct device *dev, struct device_attribute *attr,
  247. const char *buf, size_t count);
  248. ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
  249. ssize_t part_timeout_store(struct device *, struct device_attribute *,
  250. const char *, size_t);
  251. static inline bool bio_may_exceed_limits(struct bio *bio,
  252. struct queue_limits *lim)
  253. {
  254. switch (bio_op(bio)) {
  255. case REQ_OP_DISCARD:
  256. case REQ_OP_SECURE_ERASE:
  257. case REQ_OP_WRITE_ZEROES:
  258. return true; /* non-trivial splitting decisions */
  259. default:
  260. break;
  261. }
  262. /*
  263. * All drivers must accept single-segments bios that are <= PAGE_SIZE.
  264. * This is a quick and dirty check that relies on the fact that
  265. * bi_io_vec[0] is always valid if a bio has data. The check might
  266. * lead to occasional false negatives when bios are cloned, but compared
  267. * to the performance impact of cloned bios themselves the loop below
  268. * doesn't matter anyway.
  269. */
  270. return lim->chunk_sectors || bio->bi_vcnt != 1 ||
  271. bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
  272. }
  /*
   * Prototypes only; the definitions are outside this header.
   * __bio_split_to_limits() takes @nr_segs as a pointer (an output
   * parameter, presumably -- TODO confirm), whereas ll_back_merge_fn()
   * receives it by value.
   */
  273. struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
  274. unsigned int *nr_segs);
  275. int ll_back_merge_fn(struct request *req, struct bio *bio,
  276. unsigned int nr_segs);
  277. bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
  278. struct request *next);
  279. unsigned int blk_recalc_rq_segments(struct request *rq);
  280. void blk_rq_set_mixed_merge(struct request *rq);
  281. bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
  282. enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
  283. void blk_set_default_limits(struct queue_limits *lim);
  284. int blk_dev_init(void);
  285. /*
  286. * Contribute to IO statistics IFF:
  287. *
  288. * a) it's attached to a gendisk, and
  289. * b) the queue had IO stats enabled when this request was started
  290. */
  291. static inline bool blk_do_io_stat(struct request *rq)
  292. {
  293. return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq);
  294. }
  295. void update_io_ticks(struct block_device *part, unsigned long now, bool end);
  296. static inline void req_set_nomerge(struct request_queue *q, struct request *req)
  297. {
  298. req->cmd_flags |= REQ_NOMERGE;
  299. if (req == q->last_merge)
  300. q->last_merge = NULL;
  301. }
  302. /*
  303. * Internal io_context interface
  304. */
  305. struct io_cq *ioc_find_get_icq(struct request_queue *q);
  306. struct io_cq *ioc_lookup_icq(struct request_queue *q);
  #ifdef CONFIG_BLK_ICQ
  void ioc_clear_queue(struct request_queue *q);
  #else
  /* Without CONFIG_BLK_ICQ this is an empty stub. */
  static inline void ioc_clear_queue(struct request_queue *q) { }
  #endif /* CONFIG_BLK_ICQ */
  314. #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
  315. extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
  316. extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
  317. const char *page, size_t count);
  318. extern void blk_throtl_bio_endio(struct bio *bio);
  319. extern void blk_throtl_stat_add(struct request *rq, u64 time);
  320. #else
  321. static inline void blk_throtl_bio_endio(struct bio *bio) { }
  322. static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
  323. #endif
  324. struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
  325. static inline bool blk_queue_may_bounce(struct request_queue *q)
  326. {
  327. return IS_ENABLED(CONFIG_BOUNCE) &&
  328. q->limits.bounce == BLK_BOUNCE_HIGH &&
  329. max_low_pfn >= max_pfn;
  330. }
  /*
   * Return the bio to submit for @bio on @q.
   *
   * In the common case @bio itself is returned. Only when
   * blk_queue_may_bounce() is true for @q and @bio carries data is the
   * result of __blk_queue_bounce() returned instead.
   */
  static inline struct bio *blk_queue_bounce(struct bio *bio,
                  struct request_queue *q)
  {
      if (likely(!blk_queue_may_bounce(q) || !bio_has_data(bio)))
          return bio;

      return __blk_queue_bounce(bio, q);
  }
  #ifdef CONFIG_BLK_CGROUP_IOLATENCY
  int blk_iolatency_init(struct gendisk *disk);
  #else
  /*
   * Stub used when CONFIG_BLK_CGROUP_IOLATENCY is not set: always returns 0.
   * (No ';' after the body: an empty declaration at file scope is not
   * valid ISO C.)
   */
  static inline int blk_iolatency_init(struct gendisk *disk) { return 0; }
  #endif
  #ifdef CONFIG_BLK_DEV_ZONED
  void disk_free_zone_bitmaps(struct gendisk *disk);
  void disk_clear_zone_settings(struct gendisk *disk);
  #else
  /* Without CONFIG_BLK_DEV_ZONED both helpers are empty stubs. */
  static inline void disk_free_zone_bitmaps(struct gendisk *disk)
  {
  }

  static inline void disk_clear_zone_settings(struct gendisk *disk)
  {
  }
  #endif
  /* Prototypes only; the definitions are outside this header. */
  350. int blk_alloc_ext_minor(void);
  351. void blk_free_ext_minor(unsigned int minor);
  /* Flag values 0, 1 and 2; NOTE(review): not used anywhere in this header -- confirm the consumers before changing. */
  352. #define ADDPART_FLAG_NONE 0
  353. #define ADDPART_FLAG_RAID 1
  354. #define ADDPART_FLAG_WHOLEDISK 2
  /* Partition add/delete/resize: addressed by (disk, partno); start and length are sector_t values. */
  355. int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
  356. sector_t length);
  357. int bdev_del_partition(struct gendisk *disk, int partno);
  358. int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
  359. sector_t length);
  360. void blk_drop_partitions(struct gendisk *disk);
  361. struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
  362. struct lock_class_key *lkclass);
  /* @same_page is passed by pointer (an output flag, presumably -- TODO confirm). */
  363. int bio_add_hw_page(struct request_queue *q, struct bio *bio,
  364. struct page *page, unsigned int len, unsigned int offset,
  365. unsigned int max_sectors, bool *same_page);
  366. static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
  367. {
  368. if (srcu)
  369. return blk_requestq_srcu_cachep;
  370. return blk_requestq_cachep;
  371. }
  /* Prototypes and extern objects only; nothing below is defined in this header. */
  /* @alloc_srcu: NOTE(review): presumably selects the SRCU-capable cache from blk_get_queue_kmem_cache() -- confirm. */
  372. struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
  373. int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
  /* Disk event helpers, all operating on a struct gendisk. */
  374. int disk_alloc_events(struct gendisk *disk);
  375. void disk_add_events(struct gendisk *disk);
  376. void disk_del_events(struct gendisk *disk);
  377. void disk_release_events(struct gendisk *disk);
  378. void disk_block_events(struct gendisk *disk);
  379. void disk_unblock_events(struct gendisk *disk);
  380. void disk_flush_events(struct gendisk *disk, unsigned int mask);
  /* Device attributes and attribute group defined outside this header. */
  381. extern struct device_attribute dev_attr_events;
  382. extern struct device_attribute dev_attr_events_async;
  383. extern struct device_attribute dev_attr_events_poll_msecs;
  384. extern struct attribute_group blk_trace_attr_group;
  /* ioctl entry points; both take (file, cmd, arg) and return long. */
  385. long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
  386. long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
  387. extern const struct address_space_operations def_blk_aops;
  388. int disk_register_independent_access_ranges(struct gendisk *disk);
  389. void disk_unregister_independent_access_ranges(struct gendisk *disk);
  #ifdef CONFIG_FAIL_MAKE_REQUEST
  bool should_fail_request(struct block_device *part, unsigned int bytes);
  #else /* CONFIG_FAIL_MAKE_REQUEST */
  /*
   * Stub used when CONFIG_FAIL_MAKE_REQUEST is not set: the answer is a
   * constant false regardless of @part and @bytes.
   */
  static inline bool should_fail_request(struct block_device *part,
                                         unsigned int bytes)
  {
      return false;
  }
  #endif /* CONFIG_FAIL_MAKE_REQUEST */
  399. /*
  400. * Optimized request reference counting. Ideally we'd make timeouts be more
  401. * clever, as that's the only reason we need references at all... But until
  402. * this happens, this is faster than using refcount_t. Also see:
  403. *
  404. * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
  405. */
  /*
   * True when the reference count of @req, read as an unsigned int, lies in
   * the window [-127, 0]: adding 127 then wraps to a value <= 127. Used as
   * the WARN condition in req_ref_put_and_test().
   *
   * The argument is parenthesized so that any pointer expression (e.g.
   * "&rq" or "p + 1") expands correctly, not just a plain identifier.
   */
  #define req_ref_zero_or_close_to_overflow(req) \
      ((unsigned int) atomic_read(&(req)->ref) + 127u <= 127u)
  408. static inline bool req_ref_inc_not_zero(struct request *req)
  409. {
  410. return atomic_inc_not_zero(&req->ref);
  411. }
  412. static inline bool req_ref_put_and_test(struct request *req)
  413. {
  414. WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
  415. return atomic_dec_and_test(&req->ref);
  416. }
  417. static inline void req_ref_set(struct request *req, int value)
  418. {
  419. atomic_set(&req->ref, value);
  420. }
  421. static inline int req_ref_read(struct request *req)
  422. {
  423. return atomic_read(&req->ref);
  424. }
  425. #endif /* BLK_INTERNAL_H */