Merge tag 'for-linus-20190715' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:
 "A later pull request with some followup items. I had some vacation
  coming up to the merge window, so certain items were delayed a bit.
  This pull request also contains fixes that came in within the last
  few days of the merge window, which I didn't want to push right
  before sending you a pull request.

  This contains:

   - NVMe pull request, mostly fixes, but also a few minor items on
     the feature side that were timing constrained (Christoph et al)

   - Report zones fixes (Damien)

   - Removal of dead code (Damien)

   - Turn on cgroup psi memstall (Josef)

   - block cgroup MAINTAINERS entry (Konstantin)

   - Flush init fix (Josef)

   - blk-throttle low iops timing fix (Konstantin)

   - nbd resize fixes (Mike)

   - nbd 0 blocksize crash fix (Xiubo)

   - block integrity error leak fix (Wenwen)

   - blk-cgroup writeback and priority inheritance fixes (Tejun)"

* tag 'for-linus-20190715' of git://git.kernel.dk/linux-block: (42 commits)
  MAINTAINERS: add entry for block io cgroup
  null_blk: fixup ->report_zones() for !CONFIG_BLK_DEV_ZONED
  block: Limit zone array allocation size
  sd_zbc: Fix report zones buffer allocation
  block: Kill gfp_t argument of blkdev_report_zones()
  block: Allow mapping of vmalloc-ed buffers
  block/bio-integrity: fix a memory leak bug
  nvme: fix NULL deref for fabrics options
  nbd: add netlink reconfigure resize support
  nbd: fix crash when the blksize is zero
  block: Disable write plugging for zoned block devices
  block: Fix elevator name declaration
  block: Remove unused definitions
  nvme: fix regression upon hot device removal and insertion
  blk-throttle: fix zero wait time for iops throttled group
  block: Fix potential overflow in blk_report_zones()
  blkcg: implement REQ_CGROUP_PUNT
  blkcg, writeback: Implement wbc_blkcg_css()
  blkcg, writeback: Add wbc->no_cgroup_owner
  blkcg, writeback: Rename wbc_account_io() to wbc_account_cgroup_owner()
  ...
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -48,6 +48,7 @@ extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
 extern struct workqueue_struct *bdi_wq;
+extern struct workqueue_struct *bdi_async_bio_wq;
 
 static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
 {
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -132,13 +132,17 @@ struct blkcg_gq {
 
 	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
 
-	struct rcu_head			rcu_head;
+	spinlock_t			async_bio_lock;
+	struct bio_list			async_bios;
+	struct work_struct		async_bio_work;
 
 	atomic_t			use_delay;
 	atomic64_t			delay_nsec;
 	atomic64_t			delay_start;
 	u64				last_delay;
 	int				last_use;
+
+	struct rcu_head			rcu_head;
 };
 
 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -701,6 +705,15 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
 				  struct bio *bio) { return false; }
 #endif
 
+bool __blkcg_punt_bio_submit(struct bio *bio);
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio)
+{
+	if (bio->bi_opf & REQ_CGROUP_PUNT)
+		return __blkcg_punt_bio_submit(bio);
+	else
+		return false;
+}
+
 static inline void blkcg_bio_issue_init(struct bio *bio)
 {
@@ -848,6 +861,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
 
+static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio) { return true; }
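For context, blkcg_punt_bio_submit() is meant to be called at the very top of the bio submission path. A minimal sketch of such a caller, assuming the 5.3-era generic_make_request() interface (the wrapper name here is hypothetical, not part of this patch):

	/*
	 * Hypothetical wrapper showing where blkcg_punt_bio_submit() slots
	 * in: if REQ_CGROUP_PUNT is set, the bio is queued on the blkcg's
	 * async_bios list and issued later from async_bio_work in a
	 * per-blkcg worker, so the caller must not touch it again.
	 */
	static blk_qc_t example_submit_bio(struct bio *bio)
	{
		if (blkcg_punt_bio_submit(bio))
			return BLK_QC_T_NONE;	/* punted; issued asynchronously */

		return generic_make_request(bio);
	}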
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,14 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,		/* Don't wait if request will block */
+	/*
+	 * When a shared kthread needs to issue a bio for a cgroup, doing
+	 * so synchronously can lead to priority inversions as the kthread
+	 * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
+	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
+	 * work item to avoid such priority inversions.
+	 */
+	__REQ_CGROUP_PUNT,
 
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
@@ -337,6 +345,8 @@ enum req_flag_bits {
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
+#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
+
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
 #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
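The comment above describes the producer side: a shared worker that issues IO on behalf of many cgroups tags the bio instead of issuing it synchronously. A minimal sketch, assuming the bio still needs to be associated with the target cgroup first (the function name is made up; bio_associate_blkg_from_css() is the existing association helper):

	/*
	 * Sketch: issue a bio on behalf of a cgroup from a shared kthread.
	 * Tagging with REQ_CGROUP_PUNT makes submit_bio() hand the bio to
	 * the per-blkcg worker instead of issuing it in this context.
	 */
	static void example_issue_for_cgroup(struct bio *bio,
					     struct cgroup_subsys_state *blkcg_css)
	{
		bio_associate_blkg_from_css(bio, blkcg_css);
		bio->bi_opf |= REQ_CGROUP_PUNT;
		submit_bio(bio);
	}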
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -344,10 +344,15 @@ struct queue_limits {
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
+/*
+ * Maximum number of zones to report with a single report zones command.
+ */
+#define BLK_ZONED_REPORT_MAX_ZONES	8192U
+
 extern unsigned int blkdev_nr_zones(struct block_device *bdev);
 extern int blkdev_report_zones(struct block_device *bdev,
 			       sector_t sector, struct blk_zone *zones,
-			       unsigned int *nr_zones, gfp_t gfp_mask);
+			       unsigned int *nr_zones);
 extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
 			      sector_t nr_sectors, gfp_t gfp_mask);
 extern int blk_revalidate_disk_zones(struct gendisk *disk);
@@ -681,7 +686,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 	}
 }
 
-static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
+static inline sector_t blk_queue_zone_sectors(struct request_queue *q)
 {
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
@@ -1418,7 +1423,7 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
 	return false;
 }
 
-static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
+static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 
@@ -1673,8 +1678,7 @@ struct block_device_operations {
 	/* this callback is with swap_lock and sometimes page table lock held */
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 	int (*report_zones)(struct gendisk *, sector_t sector,
-			    struct blk_zone *zones, unsigned int *nr_zones,
-			    gfp_t gfp_mask);
+			    struct blk_zone *zones, unsigned int *nr_zones);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 };
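With the gfp_t argument gone, callers of blkdev_report_zones() allocate the zone array themselves, and BLK_ZONED_REPORT_MAX_ZONES bounds how many zones one call can usefully ask for. A minimal sketch of a caller under the new signature (the function name is made up):

	/* Report up to one batch of zones starting at 'sector'. */
	static int example_report_zones(struct block_device *bdev, sector_t sector)
	{
		unsigned int nr_zones = min(blkdev_nr_zones(bdev),
					    BLK_ZONED_REPORT_MAX_ZONES);
		struct blk_zone *zones;
		int ret;

		zones = kvcalloc(nr_zones, sizeof(*zones), GFP_KERNEL);
		if (!zones)
			return -ENOMEM;

		/* On success, nr_zones is updated to the number reported. */
		ret = blkdev_report_zones(bdev, sector, zones, &nr_zones);

		kvfree(zones);
		return ret;
	}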
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -699,6 +699,7 @@ void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
 struct cgroup_subsys_state;
 struct cgroup;
 
+static inline void css_get(struct cgroup_subsys_state *css) {}
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
 					 struct task_struct *t) { return 0; }
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -95,8 +95,7 @@ typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **
 
 typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
 				   struct blk_zone *zones,
-				   unsigned int *nr_zones,
-				   gfp_t gfp_mask);
+				   unsigned int *nr_zones);
 
 /*
  * These iteration functions are typically used to check (and combine)
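A target implementing the updated dm_report_zones_fn typically just remaps the sector and forwards to the underlying device. A rough sketch, assuming a hypothetical per-target context holding the backing device and start offset:

	struct example_target {			/* hypothetical per-target state */
		struct dm_dev *dev;
		sector_t start;
	};

	static int example_report_zones(struct dm_target *ti, sector_t sector,
					struct blk_zone *zones,
					unsigned int *nr_zones)
	{
		struct example_target *et = ti->private;

		/* Remap from the target's address space to the device's. */
		return blkdev_report_zones(et->dev->bdev,
					   et->start + (sector - ti->begin),
					   zones, nr_zones);
	}

(Real targets such as dm-linear also need the reported zone start sectors translated back into the target's address space before returning them to the caller.)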
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -75,7 +75,7 @@ struct elevator_type
 	size_t icq_size;	/* see iocontext.h */
 	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
-	char elevator_name[ELV_NAME_MAX];
+	const char *elevator_name;
 	const char *elevator_alias;
 	struct module *elevator_owner;
 #ifdef CONFIG_BLK_DEBUG_FS
@@ -160,15 +160,6 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
 #define ELEVATOR_INSERT_FLUSH	5
 #define ELEVATOR_INSERT_SORT_MERGE	6
 
-/*
- * return values from elevator_may_queue_fn
- */
-enum {
-	ELV_MQUEUE_MAY,
-	ELV_MQUEUE_NO,
-	ELV_MQUEUE_MUST,
-};
-
 #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
 #define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
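The elevator_name change ("block: Fix elevator name declaration" in the shortlog) turns the fixed char[ELV_NAME_MAX] array into a pointer, so a scheduler now points it at a string literal instead of having the name copied into the struct. A sketch of what a scheduler's elevator_type initializer looks like after the change (ops elided; the name "example" is a placeholder):

	static struct elevator_type example_sched = {
		.ops = {
			/* ... elevator_mq_ops callbacks ... */
		},
		.elevator_name	= "example",	/* string literal, no ELV_NAME_MAX copy */
		.elevator_owner	= THIS_MODULE,
	};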
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -315,7 +315,7 @@ struct nvme_id_ns {
 	__u8			nmic;
 	__u8			rescap;
 	__u8			fpi;
-	__u8			rsvd33;
+	__u8			dlfeat;
 	__le16			nawun;
 	__le16			nawupf;
 	__le16			nacwu;
@@ -324,11 +324,17 @@ struct nvme_id_ns {
 	__le16			nabspf;
 	__le16			noiob;
 	__u8			nvmcap[16];
-	__u8			rsvd64[28];
+	__le16			npwg;
+	__le16			npwa;
+	__le16			npdg;
+	__le16			npda;
+	__le16			nows;
+	__u8			rsvd74[18];
 	__le32			anagrpid;
 	__u8			rsvd96[3];
 	__u8			nsattr;
 	__u8			rsvd100[4];
 	__le16			nvmsetid;
 	__le16			endgid;
 	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
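The new identify fields (NPWG/NPWA/NPDG/NPDA/NOWS, from NVMe 1.4) describe preferred write granularity and optimal write size as 0's-based counts of logical blocks. A hedged sketch of how a driver might fold them into queue limits; this is illustrative only, not necessarily the exact logic the nvme driver adopted:

	static void example_apply_io_hints(struct request_queue *q,
					   struct nvme_id_ns *id,
					   unsigned int lb_size)
	{
		if (id->npwg)	/* preferred write granularity, 0's based */
			blk_queue_physical_block_size(q,
				lb_size * (1 + le16_to_cpu(id->npwg)));
		if (id->nows)	/* optimal write size, 0's based */
			blk_queue_io_opt(q,
				lb_size * (1 + le16_to_cpu(id->nows)));
	}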
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,6 +11,7 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/blk_types.h>
+#include <linux/blk-cgroup.h>
 
 struct bio;
 
@@ -68,6 +69,17 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
+
+	/*
+	 * When writeback IOs are bounced through async layers, only the
+	 * initial synchronous phase should be accounted towards inode
+	 * cgroup ownership arbitration to avoid confusion. Later stages
+	 * can set the following flag to disable the accounting.
+	 */
+	unsigned no_cgroup_owner:1;
+
+	unsigned punt_to_cgroup:1;	/* cgrp punting, see __REQ_CGROUP_PUNT */
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback *wb;	/* wb this writeback is issued under */
 	struct inode *inode;		/* inode being written out */
@@ -84,12 +96,27 @@ struct writeback_control {
 
 static inline int wbc_to_write_flags(struct writeback_control *wbc)
 {
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		return REQ_SYNC;
-	else if (wbc->for_kupdate || wbc->for_background)
-		return REQ_BACKGROUND;
+	int flags = 0;
+
+	if (wbc->punt_to_cgroup)
+		flags = REQ_CGROUP_PUNT;
 
-	return 0;
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		flags |= REQ_SYNC;
+	else if (wbc->for_kupdate || wbc->for_background)
+		flags |= REQ_BACKGROUND;
+
+	return flags;
 }
 
+static inline struct cgroup_subsys_state *
+wbc_blkcg_css(struct writeback_control *wbc)
+{
+#ifdef CONFIG_CGROUP_WRITEBACK
+	if (wbc->wb)
+		return wbc->wb->blkcg_css;
+#endif
+	return blkcg_root_css;
+}
+
 /*
@@ -188,8 +215,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 				 struct inode *inode)
 	__releases(&inode->i_lock);
 void wbc_detach_inode(struct writeback_control *wbc);
-void wbc_account_io(struct writeback_control *wbc, struct page *page,
-		    size_t bytes);
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+			      size_t bytes);
 void cgroup_writeback_umount(void);
 
 /**
@@ -291,8 +318,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
 }
 
-static inline void wbc_account_io(struct writeback_control *wbc,
-				  struct page *page, size_t bytes)
+static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
+					    struct page *page, size_t bytes)
 {
 }
 
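Taken together, the writeback.h changes mean a filesystem's write path builds its op flags from the wbc (now including REQ_CGROUP_PUNT when punt_to_cgroup is set) and accounts pages through the renamed helper. A minimal sketch of such a path, assuming a single-page bio (the function name is made up):

	static void example_write_page(struct writeback_control *wbc,
				       struct bio *bio, struct page *page)
	{
		wbc_init_bio(wbc, bio);		/* associate bio with wbc's blkcg */
		bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
		bio_add_page(bio, page, PAGE_SIZE, 0);
		/*
		 * Charge this page toward inode cgroup ownership arbitration,
		 * unless wbc->no_cgroup_owner is set.
		 */
		wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
		submit_bio(bio);
	}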