Merge tag 'for-linus-20190715' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
 "A later pull request with some followup items. I had some vacation
  coming up to the merge window, so certain things items were delayed a
  bit. This pull request also contains fixes that came in within the
  last few days of the merge window, which I didn't want to push right
  before sending you a pull request.

  This contains:

   - NVMe pull request, mostly fixes, but also a few minor items on the
     feature side that were timing constrained (Christoph et al)

   - Report zones fixes (Damien)

   - Removal of dead code (Damien)

   - Turn on cgroup psi memstall (Josef)

   - block cgroup MAINTAINERS entry (Konstantin)

   - Flush init fix (Josef)

   - blk-throttle low iops timing fix (Konstantin)

   - nbd resize fixes (Mike)

   - nbd 0 blocksize crash fix (Xiubo)

   - block integrity error leak fix (Wenwen)

   - blk-cgroup writeback and priority inheritance fixes (Tejun)"

* tag 'for-linus-20190715' of git://git.kernel.dk/linux-block: (42 commits)
  MAINTAINERS: add entry for block io cgroup
  null_blk: fixup ->report_zones() for !CONFIG_BLK_DEV_ZONED
  block: Limit zone array allocation size
  sd_zbc: Fix report zones buffer allocation
  block: Kill gfp_t argument of blkdev_report_zones()
  block: Allow mapping of vmalloc-ed buffers
  block/bio-integrity: fix a memory leak bug
  nvme: fix NULL deref for fabrics options
  nbd: add netlink reconfigure resize support
  nbd: fix crash when the blksize is zero
  block: Disable write plugging for zoned block devices
  block: Fix elevator name declaration
  block: Remove unused definitions
  nvme: fix regression upon hot device removal and insertion
  blk-throttle: fix zero wait time for iops throttled group
  block: Fix potential overflow in blk_report_zones()
  blkcg: implement REQ_CGROUP_PUNT
  blkcg, writeback: Implement wbc_blkcg_css()
  blkcg, writeback: Add wbc->no_cgroup_owner
  blkcg, writeback: Rename wbc_account_io() to wbc_account_cgroup_owner()
  ...
This commit is contained in:
Linus Torvalds
2019-07-15 21:20:52 -07:00
50 changed files with 661 additions and 211 deletions

View File

@@ -48,6 +48,7 @@ extern spinlock_t bdi_lock;
extern struct list_head bdi_list;
extern struct workqueue_struct *bdi_wq;
extern struct workqueue_struct *bdi_async_bio_wq;
static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{

View File

@@ -132,13 +132,17 @@ struct blkcg_gq {
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
struct rcu_head rcu_head;
spinlock_t async_bio_lock;
struct bio_list async_bios;
struct work_struct async_bio_work;
atomic_t use_delay;
atomic64_t delay_nsec;
atomic64_t delay_start;
u64 last_delay;
int last_use;
struct rcu_head rcu_head;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -701,6 +705,15 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
struct bio *bio) { return false; }
#endif
bool __blkcg_punt_bio_submit(struct bio *bio);
static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
if (bio->bi_opf & REQ_CGROUP_PUNT)
return __blkcg_punt_bio_submit(bio);
else
return false;
}
static inline void blkcg_bio_issue_init(struct bio *bio)
{
@@ -848,6 +861,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio) { return true; }

View File

@@ -311,6 +311,14 @@ enum req_flag_bits {
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_BACKGROUND, /* background IO */
__REQ_NOWAIT, /* Don't wait if request will block */
/*
* When a shared kthread needs to issue a bio for a cgroup, doing
* so synchronously can lead to priority inversions as the kthread
* can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
* submit_bio() punt the actual issuing to a dedicated per-blkcg
* work item to avoid such priority inversions.
*/
__REQ_CGROUP_PUNT,
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
@@ -337,6 +345,8 @@ enum req_flag_bits {
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI (1ULL << __REQ_HIPRI)

View File

@@ -344,10 +344,15 @@ struct queue_limits {
#ifdef CONFIG_BLK_DEV_ZONED
/*
* Maximum number of zones to report with a single report zones command.
*/
#define BLK_ZONED_REPORT_MAX_ZONES 8192U
extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
sector_t sector, struct blk_zone *zones,
unsigned int *nr_zones, gfp_t gfp_mask);
unsigned int *nr_zones);
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
sector_t nr_sectors, gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);
@@ -681,7 +686,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
}
}
static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
static inline sector_t blk_queue_zone_sectors(struct request_queue *q)
{
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
@@ -1418,7 +1423,7 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
return false;
}
static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
static inline sector_t bdev_zone_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1673,8 +1678,7 @@ struct block_device_operations {
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
struct blk_zone *zones, unsigned int *nr_zones,
gfp_t gfp_mask);
struct blk_zone *zones, unsigned int *nr_zones);
struct module *owner;
const struct pr_ops *pr_ops;
};

View File

@@ -699,6 +699,7 @@ void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
struct cgroup_subsys_state;
struct cgroup;
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
struct task_struct *t) { return 0; }

View File

@@ -95,8 +95,7 @@ typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **
typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
struct blk_zone *zones,
unsigned int *nr_zones,
gfp_t gfp_mask);
unsigned int *nr_zones);
/*
* These iteration functions are typically used to check (and combine)

View File

@@ -75,7 +75,7 @@ struct elevator_type
size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
const char *elevator_name;
const char *elevator_alias;
struct module *elevator_owner;
#ifdef CONFIG_BLK_DEBUG_FS
@@ -160,15 +160,6 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
#define ELEVATOR_INSERT_FLUSH 5
#define ELEVATOR_INSERT_SORT_MERGE 6
/*
* return values from elevator_may_queue_fn
*/
enum {
ELV_MQUEUE_MAY,
ELV_MQUEUE_NO,
ELV_MQUEUE_MUST,
};
#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)

View File

@@ -315,7 +315,7 @@ struct nvme_id_ns {
__u8 nmic;
__u8 rescap;
__u8 fpi;
__u8 rsvd33;
__u8 dlfeat;
__le16 nawun;
__le16 nawupf;
__le16 nacwu;
@@ -324,11 +324,17 @@ struct nvme_id_ns {
__le16 nabspf;
__le16 noiob;
__u8 nvmcap[16];
__u8 rsvd64[28];
__le16 npwg;
__le16 npwa;
__le16 npdg;
__le16 npda;
__le16 nows;
__u8 rsvd74[18];
__le32 anagrpid;
__u8 rsvd96[3];
__u8 nsattr;
__u8 rsvd100[4];
__le16 nvmsetid;
__le16 endgid;
__u8 nguid[16];
__u8 eui64[8];
struct nvme_lbaf lbaf[16];

View File

@@ -11,6 +11,7 @@
#include <linux/flex_proportions.h>
#include <linux/backing-dev-defs.h>
#include <linux/blk_types.h>
#include <linux/blk-cgroup.h>
struct bio;
@@ -68,6 +69,17 @@ struct writeback_control {
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
/*
* When writeback IOs are bounced through async layers, only the
* initial synchronous phase should be accounted towards inode
* cgroup ownership arbitration to avoid confusion. Later stages
* can set the following flag to disable the accounting.
*/
unsigned no_cgroup_owner:1;
unsigned punt_to_cgroup:1; /* cgrp punting, see __REQ_CGROUP_PUNT */
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *wb; /* wb this writeback is issued under */
struct inode *inode; /* inode being written out */
@@ -84,12 +96,27 @@ struct writeback_control {
static inline int wbc_to_write_flags(struct writeback_control *wbc)
{
if (wbc->sync_mode == WB_SYNC_ALL)
return REQ_SYNC;
else if (wbc->for_kupdate || wbc->for_background)
return REQ_BACKGROUND;
int flags = 0;
return 0;
if (wbc->punt_to_cgroup)
flags = REQ_CGROUP_PUNT;
if (wbc->sync_mode == WB_SYNC_ALL)
flags |= REQ_SYNC;
else if (wbc->for_kupdate || wbc->for_background)
flags |= REQ_BACKGROUND;
return flags;
}
static inline struct cgroup_subsys_state *
wbc_blkcg_css(struct writeback_control *wbc)
{
#ifdef CONFIG_CGROUP_WRITEBACK
if (wbc->wb)
return wbc->wb->blkcg_css;
#endif
return blkcg_root_css;
}
/*
@@ -188,8 +215,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
struct inode *inode)
__releases(&inode->i_lock);
void wbc_detach_inode(struct writeback_control *wbc);
void wbc_account_io(struct writeback_control *wbc, struct page *page,
size_t bytes);
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
size_t bytes);
void cgroup_writeback_umount(void);
/**
@@ -291,8 +318,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
}
static inline void wbc_account_io(struct writeback_control *wbc,
struct page *page, size_t bytes)
static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
struct page *page, size_t bytes)
{
}