Merge tag 'for-5.7/io_uring-2020-03-29' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Here are the io_uring changes for this merge window. Light on new
  features this time around (just splice + buffer selection), lots of
  cleanups, fixes, and improvements to existing support. In particular,
  this contains:

   - Cleanup fixed file update handling for stack fallback (Hillf)

   - Re-work of how pollable async IO is handled, we no longer require
     thread offload to handle that. Instead we rely using poll to drive
     this, with task_work execution.

   - In conjunction with the above, allow expendable buffer selection,
     so that poll+recv (for example) no longer has to be a split
     operation.

   - Make sure we honor RLIMIT_FSIZE for buffered writes

   - Add support for splice (Pavel)

   - Linked work inheritance fixes and optimizations (Pavel)

   - Async work fixes and cleanups (Pavel)

   - Improve io-wq locking (Pavel)

   - Hashed link write improvements (Pavel)

   - SETUP_IOPOLL|SETUP_SQPOLL improvements (Xiaoguang)"

* tag 'for-5.7/io_uring-2020-03-29' of git://git.kernel.dk/linux-block: (54 commits)
  io_uring: cleanup io_alloc_async_ctx()
  io_uring: fix missing 'return' in comment
  io-wq: handle hashed writes in chains
  io-uring: drop 'free_pfile' in struct io_file_put
  io-uring: drop completion when removing file
  io_uring: Fix ->data corruption on re-enqueue
  io-wq: close cancel gap for hashed linked work
  io_uring: make spdxcheck.py happy
  io_uring: honor original task RLIMIT_FSIZE
  io-wq: hash dependent work
  io-wq: split hashing and enqueueing
  io-wq: don't resched if there is no work
  io-wq: remove duplicated cancel code
  io_uring: fix truncated async read/readv and write/writev retry
  io_uring: dual license io_uring.h uapi header
  io_uring: io_uring_enter(2) don't poll while SETUP_IOPOLL|SETUP_SQPOLL enabled
  io_uring: Fix unused function warnings
  io_uring: add end-of-bits marker and build time verify it
  io_uring: provide means of removing buffers
  io_uring: add IOSQE_BUFFER_SELECT support for IORING_OP_RECVMSG
  ...
This commit is contained in:
Linus Torvalds
2020-03-30 12:18:49 -07:00
12 changed files with 1834 additions and 864 deletions

View File

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
* Header file for the io_uring interface.
*
@@ -23,7 +23,10 @@ struct io_uring_sqe {
__u64 off; /* offset into file */
__u64 addr2;
};
__u64 addr; /* pointer to buffer or iovecs */
union {
__u64 addr; /* pointer to buffer or iovecs */
__u64 splice_off_in;
};
__u32 len; /* buffer size or number of iovecs */
union {
__kernel_rwf_t rw_flags;
@@ -37,14 +40,21 @@ struct io_uring_sqe {
__u32 open_flags;
__u32 statx_flags;
__u32 fadvise_advice;
__u32 splice_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
struct {
/* index into fixed buffers, if used */
__u16 buf_index;
/* pack this to avoid bogus arm OABI complaints */
union {
/* index into fixed buffers, if used */
__u16 buf_index;
/* for grouped buffer selection */
__u16 buf_group;
} __attribute__((packed));
/* personality to use, if used */
__u16 personality;
__s32 splice_fd_in;
};
__u64 __pad2[3];
};
@@ -56,6 +66,7 @@ enum {
IOSQE_IO_LINK_BIT,
IOSQE_IO_HARDLINK_BIT,
IOSQE_ASYNC_BIT,
IOSQE_BUFFER_SELECT_BIT,
};
/*
@@ -71,6 +82,8 @@ enum {
#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
/* select buffer from sqe->buf_group */
#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
/*
* io_uring_setup() flags
@@ -113,6 +126,9 @@ enum {
IORING_OP_RECV,
IORING_OP_OPENAT2,
IORING_OP_EPOLL_CTL,
IORING_OP_SPLICE,
IORING_OP_PROVIDE_BUFFERS,
IORING_OP_REMOVE_BUFFERS,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -128,6 +144,12 @@ enum {
*/
#define IORING_TIMEOUT_ABS (1U << 0)
/*
* sqe->splice_flags
* extends splice(2) flags
*/
#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
/*
* IO completion data structure (Completion Queue Entry)
*/
@@ -137,6 +159,17 @@ struct io_uring_cqe {
__u32 flags;
};
/*
* cqe->flags
*
* IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
*/
#define IORING_CQE_F_BUFFER (1U << 0)
enum {
IORING_CQE_BUFFER_SHIFT = 16,
};
/*
* Magic offsets for the application to mmap the data it needs
*/
@@ -204,6 +237,7 @@ struct io_uring_params {
#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
#define IORING_FEAT_RW_CUR_POS (1U << 3)
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5)
/*
* io_uring_register(2) opcodes and arguments