Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL

The poll() changes were not well thought out, and completely
unexplained.  They also caused a huge performance regression, because
"->poll()" was no longer a trivial file operation that just called down
to the underlying file operations, but instead did at least two indirect
calls.

Indirect calls are sadly slow now with the Spectre mitigation, but the
performance problem could at least be largely mitigated by changing the
"->get_poll_head()" operation to just have a per-file-descriptor pointer
to the poll head instead.  That gets rid of one of the new indirections.

But that doesn't fix the new complexity that is completely unwarranted
for the regular case.  The (undocumented) reason for the poll() changes
was some alleged AIO poll race fixing, but we don't make the common case
slower and more complex for some uncommon special case, so this all
really needs way more explanations and most likely a fundamental
redesign.

[ This revert is a revert of about 30 different commits, not reverted
  individually because that would just be unnecessarily messy  - Linus ]

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds
2018-06-28 09:43:44 -07:00
parent f57494321c
commit a11e1d432b
80 changed files with 302 additions and 451 deletions

View File

@@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
.poll_mask = tcp_poll_mask,
.poll = tcp_poll,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
@@ -1021,7 +1021,7 @@ const struct proto_ops inet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
.poll_mask = udp_poll_mask,
.poll = udp_poll,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
@@ -1042,7 +1042,7 @@ EXPORT_SYMBOL(inet_dgram_ops);
/*
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
* udp_poll_mask
* udp_poll
*/
static const struct proto_ops inet_sockraw_ops = {
.family = PF_INET,
@@ -1053,7 +1053,7 @@ static const struct proto_ops inet_sockraw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
.poll_mask = datagram_poll_mask,
.poll = datagram_poll,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,

View File

@@ -494,21 +494,32 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
}
/*
* Socket is not locked. We are protected from async events by poll logic and
* correct handling of state changes made by other threads is impossible in
* any case.
* Wait for a TCP event.
*
* Note that we don't need to lock the socket, as the upper poll layers
* take care of normal races (between the test and the event) and we don't
* go look at any of the socket buffers directly.
*/
__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
__poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
__poll_t mask = 0;
int state;
sock_poll_wait(file, sk_sleep(sk), wait);
state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
* by poll logic and correct handling of state changes
* made by other threads is impossible in any case.
*/
mask = 0;
/*
* EPOLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
@@ -589,7 +600,7 @@ __poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
EXPORT_SYMBOL(tcp_poll_mask);
EXPORT_SYMBOL(tcp_poll);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{

View File

@@ -2591,7 +2591,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* udp_poll - wait for a UDP event.
* @file - file struct
* @sock - socket
* @events - events to wait for
* @wait - poll table
*
* This is same as datagram poll, except for the special case of
* blocking sockets. If application is using a blocking fd
@@ -2600,23 +2600,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* but then block when reading it. Add special case code
* to work around these arguably broken applications.
*/
__poll_t udp_poll_mask(struct socket *sock, __poll_t events)
__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
__poll_t mask = datagram_poll_mask(sock, events);
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) &&
if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
return mask;
}
EXPORT_SYMBOL(udp_poll_mask);
EXPORT_SYMBOL(udp_poll);
int udp_abort(struct sock *sk, int err)
{