net: Introduce recvmmsg socket syscall

recvmmsg means "receive multiple messages": a single call can receive
several datagrams, reducing the number of syscalls and net stack
entry/exit operations.

Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.

This takes into account comments made by:

. Paul Moore: sock_recvmsg is called only for the first datagram,
  sock_recvmsg_nosec is used for the rest.

. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
  works in the same fashion as the ppoll one.

  If the underlying protocol returns a datagram with MSG_OOB set, this
  will make recvmmsg return right away with as many datagrams (+ the OOB
  one) as it has received so far.

. Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
  datagrams and then recvmsg returns an error, recvmmsg will return
  the successfully received datagrams, store the error and return it
  in the next call.

This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Arnaldo Carvalho de Melo
2009-10-12 23:40:10 -07:00
committed by David S. Miller
parent c05e85a06e
commit a2e2725541
25 changed files with 261 additions and 50 deletions

View File

@@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
/*
 * Argument list sizes for compat_sys_socketcall.
 *
 * nas[call] is the number of bytes compat_sys_socketcall copies from the
 * user-supplied argument array for socketcall number `call`. AL(x) turns
 * an argument count into a byte count of u32 slots.
 *
 * The trailing AL(5) entry is presumably the slot for SYS_RECVMMSG, whose
 * handler is invoked with five arguments (fd, mmsg, vlen, flags, timeout).
 */
#define AL(x) ((x) * sizeof(u32))
static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
AL(4)};
AL(4),AL(5)};
#undef AL
asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
}
/*
 * compat_sys_recvmmsg - compat (32-bit userland) entry point for recvmmsg.
 *
 * @fd:      socket file descriptor
 * @mmsg:    user array of compat_mmsghdr to fill
 * @vlen:    number of entries in @mmsg
 * @flags:   receive flags; MSG_CMSG_COMPAT is OR-ed in before forwarding
 * @timeout: user pointer to a compat_timespec (declared as struct timespec
 *           for the syscall prototype), or NULL for no timeout
 *
 * Converts the caller's compat_timespec into a native struct timespec,
 * forwards the request to __sys_recvmmsg(), and, when at least one
 * datagram was received, writes the timespec back to user space.
 *
 * Returns whatever __sys_recvmmsg() returns, or -EFAULT if the timeout
 * could not be read from or written back to user space.
 */
asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
				    unsigned vlen, unsigned int flags,
				    struct timespec __user *timeout)
{
	int datagrams;
	struct timespec ktspec;
	struct compat_timespec __user *utspec =
			(struct compat_timespec __user *)timeout;

	/*
	 * A NULL timeout is a legitimate request for "no timeout"; it must
	 * not be dereferenced via get_user(), which would turn it into a
	 * spurious -EFAULT.  NOTE: relies on __sys_recvmmsg() treating a
	 * NULL timeout as "no timeout".
	 */
	if (timeout == NULL)
		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
				      flags | MSG_CMSG_COMPAT, NULL);

	/* Widen the 32-bit timespec fields into the native layout. */
	if (get_user(ktspec.tv_sec, &utspec->tv_sec) ||
	    get_user(ktspec.tv_nsec, &utspec->tv_nsec))
		return -EFAULT;

	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
				   flags | MSG_CMSG_COMPAT, &ktspec);

	/* Only report the timespec back when something was received. */
	if (datagrams > 0 &&
	    (put_user(ktspec.tv_sec, &utspec->tv_sec) ||
	     put_user(ktspec.tv_nsec, &utspec->tv_nsec)))
		datagrams = -EFAULT;

	return datagrams;
}
asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
{
int ret;
u32 a[6];
u32 a0, a1;
if (call < SYS_SOCKET || call > SYS_ACCEPT4)
if (call < SYS_SOCKET || call > SYS_RECVMMSG)
return -EINVAL;
if (copy_from_user(a, args, nas[call]))
return -EFAULT;
@@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
case SYS_RECVMMSG:
ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
compat_ptr(a[4]));
break;
case SYS_ACCEPT4:
ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break;