rds: Enable RDS IPv6 support

This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests.  RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32 bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connections, and it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to
send the connection setup request.

v5: Fixed syntax problem (David Miller).

v4: Changed port history comments in rds.h (Sowmini Varadhan).

v3: Added support to set up IPv4 connection using mapped address
    (David Miller).
    Added support to set up connection between link local and non-link
    local addresses.
    Various review comments from Santosh Shilimkar and Sowmini Varadhan.

v2: Fixed bound and peer address scope mismatch issue.
    Added back rds_connect() IPv6 changes.

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ka-Cheong Poon
2018-07-23 20:51:22 -07:00
committed by David S. Miller
parent eee2fa6ab3
commit 1e2b44e78e
14 changed files with 459 additions and 114 deletions

View File

@@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
uaddr_len = sizeof(*sin6);
}
} else {
/* If socket is not yet bound, set the return address family
* to be AF_UNSPEC (value 0) and the address size to be that
* of an IPv4 address.
/* If socket is not yet bound and the socket is connected,
* set the return address family to be the same as the
* connected address, but with 0 address value. If it is not
* connected, set the family to be AF_UNSPEC (value 0) and
* the address size to be that of an IPv4 address.
*/
if (ipv6_addr_any(&rs->rs_bound_addr)) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_UNSPEC;
return sizeof(*sin);
if (ipv6_addr_any(&rs->rs_conn_addr)) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_UNSPEC;
return sizeof(*sin);
}
if (ipv6_addr_type(&rs->rs_conn_addr) &
IPV6_ADDR_MAPPED) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
return sizeof(*sin);
}
sin6 = (struct sockaddr_in6 *)uaddr;
memset(sin6, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
return sizeof(*sin6);
}
if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
sin = (struct sockaddr_in *)uaddr;
@@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
{
struct sock *sk = sock->sk;
struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
struct rds_sock *rs = rds_sk_to_rs(sk);
int addr_type;
int ret = 0;
lock_sock(sk);
switch (addr_len) {
case sizeof(struct sockaddr_in):
switch (uaddr->sa_family) {
case AF_INET:
sin = (struct sockaddr_in *)uaddr;
if (sin->sin_family != AF_INET) {
ret = -EAFNOSUPPORT;
if (addr_len < sizeof(struct sockaddr_in)) {
ret = -EINVAL;
break;
}
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
@@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
rs->rs_conn_port = sin->sin_port;
break;
case sizeof(struct sockaddr_in6):
ret = -EPROTONOSUPPORT;
case AF_INET6:
sin6 = (struct sockaddr_in6 *)uaddr;
if (addr_len < sizeof(struct sockaddr_in6)) {
ret = -EINVAL;
break;
}
addr_type = ipv6_addr_type(&sin6->sin6_addr);
if (!(addr_type & IPV6_ADDR_UNICAST)) {
__be32 addr4;
if (!(addr_type & IPV6_ADDR_MAPPED)) {
ret = -EPROTOTYPE;
break;
}
/* It is a mapped address. Need to do some sanity
* checks.
*/
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(addr4))) {
ret = -EPROTOTYPE;
break;
}
}
if (addr_type & IPV6_ADDR_LINKLOCAL) {
/* If socket is already bound to a link local address,
* the peer address must be on the same link.
*/
if (sin6->sin6_scope_id == 0 ||
(!ipv6_addr_any(&rs->rs_bound_addr) &&
rs->rs_bound_scope_id &&
sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
ret = -EINVAL;
break;
}
/* Remember the connected address scope ID. It will
* be checked against the binding local address when
* the socket is bound.
*/
rs->rs_bound_scope_id = sin6->sin6_scope_id;
}
rs->rs_conn_addr = sin6->sin6_addr;
rs->rs_conn_port = sin6->sin6_port;
break;
default:
ret = -EINVAL;
ret = -EAFNOSUPPORT;
break;
}