rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the listener is now an IPv6 endpoint which accepts both IPv4 and IPv6 connection requests. RDS/RDMA/IB uses a private data (struct rds_ib_connect_private) exchange between endpoints at RDS connection establishment time to support RDMA. This private data exchange uses a 32-bit integer to represent an IP address. This needs to be changed in order to support IPv6. A new private data struct rds6_ib_connect_private is introduced to handle this. To ensure backward compatibility, an IPv6-capable RDS stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6 connections, and it continues to use the original RDS_PORT for IPv4 RDS connections. When it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to send the connection setup request. v5: Fixed syntax problem (David Miller). v4: Changed port history comments in rds.h (Sowmini Varadhan). v3: Added support to set up IPv4 connections using mapped addresses (David Miller). Added support to set up connections between link-local and non-link-local addresses. Addressed various review comments from Santosh Shilimkar and Sowmini Varadhan. v2: Fixed bound and peer address scope mismatch issue. Added back rds_connect() IPv6 changes. Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

parent
eee2fa6ab3
commit
1e2b44e78e
@@ -142,15 +142,32 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
|
||||
uaddr_len = sizeof(*sin6);
|
||||
}
|
||||
} else {
|
||||
/* If socket is not yet bound, set the return address family
|
||||
* to be AF_UNSPEC (value 0) and the address size to be that
|
||||
* of an IPv4 address.
|
||||
/* If socket is not yet bound and the socket is connected,
|
||||
* set the return address family to be the same as the
|
||||
* connected address, but with 0 address value. If it is not
|
||||
* connected, set the family to be AF_UNSPEC (value 0) and
|
||||
* the address size to be that of an IPv4 address.
|
||||
*/
|
||||
if (ipv6_addr_any(&rs->rs_bound_addr)) {
|
||||
sin = (struct sockaddr_in *)uaddr;
|
||||
memset(sin, 0, sizeof(*sin));
|
||||
sin->sin_family = AF_UNSPEC;
|
||||
return sizeof(*sin);
|
||||
if (ipv6_addr_any(&rs->rs_conn_addr)) {
|
||||
sin = (struct sockaddr_in *)uaddr;
|
||||
memset(sin, 0, sizeof(*sin));
|
||||
sin->sin_family = AF_UNSPEC;
|
||||
return sizeof(*sin);
|
||||
}
|
||||
|
||||
if (ipv6_addr_type(&rs->rs_conn_addr) &
|
||||
IPV6_ADDR_MAPPED) {
|
||||
sin = (struct sockaddr_in *)uaddr;
|
||||
memset(sin, 0, sizeof(*sin));
|
||||
sin->sin_family = AF_INET;
|
||||
return sizeof(*sin);
|
||||
}
|
||||
|
||||
sin6 = (struct sockaddr_in6 *)uaddr;
|
||||
memset(sin6, 0, sizeof(*sin6));
|
||||
sin6->sin6_family = AF_INET6;
|
||||
return sizeof(*sin6);
|
||||
}
|
||||
if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
|
||||
sin = (struct sockaddr_in *)uaddr;
|
||||
@@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct sockaddr_in *sin;
|
||||
struct sockaddr_in6 *sin6;
|
||||
struct rds_sock *rs = rds_sk_to_rs(sk);
|
||||
int addr_type;
|
||||
int ret = 0;
|
||||
|
||||
lock_sock(sk);
|
||||
|
||||
switch (addr_len) {
|
||||
case sizeof(struct sockaddr_in):
|
||||
switch (uaddr->sa_family) {
|
||||
case AF_INET:
|
||||
sin = (struct sockaddr_in *)uaddr;
|
||||
if (sin->sin_family != AF_INET) {
|
||||
ret = -EAFNOSUPPORT;
|
||||
if (addr_len < sizeof(struct sockaddr_in)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
|
||||
@@ -509,12 +528,56 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
|
||||
rs->rs_conn_port = sin->sin_port;
|
||||
break;
|
||||
|
||||
case sizeof(struct sockaddr_in6):
|
||||
ret = -EPROTONOSUPPORT;
|
||||
case AF_INET6:
|
||||
sin6 = (struct sockaddr_in6 *)uaddr;
|
||||
if (addr_len < sizeof(struct sockaddr_in6)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
addr_type = ipv6_addr_type(&sin6->sin6_addr);
|
||||
if (!(addr_type & IPV6_ADDR_UNICAST)) {
|
||||
__be32 addr4;
|
||||
|
||||
if (!(addr_type & IPV6_ADDR_MAPPED)) {
|
||||
ret = -EPROTOTYPE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* It is a mapped address. Need to do some sanity
|
||||
* checks.
|
||||
*/
|
||||
addr4 = sin6->sin6_addr.s6_addr32[3];
|
||||
if (addr4 == htonl(INADDR_ANY) ||
|
||||
addr4 == htonl(INADDR_BROADCAST) ||
|
||||
IN_MULTICAST(ntohl(addr4))) {
|
||||
ret = -EPROTOTYPE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (addr_type & IPV6_ADDR_LINKLOCAL) {
|
||||
/* If socket is already bound to a link local address,
|
||||
* the peer address must be on the same link.
|
||||
*/
|
||||
if (sin6->sin6_scope_id == 0 ||
|
||||
(!ipv6_addr_any(&rs->rs_bound_addr) &&
|
||||
rs->rs_bound_scope_id &&
|
||||
sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
/* Remember the connected address scope ID. It will
|
||||
* be checked against the binding local address when
|
||||
* the socket is bound.
|
||||
*/
|
||||
rs->rs_bound_scope_id = sin6->sin6_scope_id;
|
||||
}
|
||||
rs->rs_conn_addr = sin6->sin6_addr;
|
||||
rs->rs_conn_port = sin6->sin6_port;
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
ret = -EAFNOSUPPORT;
|
||||
break;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user