Created
August 17, 2016 02:43
-
-
Save pietern/09329d1040cc0454c3d1618495bf7362 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 19917b7bc38c4ffe6975c25ae76d31fb9f582dfd | |
Author: Pieter Noordhuis <[email protected]> | |
Date: Tue Aug 16 19:29:19 2016 -0700 | |
Fix IPv6 support in OOB and TCP BTL | |
diff --git a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_endpoint.c b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_endpoint.c | |
index c444390..7be03a7 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_endpoint.c | |
+++ b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_endpoint.c | |
@@ -778,7 +778,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo | |
BTL_PEER_ERROR( btl_endpoint->endpoint_proc->proc_ompi, | |
( "Unable to connect to the peer %s on port %d: %s\n", | |
address, | |
- btl_endpoint->endpoint_addr->addr_port, strerror(opal_socket_errno) ) ); | |
+ ntohs(btl_endpoint->endpoint_addr->addr_port), strerror(opal_socket_errno) ) ); | |
} | |
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; | |
diff --git a/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c b/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c | |
index 5e25a96..2832371 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c | |
+++ b/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c | |
@@ -118,8 +118,8 @@ static int if_linux_ipv6_open(void) | |
addrbyte[8], addrbyte[9], addrbyte[10], addrbyte[11], | |
addrbyte[12], addrbyte[13], addrbyte[14], addrbyte[15], scope); | |
- /* we don't want any other scope less than link-local */ | |
- if (scope < 0x20) { | |
+ /* Only interested in global (0x00) scope */ | |
+ if (scope != 0x00) { | |
opal_output_verbose(1, opal_if_base_framework.framework_output, | |
"skipping interface %2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x scope %x\n", | |
addrbyte[0], addrbyte[1], addrbyte[2], addrbyte[3], | |
diff --git a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp.c b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp.c | |
index 0a8b5d2..2691c8d 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp.c | |
+++ b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp.c | |
@@ -219,40 +219,45 @@ static void accept_connection(const int accepted_fd, | |
static int parse_uri(const uint16_t af_family, | |
const char* host, | |
const char *port, | |
- struct sockaddr* inaddr) | |
+ struct sockaddr_storage* inaddr) | |
{ | |
struct sockaddr_in *in; | |
#if OPAL_ENABLE_IPV6 | |
+ struct sockaddr_in6 *in6; | |
struct addrinfo hints, *res; | |
int ret; | |
#endif | |
if (AF_INET == af_family) { | |
- memset(inaddr, 0, sizeof(struct sockaddr_in)); | |
+ memset(inaddr, 0, sizeof(*inaddr)); | |
in = (struct sockaddr_in*) inaddr; | |
in->sin_family = AF_INET; | |
in->sin_addr.s_addr = inet_addr(host); | |
if (in->sin_addr.s_addr == INADDR_NONE) { | |
return ORTE_ERR_BAD_PARAM; | |
} | |
- ((struct sockaddr_in*) inaddr)->sin_port = htons(atoi(port)); | |
+ in->sin_port = htons(atoi(port)); | |
} | |
#if OPAL_ENABLE_IPV6 | |
else if (AF_INET6 == af_family) { | |
- size_t len; | |
- memset(inaddr, 0, sizeof(struct sockaddr_in6)); | |
+ memset(inaddr, 0, sizeof(*inaddr)); | |
memset(&hints, 0, sizeof(hints)); | |
hints.ai_family = af_family; | |
hints.ai_socktype = SOCK_STREAM; | |
ret = getaddrinfo(host, NULL, &hints, &res); | |
- | |
if (ret) { | |
opal_output (0, "oob_tcp_parse_uri: Could not resolve %s. [Error: %s]\n", | |
host, gai_strerror (ret)); | |
return ORTE_ERR_BAD_PARAM; | |
} | |
- len = (res->ai_addrlen < sizeof(struct sockaddr_in6)) ? res->ai_addrlen : sizeof(struct sockaddr_in6); | |
- memcpy(inaddr, res->ai_addr, len); | |
+ if (res->ai_addrlen < sizeof(struct sockaddr_in6)) { | |
+ opal_output (0, "oob_tcp_parse_uri: addrlen unexpected for %s (%d)\n", | |
+ host, res->ai_addrlen); | |
+ return ORTE_ERR_BAD_PARAM; | |
+ } | |
+ in6 = (struct sockaddr_in6*) inaddr; | |
+ memcpy(in6, res->ai_addr, res->ai_addrlen); | |
+ in6->sin6_port = htons(atoi(port)); | |
freeaddrinfo(res); | |
} | |
#endif | |
@@ -271,7 +276,7 @@ static int parse_uri(const uint16_t af_family, | |
static void process_set_peer(int fd, short args, void *cbdata) | |
{ | |
mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; | |
- struct sockaddr inaddr; | |
+ struct sockaddr_storage inaddr; | |
mca_oob_tcp_peer_t *peer; | |
int rc=ORTE_SUCCESS; | |
uint64_t *ui64 = (uint64_t*)(&pop->peer); | |
@@ -281,12 +286,6 @@ static void process_set_peer(int fd, short args, void *cbdata) | |
"%s:tcp:processing set_peer cmd", | |
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); | |
- if (AF_INET != pop->af_family) { | |
- opal_output_verbose(20, orte_oob_base_framework.framework_output, | |
- "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); | |
- goto cleanup; | |
- } | |
- | |
if (NULL == (peer = mca_oob_tcp_peer_lookup(&pop->peer))) { | |
peer = OBJ_NEW(mca_oob_tcp_peer_t); | |
peer->name.jobid = pop->peer.jobid; | |
@@ -301,7 +300,7 @@ static void process_set_peer(int fd, short args, void *cbdata) | |
} | |
} | |
- if ((rc = parse_uri(pop->af_family, pop->net, pop->port, (struct sockaddr*) &inaddr)) != ORTE_SUCCESS) { | |
+ if ((rc = parse_uri(pop->af_family, pop->net, pop->port, &inaddr)) != ORTE_SUCCESS) { | |
ORTE_ERROR_LOG(rc); | |
goto cleanup; | |
} | |
diff --git a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c | |
index 34075b8..124dc9d 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c | |
+++ b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c | |
@@ -104,7 +104,6 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), | |
ORTE_NAME_PRINT(&(peer->name))); | |
- addrlen = sizeof(struct sockaddr_in); | |
OPAL_LIST_FOREACH(addr, &peer->addrs, mca_oob_tcp_addr_t) { | |
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, | |
"%s orte_tcp_peer_try_connect: " | |
@@ -135,7 +134,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
while (addr->retries < mca_oob_tcp_component.max_retries) { | |
addr->retries++; | |
/* Create the new socket */ | |
- peer->sd = socket(AF_INET, SOCK_STREAM, 0); | |
+ peer->sd = socket(addr->addr.ss_family, SOCK_STREAM, 0); | |
/* Set this fd to be close-on-exec so that any subsequent children don't see it */ | |
if (opal_fd_set_cloexec(peer->sd) != OPAL_SUCCESS) { | |
opal_output(0, "%s unable to set socket to CLOEXEC", | |
@@ -145,9 +144,13 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
continue; | |
} | |
- if (connect(peer->sd, (struct sockaddr*)&addr->addr, addrlen) < 0) { | |
+ addrlen = addr->addr.ss_family == AF_INET6 | |
+ ? sizeof(struct sockaddr_in6) | |
+ : sizeof(struct sockaddr_in); | |
+ rc = connect(peer->sd, (struct sockaddr*) &addr->addr, addrlen); | |
+ if (rc < 0) { | |
if (opal_socket_errno == ETIMEDOUT) { | |
- /* The server may be too busy to accept new connections */ | |
+ /* The server may be too busy to accept new connections */ | |
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, | |
"%s timeout connecting to %s", | |
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), | |
@@ -161,7 +164,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
* attempt, without even trying to establish the | |
* connection. Handle that case in a semi-rational | |
* way by trying twice before giving up */ | |
- if (ECONNABORTED == opal_socket_errno) { | |
+ if (ECONNABORTED == opal_socket_errno) { | |
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, | |
"%s connection to %s aborted by OS - retrying", | |
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), | |
@@ -169,6 +172,15 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
CLOSE_THE_SOCKET(peer->sd); | |
continue; | |
} | |
+ if (rc < 0) { | |
+ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, | |
+ "%s connection to %s returned %d (%d, %s)", | |
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), | |
+ ORTE_NAME_PRINT(&peer->name), | |
+ rc, errno, strerror(errno)); | |
+ CLOSE_THE_SOCKET(peer->sd); | |
+ continue; | |
+ } | |
/* connection succeeded */ | |
addr->retries = 0; | |
goto connected; | |
diff --git a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_listener.c b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_listener.c | |
index 1f218cb..4e25ab9 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_listener.c | |
+++ b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_listener.c | |
@@ -271,8 +271,9 @@ static int create_listen(void) | |
if (NULL == ports) { | |
return ORTE_ERROR; | |
} | |
- | |
+ | |
/* get the address info for this interface */ | |
+ memset(&inaddr, 0, sizeof(inaddr)); | |
((struct sockaddr_in*) &inaddr)->sin_family = AF_INET; | |
((struct sockaddr_in*) &inaddr)->sin_addr.s_addr = INADDR_ANY; | |
addrlen = sizeof(struct sockaddr_in); | |
@@ -511,8 +512,9 @@ static int create_listen6(void) | |
if (NULL == ports) { | |
return ORTE_ERROR; | |
} | |
- | |
+ | |
/* get the address info for this interface */ | |
+ memset(&inaddr, 0, sizeof(inaddr)); | |
((struct sockaddr_in6*) &inaddr)->sin6_family = AF_INET6; | |
((struct sockaddr_in6*) &inaddr)->sin6_addr = in6addr_any; | |
addrlen = sizeof(struct sockaddr_in6); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment