Created
August 17, 2016 02:48
-
-
Save pietern/21c6765849be89f862d93050df7acded to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 8d1f0719776e92837ef3ab3f2b895f057e4a9c36 | |
Author: Pieter Noordhuis <[email protected]> | |
Date: Tue Aug 16 19:31:14 2016 -0700 | |
Support IPv6 link-local addresses | |
Link-local addresses share a single prefix (fe80::/10), so a routing table | |
doesn't help in figuring out which interface to transmit them on. The Linux | |
IPv6 implementation asks application developers to populate the | |
`sin6_scope_id` field on the `sockaddr_in6` struct with the index of the | |
interface to communicate on. | |
diff --git a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp.h b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp.h | |
index 5279c09..ae2c5c6 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp.h | |
+++ b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp.h | |
@@ -78,8 +78,10 @@ struct mca_btl_tcp_component_t { | |
opal_event_t tcp6_recv_event; /**< recv event for IPv6 listen socket */ | |
int tcp6_listen_sd; /**< IPv6 listen socket for incoming connection requests */ | |
unsigned short tcp6_listen_port; /**< IPv6 listen port */ | |
- int tcp6_port_min; /**< IPv4 minimum port */ | |
- int tcp6_port_range; /**< IPv4 port range */ | |
+ int tcp6_port_min; /**< IPv6 minimum port */ | |
+ int tcp6_port_range; /**< IPv6 port range */ | |
+ bool tcp6_use_link_local; /**< Enable use of IPv6 link-local addresses */ | |
+ int tcp6_link_local_scope_id; /**< Kernel index of interface for link-local traffic */ | |
#endif | |
/* Port range restriction */ | |
diff --git a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_component.c b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_component.c | |
index 59a3a48..8bbbb96 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_component.c | |
+++ b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_component.c | |
@@ -160,6 +160,20 @@ static inline unsigned int mca_btl_tcp_param_register_uint( | |
return *storage; | |
} | |
+static inline unsigned int mca_btl_tcp_param_register_bool( /* Register a boolean (MCA_BASE_VAR_TYPE_BOOL) variable for the TCP BTL component and return the value held in *storage afterwards. */ | |
+ const char* param_name, /* variable name, forwarded unchanged to mca_base_component_var_register() */ | |
+ const char* help_string, /* help text, forwarded unchanged to the registration call */ | |
+ bool default_value, /* value written to *storage before the variable is registered */ | |
+ int level, /* info level, forwarded unchanged to the registration call */ | |
+ bool *storage) /* backing store: seeded with default_value, then handed to the registration call */ | |
+{ | |
+ *storage = default_value; /* seed the backing store before registering it */ | |
+ (void) mca_base_component_var_register(&mca_btl_tcp_component.super.btl_version, /* registration result is deliberately discarded */ | |
+ param_name, help_string, MCA_BASE_VAR_TYPE_BOOL, | |
+ NULL, 0, 0, level, | |
+ MCA_BASE_VAR_SCOPE_READONLY, storage); /* NOTE(review): presumably registration may overwrite *storage with a configured value - confirm */ | |
+ return *storage; /* the bool is converted to the declared unsigned int return type */ | |
+} | |
/* | |
* Data structure for accepting connections. | |
@@ -254,6 +268,10 @@ static int mca_btl_tcp_component_register(void) | |
(0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1, | |
OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp6_port_range ); | |
free(message); | |
+ | |
+ mca_btl_tcp_param_register_bool("enable_ipv6_link_local", | |
+ "Whether to enable use of IPv6 link-local addresses (default: false)", false, | |
+ OPAL_INFO_LVL_2, &mca_btl_tcp_component.tcp6_use_link_local); | |
#endif | |
mca_btl_tcp_component.report_all_unfound_interfaces = false; | |
@@ -677,7 +695,7 @@ static int mca_btl_tcp_component_create_instances(void) | |
and therefore we're done. */ | |
if (mca_btl_tcp_component.tcp_num_btls > 0) { | |
ret = OMPI_SUCCESS; | |
- goto cleanup; | |
+ goto check; | |
} | |
/* if the interface list was not specified by the user, create | |
@@ -709,7 +727,91 @@ static int mca_btl_tcp_component_create_instances(void) | |
} | |
opal_argv_free(exclude); | |
- cleanup: | |
+ check: | |
+#if OPAL_ENABLE_IPV6 | |
+ /* If using IPv6 link-local addresses is OK, we need to verify | |
+ * we have only a single BTL instance with a link-local address, | |
+ * or they will be ambiguous. */ | |
+ if (!mca_btl_tcp_component.tcp6_use_link_local) { | |
+ goto cleanup; | |
+ } | |
+ | |
+ { | |
+ int link_local_ifkindex = -1; | |
+ unsigned int btl_index; | |
+ | |
+ for (btl_index = 0; | |
+ btl_index < mca_btl_tcp_component.tcp_num_btls; | |
+ btl_index++) { | |
+ for (if_index = opal_ifbegin(); | |
+ if_index >= 0; | |
+ if_index = opal_ifnext(if_index)) { | |
+ /* IF_NAMESIZE is defined in opal/util/if.h */ | |
+ char if_name[IF_NAMESIZE]; | |
+ struct sockaddr_in6 ss; | |
+ | |
+ if (opal_ifindextokindex(if_index) != | |
+ mca_btl_tcp_component.tcp_btls[btl_index]->tcp_ifkindex) { | |
+ continue; | |
+ } | |
+ | |
+ ret = opal_ifindextoaddr(if_index, | |
+ (struct sockaddr*) &ss, | |
+ sizeof(ss)); | |
+ if (ret != OPAL_SUCCESS) { | |
+ opal_output (0, | |
+ "btl_tcp_component: " | |
+ "unable to get address for " | |
+ "index %i (kernel index %i)", | |
+ if_index, | |
+ opal_ifindextokindex(if_index)); | |
+ goto cleanup; | |
+ } | |
+ | |
+ /* Ignore non-IPv6 addresses */ | |
+ if (ss.sin6_family != AF_INET6) { | |
+ continue; | |
+ } | |
+ | |
+ /* Ignore addresses other than link-local */ | |
+ if (ss.sin6_scope_id != 0x20) { | |
+ continue; | |
+ } | |
+ | |
+ /* | |
+ * Error if there are multiple interfaces with | |
+ * a link-local address (they will be ambiguous). | |
+ */ | |
+ if (link_local_ifkindex >= 0) { | |
+ opal_output (0, | |
+ "btl_tcp_component: " | |
+ "multiple link-local addresses found"); | |
+ ret = OMPI_ERROR; | |
+ goto cleanup; | |
+ } | |
+ | |
+ link_local_ifkindex = opal_ifindextokindex(if_index); | |
+ opal_ifindextoname(if_index, if_name, sizeof(if_name)); | |
+ opal_output (0, | |
+ "btl_tcp_component: " | |
+ "using %s for link-local traffic", | |
+ if_name); | |
+ } | |
+ } | |
+ | |
+ if (link_local_ifkindex < 0) { | |
+ opal_output (0, | |
+ "btl_tcp_component: " | |
+ "no link-local addresses found"); | |
+ ret = OMPI_ERROR; | |
+ goto cleanup; | |
+ } | |
+ | |
+ mca_btl_tcp_component.tcp6_link_local_scope_id = link_local_ifkindex; | |
+ } | |
+#endif | |
+ | |
+cleanup: | |
if (NULL != kindexes) { | |
free(kindexes); | |
} | |
diff --git a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_proc.c b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_proc.c | |
index 89aee88..21e4e50 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_proc.c | |
+++ b/1.10.2/src/openmpi-1.10.2/ompi/mca/btl/tcp/btl_tcp_proc.c | |
@@ -798,6 +798,18 @@ bool mca_btl_tcp_proc_tosocks(mca_btl_tcp_addr_t* proc_addr, | |
inaddr->sin6_port = proc_addr->addr_port; | |
inaddr->sin6_scope_id = 0; | |
inaddr->sin6_flowinfo = 0; | |
+ | |
+ /* | |
+ * If this is a link-local address AND the component is configured | |
+ * to allow link-local addresses for BTL traffic, set the | |
+ * scope_id so the kernel passes it to the right network interface. | |
+ */ | |
+ if ((inaddr->sin6_addr.s6_addr[0] & 0xff) == 0xfe && | |
+ (inaddr->sin6_addr.s6_addr[1] & 0xc0) == 0x80 && | |
+ mca_btl_tcp_component.tcp6_use_link_local) { | |
+ inaddr->sin6_scope_id = | |
+ mca_btl_tcp_component.tcp6_link_local_scope_id; | |
+ } | |
} | |
break; | |
#endif | |
diff --git a/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c b/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c | |
index 2832371..ffa0b8e 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c | |
+++ b/1.10.2/src/openmpi-1.10.2/opal/mca/if/linux_ipv6/if_linux_ipv6.c | |
@@ -118,8 +118,8 @@ static int if_linux_ipv6_open(void) | |
addrbyte[8], addrbyte[9], addrbyte[10], addrbyte[11], | |
addrbyte[12], addrbyte[13], addrbyte[14], addrbyte[15], scope); | |
- /* Only interested in global (0x00) scope */ | |
- if (scope != 0x00) { | |
+ /* Only interested in global (0x00) and link-local (0x20) scope */ | |
+ if (scope != 0x00 && scope != 0x20) { | |
opal_output_verbose(1, opal_if_base_framework.framework_output, | |
"skipping interface %2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x scope %x\n", | |
addrbyte[0], addrbyte[1], addrbyte[2], addrbyte[3], | |
diff --git a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.c b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.c | |
index 498a42d..5b453ca 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.c | |
+++ b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.c | |
@@ -438,6 +438,17 @@ static int tcp_component_register(void) | |
OPAL_INFO_LVL_6, | |
MCA_BASE_VAR_SCOPE_READONLY, | |
&mca_oob_tcp_component.skip_version_check); | |
+ | |
+#if OPAL_ENABLE_IPV6 | |
+ mca_oob_tcp_component.tcp6_use_link_local = false; | |
+ mca_oob_tcp_component.tcp6_link_local_scope_id = -1; | |
+ (void)mca_base_component_var_register(component, "enable_ipv6_link_local", | |
+ "Whether to enable use of IPv6 link-local addresses (default: false)", | |
+ MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, | |
+ OPAL_INFO_LVL_2, | |
+ MCA_BASE_VAR_SCOPE_READONLY, | |
+ &mca_oob_tcp_component.tcp6_use_link_local); | |
+#endif | |
return ORTE_SUCCESS; | |
} | |
@@ -566,6 +577,12 @@ static bool component_available(void) | |
opal_argv_append_nosize(&mca_oob_tcp_component.ipv4conns, opal_net_get_hostname((struct sockaddr*) &my_ss)); | |
} else if (AF_INET6 == my_ss.ss_family) { | |
#if OPAL_ENABLE_IPV6 | |
+ /* If this address has link local scope, capture its interface index */ | |
+ if (mca_oob_tcp_component.tcp6_use_link_local && | |
+ ((struct sockaddr_in6*) &my_ss)->sin6_scope_id == 0x20) { | |
+ mca_oob_tcp_component.tcp6_link_local_scope_id = kindex; | |
+ } | |
+ | |
opal_output_verbose(10, orte_oob_base_framework.framework_output, | |
"%s oob:tcp:init adding %s to our list of %s connections", | |
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), | |
diff --git a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.h b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.h | |
index d8d47a2..03182b4 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.h | |
+++ b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_component.h | |
@@ -62,9 +62,11 @@ typedef struct { | |
#if OPAL_ENABLE_IPV6 | |
/* IPv6 support */ | |
- bool disable_ipv6_family; /**< disable this AF */ | |
- char** tcp6_static_ports; /**< Static ports - IPV6 */ | |
- char** tcp6_dyn_ports; /**< Dynamic ports - IPV6 */ | |
+ bool disable_ipv6_family; /**< disable this AF */ | |
+ char** tcp6_static_ports; /**< Static ports - IPV6 */ | |
+ char** tcp6_dyn_ports; /**< Dynamic ports - IPV6 */ | |
+ bool tcp6_use_link_local; /**< Enable use of IPv6 link-local addresses */ | |
+ int tcp6_link_local_scope_id; /**< Kernel index of interface for link-local traffic */ | |
char** ipv6conns; | |
char** ipv6ports; | |
#endif | |
diff --git a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c | |
index 124dc9d..71565ee 100644 | |
--- a/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c | |
+++ b/1.10.2/src/openmpi-1.10.2/orte/mca/oob/tcp/oob_tcp_connection.c | |
@@ -144,6 +144,19 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
continue; | |
} | |
+#if OPAL_ENABLE_IPV6 | |
+ if (addr->addr.ss_family == AF_INET6) { | |
+ struct sockaddr_in6* inaddr = (struct sockaddr_in6*) &addr->addr; | |
+ | |
+ if ((inaddr->sin6_addr.s6_addr[0] & 0xff) == 0xfe && | |
+ (inaddr->sin6_addr.s6_addr[1] & 0xc0) == 0x80 && | |
+ mca_oob_tcp_component.tcp6_use_link_local) { | |
+ inaddr->sin6_scope_id = | |
+ mca_oob_tcp_component.tcp6_link_local_scope_id; | |
+ } | |
+ } | |
+#endif | |
+ | |
addrlen = addr->addr.ss_family == AF_INET6 | |
? sizeof(struct sockaddr_in6) | |
: sizeof(struct sockaddr_in); | |
@@ -172,6 +185,25 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) | |
CLOSE_THE_SOCKET(peer->sd); | |
continue; | |
} | |
+ /* When testing use of IPv6 link-local addresses, the 4.0 | |
+ * kernel would immediately return EADDRNOTAVAIL when | |
+ * connecting to a link-local address on the same host. | |
+ * This appears to be a transient problem that only | |
+ * manifests for a short period of time after calling | |
+ * listen(2) on the server side of the socket. Therefore, | |
+ * inserting a small delay on the client side fixes the | |
+ * problem. Since establishing these connections only | |
+ * happens at initialization time, a delay is acceptable. | |
+ */ | |
+ if (EADDRNOTAVAIL == opal_socket_errno) { | |
+ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, | |
+ "%s connection to %s returned EADDRNOTAVAIL - retrying after delay", | |
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), | |
+ ORTE_NAME_PRINT(&peer->name)); | |
+ CLOSE_THE_SOCKET(peer->sd); | |
+ sleep(1); | |
+ continue; | |
+ } | |
if (rc < 0) { | |
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, | |
"%s connection to %s returned %d (%d, %s)", |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment