diff --git a/NEWS b/NEWS
index 8fb9bc1efb1f..5caa03562a94 100644
--- a/NEWS
+++ b/NEWS
@@ -14,6 +14,7 @@
 * Fix performance degradation with cuda memory on self endpoint
 * Fix bug in JUCX listener connection handler.
 * Fixes in short reply protocol.
+* Configurable loopback interface for TCP transport (disabled by default).
 
 ## 1.10.0 (March 9, 2021)
 ### Features:
diff --git a/contrib/test_jenkins.sh b/contrib/test_jenkins.sh
index 9468f97c3189..c61184d2c83f 100755
--- a/contrib/test_jenkins.sh
+++ b/contrib/test_jenkins.sh
@@ -1655,6 +1655,7 @@ run_tests() {
 	export UCX_ERROR_MAIL_FOOTER=$JOB_URL/$BUILD_NUMBER/console
 	export UCX_TCP_PORT_RANGE="$((33000 + EXECUTOR_NUMBER * 100))"-"$((34000 + EXECUTOR_NUMBER * 100))"
 	export UCX_TCP_CM_ALLOW_ADDR_INUSE=y
+	export UCX_TCP_LOOPBACK_ENABLE=y
 
 	# test cuda build if cuda modules available
 	do_distributed_task 2 4 build_cuda
diff --git a/src/ucs/sys/sock.c b/src/ucs/sys/sock.c
index b52ddcfb189c..d0822b36e068 100644
--- a/src/ucs/sys/sock.c
+++ b/src/ucs/sys/sock.c
@@ -115,6 +115,20 @@ int ucs_netif_is_active(const char *if_name)
     return ucs_netif_flags_is_active(ifr.ifr_flags);
 }
 
+int ucs_netif_is_loopback(const char *if_name)
+{
+    ucs_status_t status;
+    struct ifreq ifr;
+
+    status = ucs_netif_ioctl(if_name, SIOCGIFFLAGS, &ifr);
+    if (status != UCS_OK) {
+        return 0;
+    }
+
+    /* Collapse the flag mask to 0/1 to match the documented return value */
+    return !!(ifr.ifr_flags & IFF_LOOPBACK);
+}
+
 unsigned ucs_netif_bond_ad_num_ports(const char *bond_name)
 {
     ucs_status_t status;
@@ -791,7 +805,7 @@ ucs_status_t ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen
         return UCS_ERR_INVALID_PARAM;
     }
 
-    ucs_debug("check ifname for socket on %s", 
+    ucs_debug("check ifname for socket on %s",
               ucs_sockaddr_str(my_addr, str_local_addr, UCS_SOCKADDR_STRING_LEN));
 
     if (getifaddrs(&ifaddrs)) {
@@ -807,7 +821,7 @@ ucs_status_t ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen
             continue;
         }
 
-        if (((sa->sa_family == AF_INET) ||(sa->sa_family == AF_INET6)) && 
+        if (((sa->sa_family == AF_INET) ||(sa->sa_family == AF_INET6)) &&
             (!ucs_sockaddr_cmp(sa, my_addr, NULL))) {
             ucs_debug("matching ip found iface on %s", ifa->ifa_name);
             ucs_strncpy_safe(ifname_str, ifa->ifa_name, max_strlen);
diff --git a/src/ucs/sys/sock.h b/src/ucs/sys/sock.h
index a9fdab6e39a0..a31715ab167c 100644
--- a/src/ucs/sys/sock.h
+++ b/src/ucs/sys/sock.h
@@ -77,6 +77,16 @@ ucs_status_t ucs_netif_ioctl(const char *if_name, unsigned long request,
 int ucs_netif_is_active(const char *if_name);
 
 
+/**
+ * Check if the given interface is a loopback.
+ *
+ * @param if_name Interface name to check.
+ *
+ * @return 1 if loopback, 0 if not or if the interface flags cannot be read.
+ */
+int ucs_netif_is_loopback(const char *if_name);
+
+
 /**
  * Get number of active 802.3ad ports for a bond device. If the device is not
  * a bond device, or 802.3ad is not enabled, return 1.
@@ -319,7 +329,7 @@ ucs_status_t ucs_socket_recv(int fd, void *data, size_t length);
 
 /**
  * Return size of a given sockaddr structure.
- * 
+ *
  * @param [in] addr Pointer to sockaddr structure.
  * @param [out] size_p Pointer to variable where size of
  * sockaddr_in/sockaddr_in6 structure will be written
@@ -331,7 +341,7 @@ ucs_status_t ucs_sockaddr_sizeof(const struct sockaddr *addr, size_t *size_p);
 
 /**
  * Return port of a given sockaddr structure.
- * 
+ *
  * @param [in] addr Pointer to sockaddr structure.
  * @param [out] port_p Pointer to variable where port (host notation)
  * of sockaddr_in/sockaddr_in6 structure will be written
@@ -343,7 +353,7 @@ ucs_status_t ucs_sockaddr_get_port(const struct sockaddr *addr, uint16_t *port_p
 
 /**
  * Set port to a given sockaddr structure.
- * 
+ *
  * @param [in] addr Pointer to sockaddr structure.
  * @param [in] port Port (host notation) that will be written
  *
@@ -354,7 +364,7 @@ ucs_status_t ucs_sockaddr_set_port(struct sockaddr *addr, uint16_t port);
 
 /**
  * Return IP addr of a given sockaddr structure.
- * 
+ *
  * @param [in] addr Pointer to sockaddr structure.
  *
  * @return IP address of sockaddr_in/sockaddr_in6 structure
@@ -437,7 +447,7 @@ int ucs_sockaddr_ip_cmp(const struct sockaddr *sa1, const struct sockaddr *sa2);
 
 /**
  * Indicate if given IP addr is INADDR_ANY (IPV4) or in6addr_any (IPV6)
- * 
+ *
  * @param [in] addr Pointer to sockaddr structure.
  *
  * @return 1 if input is INADDR_ANY or in6addr_any
diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h
index 852e12fc1f99..5b5e5341748a 100644
--- a/src/uct/tcp/tcp.h
+++ b/src/uct/tcp/tcp.h
@@ -413,6 +413,24 @@ typedef struct uct_tcp_iface_config {
 } uct_tcp_iface_config_t;
 
 
+/**
+ * TCP md configuration
+ */
+typedef struct uct_tcp_md_config {
+    uct_md_config_t super;
+    int             loopback_enable; /* LOOPBACK_ENABLE boolean option */
+} uct_tcp_md_config_t;
+
+
+/**
+ * TCP MD descriptor
+ */
+typedef struct uct_tcp_md {
+    uct_md_t super;
+    uint8_t  loopback_enable; /* Nonzero: report loopback devices */
+} uct_tcp_md_t;
+
+
 extern uct_component_t uct_tcp_component;
 extern const char *uct_tcp_address_type_names[];
 extern const uct_tcp_cm_state_t uct_tcp_ep_cm_state[];
diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c
index 1e5e66c5e924..618464c1805b 100644
--- a/src/uct/tcp/tcp_iface.c
+++ b/src/uct/tcp/tcp_iface.c
@@ -29,7 +29,7 @@ static ucs_config_field_t uct_tcp_iface_config_table[] = {
   {"TX_SEG_SIZE", "8kb",
    "Size of send copy-out buffer",
    ucs_offsetof(uct_tcp_iface_config_t, tx_seg_size), UCS_CONFIG_TYPE_MEMUNITS},
-  
+
   {"RX_SEG_SIZE", "64kb",
    "Size of receive copy-out buffer",
    ucs_offsetof(uct_tcp_iface_config_t, rx_seg_size), UCS_CONFIG_TYPE_MEMUNITS},
@@ -727,6 +727,7 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
                                    uct_tl_device_resource_t **devices_p,
                                    unsigned *num_devices_p)
 {
+    uct_tcp_md_t *tcp_md = ucs_derived_of(md, uct_tcp_md_t);
     uct_tl_device_resource_t *devices, *tmp;
     static const char *netdev_dir = "/sys/class/net";
     struct dirent *entry;
@@ -770,6 +771,10 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
             continue;
         }
 
+        if (!tcp_md->loopback_enable && ucs_netif_is_loopback(entry->d_name)) {
+            continue;
+        }
+
         tmp = ucs_realloc(devices, sizeof(*devices) * (num_devices + 1),
                           "tcp devices");
         if (tmp == NULL) {
diff --git a/src/uct/tcp/tcp_md.c b/src/uct/tcp/tcp_md.c
index 30cb77840840..2a35e16185e3 100644
--- a/src/uct/tcp/tcp_md.c
+++ b/src/uct/tcp/tcp_md.c
@@ -12,6 +12,17 @@
 #include <uct/base/uct_md.h>
 
 
+static ucs_config_field_t uct_tcp_md_config_table[] = {
+    {"", "", NULL,
+     ucs_offsetof(uct_tcp_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},
+
+    {"LOOPBACK_ENABLE", "n",
+     "Enable loopback interface for TCP device selection.",
+     ucs_offsetof(uct_tcp_md_config_t, loopback_enable), UCS_CONFIG_TYPE_BOOL},
+
+    {NULL}
+};
+
 static ucs_status_t uct_tcp_md_query(uct_md_h md, uct_md_attr_t *attr)
 {
     /* Dummy memory registration provided. No real memory handling exists */
@@ -37,24 +48,42 @@ static ucs_status_t uct_tcp_md_mem_reg(uct_md_h md, void *address, size_t length
     return UCS_OK;
 }
 
+/* Free the MD object that uct_tcp_md_open() allocated with ucs_malloc() */
+static void uct_tcp_md_close(uct_md_h uct_md)
+{
+    uct_tcp_md_t *md = ucs_derived_of(uct_md, uct_tcp_md_t);
+    ucs_free(md);
+}
+
 static ucs_status_t
 uct_tcp_md_open(uct_component_t *component, const char *md_name,
                 const uct_md_config_t *md_config, uct_md_h *md_p)
 {
+    const uct_tcp_md_config_t *tcp_md_config =
+        ucs_derived_of(md_config, uct_tcp_md_config_t);
+    uct_tcp_md_t *md;
+
     static uct_md_ops_t md_ops = {
-        .close              = ucs_empty_function,
+        .close              = uct_tcp_md_close,
         .query              = uct_tcp_md_query,
         .mkey_pack          = ucs_empty_function_return_success,
         .mem_reg            = uct_tcp_md_mem_reg,
         .mem_dereg          = ucs_empty_function_return_success,
         .detect_memory_type = ucs_empty_function_return_unsupported
     };
-    static uct_md_t md = {
-        .ops          = &md_ops,
-        .component    = &uct_tcp_component
-    };
 
-    *md_p = &md;
+    /* Each open gets its own MD so it can carry the loopback setting */
+    md = ucs_malloc(sizeof(*md), "uct_tcp_md_t");
+    if (NULL == md) {
+        ucs_error("failed to allocate memory for uct_tcp_md_t");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    md->super.ops       = &md_ops;
+    md->super.component = &uct_tcp_component;
+    md->loopback_enable = tcp_md_config->loopback_enable;
+
+    *md_p = &md->super;
     return UCS_OK;
 }
 
@@ -79,7 +108,12 @@ uct_component_t uct_tcp_component = {
     .rkey_ptr           = ucs_empty_function_return_unsupported,
     .rkey_release       = ucs_empty_function_return_success,
     .name               = UCT_TCP_NAME,
-    .md_config          = UCT_MD_DEFAULT_CONFIG_INITIALIZER,
+    .md_config          = {
+        .name           = "TCP memory domain",
+        .prefix         = "TCP_",
+        .table          = uct_tcp_md_config_table,
+        .size           = sizeof(uct_tcp_md_config_t),
+    },
     .cm_config          = {
         .name           = "TCP-SOCKCM connection manager",
         .prefix         = "TCP_CM_",