diff --git a/NEWS b/NEWS index 434fbd1a5e9..0ca4d57242d 100644 --- a/NEWS +++ b/NEWS @@ -168,6 +168,7 @@ * Fixes in reachability of loopback ifaces * Fixes addressing possible uninitialized memory accesses * Fixes in error flow for endpoints created upon receiving connection request +* Fixes in TCP keepalive to avoid false-positive error detection #### UCM * Fixes addressing heap corruption caused by ucp_set_event_handler() * Fixes in mmap events test diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h index 1c0233157b6..67811fb0015 100644 --- a/src/uct/tcp/tcp.h +++ b/src/uct/tcp/tcp.h @@ -65,7 +65,7 @@ #define UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE 10 /* The seconds between individual keepalive probes */ -#define UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL 1 +#define UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL 2 /** @@ -402,7 +402,7 @@ typedef struct uct_tcp_iface { ucs_time_t idle; /* The time the connection needs to remain * idle before TCP starts sending keepalive * probes (TCP_KEEPIDLE socket option) */ - unsigned cnt; /* The maximum number of keepalive probes TCP + unsigned long cnt; /* The maximum number of keepalive probes TCP * should send before dropping the connection * (TCP_KEEPCNT socket option). */ ucs_time_t intvl; /* The time between individual keepalive @@ -440,7 +440,7 @@ typedef struct uct_tcp_iface_config { ucs_range_spec_t port_range; struct { ucs_time_t idle; - unsigned cnt; + unsigned long cnt; ucs_time_t intvl; } keepalive; } uct_tcp_iface_config_t; diff --git a/src/uct/tcp/tcp_ep.c b/src/uct/tcp/tcp_ep.c index 22d1d5f85e0..f3fe50e48bc 100644 --- a/src/uct/tcp/tcp_ep.c +++ b/src/uct/tcp/tcp_ep.c @@ -492,13 +492,16 @@ static inline void uct_tcp_ep_ctx_move(uct_tcp_ep_ctx_t *to_ctx, memset(from_ctx, 0, sizeof(*from_ctx)); } -static int uct_tcp_ep_time_seconds(ucs_time_t time_val, int auto_val) +static int uct_tcp_ep_time_seconds(ucs_time_t time_val, int auto_val, + int max_val) { if (time_val == UCS_TIME_AUTO) { return auto_val; + } else if (time_val == UCS_TIME_INFINITY) { + return max_val; } - return ucs_max(1, (int)ucs_time_to_sec(time_val)); + return ucs_min(max_val, ucs_max(1l, ucs_time_to_sec(time_val))); } static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep) @@ -509,6 +512,7 @@ static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep) const int optval = 1; int idle_sec; int intvl_sec; + int keepalive_cnt; ucs_status_t status; if (!uct_tcp_keepalive_is_enabled(iface)) { @@ -516,9 +520,11 @@ static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep) } idle_sec = uct_tcp_ep_time_seconds(iface->config.keepalive.idle, - UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE); + UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE, + INT16_MAX); intvl_sec = uct_tcp_ep_time_seconds(iface->config.keepalive.intvl, - UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL); + UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL, + INT16_MAX); status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl_sec, sizeof(intvl_sec)); @@ -526,11 +532,18 @@ static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep) return status; } - status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPCNT, - &iface->config.keepalive.cnt, - sizeof(iface->config.keepalive.cnt)); - if (status != UCS_OK) { - return status; + if (iface->config.keepalive.cnt != UCS_ULUNITS_AUTO) { + if (iface->config.keepalive.cnt == UCS_ULUNITS_INF) { + keepalive_cnt = INT8_MAX; + } else { + keepalive_cnt = ucs_min(INT8_MAX, iface->config.keepalive.cnt); + } + + status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPCNT, + &keepalive_cnt, sizeof(keepalive_cnt)); + if (status != UCS_OK) { + return status; + } } status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPIDLE, diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 2fe30597c71..0ecd7b5840f 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -89,18 +89,19 @@ static ucs_config_field_t uct_tcp_iface_config_table[] = { #ifdef UCT_TCP_EP_KEEPALIVE {"KEEPIDLE", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE) "s", "The time the connection needs to remain idle before TCP starts sending " - "keepalive probes.", + "keepalive probes. Specifying \"inf\" disables keepalive.", ucs_offsetof(uct_tcp_iface_config_t, keepalive.idle), UCS_CONFIG_TYPE_TIME_UNITS}, - {"KEEPCNT", "3", + {"KEEPCNT", "auto", "The maximum number of keepalive probes TCP should send before " - "dropping the connection.", + "dropping the connection. Specifying \"inf\" disables keepalive.", ucs_offsetof(uct_tcp_iface_config_t, keepalive.cnt), - UCS_CONFIG_TYPE_UINT}, + UCS_CONFIG_TYPE_ULUNITS}, {"KEEPINTVL", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL) "s", - "The time between individual keepalive probes.", + "The time between individual keepalive probes. Specifying \"inf\" disables" + " keepalive.", ucs_offsetof(uct_tcp_iface_config_t, keepalive.intvl), UCS_CONFIG_TYPE_TIME_UNITS}, #endif /* UCT_TCP_EP_KEEPALIVE */ @@ -872,7 +873,7 @@ int uct_tcp_keepalive_is_enabled(uct_tcp_iface_t *iface) { #ifdef UCT_TCP_EP_KEEPALIVE return (iface->config.keepalive.idle != UCS_TIME_INFINITY) && - (iface->config.keepalive.cnt != 0) && + (iface->config.keepalive.cnt != UCS_ULUNITS_INF) && (iface->config.keepalive.intvl != UCS_TIME_INFINITY); #else /* UCT_TCP_EP_KEEPALIVE */ return 0;