/*
 * Patch summary (UCT TCP keepalive configuration), derived from the hunks
 * that follow:
 *
 *  - tcp.h: UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL (the seconds between
 *    individual keepalive probes) goes from 1 to 2; tcp_iface.c stringifies
 *    it with an "s" suffix as the default of the KEEPINTVL config entry.
 *  - tcp.h: the keepalive `cnt` field changes from `unsigned` to `size_t`,
 *    both in the hunk under `struct uct_tcp_iface` and in
 *    `uct_tcp_iface_config_t`.
 *  - tcp_iface.c: the KEEPCNT config entry changes its default from "3" to
 *    "auto" and its type from UCS_CONFIG_TYPE_UINT to
 *    UCS_CONFIG_TYPE_ULUNITS.
 *  - tcp_ep.c: uct_tcp_ep_keepalive_enable() sets TCP_KEEPCNT only when
 *    cnt != UCS_ULUNITS_AUTO; a non-UCS_OK status from ucs_socket_setopt()
 *    is still returned to the caller.
 *  - tcp_iface.c: uct_tcp_keepalive_is_enabled() now tests
 *    cnt != UCS_ULUNITS_INF where it previously tested cnt != 0.
 *
 * NOTE(review): after this change `&iface->config.keepalive.cnt` and
 * `sizeof(iface->config.keepalive.cnt)` refer to a size_t, yet they are still
 * handed to ucs_socket_setopt() for TCP_KEEPCNT. tcp(7) describes TCP_KEEPCNT
 * as an int-valued option, so on an LP64 big-endian target the kernel would
 * presumably read the zero high half of the value. Confirm, or copy the
 * count into an `int` local and pass that instead.
 *
 * NOTE(review): with the enable check switched to UCS_ULUNITS_INF, a
 * configured KEEPCNT of 0 no longer disables keepalive and would apparently
 * be passed to setsockopt() unchanged. Verify whether 0 is rejected earlier
 * or whether this is intended. Presumably "auto" is meant to leave the
 * system-wide probe count in effect; confirm against the config parser.
 */
diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h index 1c0233157b6f..90ab12dc579a 100644 --- a/src/uct/tcp/tcp.h +++ b/src/uct/tcp/tcp.h @@ -65,7 +65,7 @@ #define UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE 10 /* The seconds between individual keepalive probes */ -#define UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL 1 +#define UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL 2 /** @@ -402,7 +402,7 @@ typedef struct uct_tcp_iface { ucs_time_t idle; /* The time the connection needs to remain * idle before TCP starts sending keepalive * probes (TCP_KEEPIDLE socket option) */ - unsigned cnt; /* The maximum number of keepalive probes TCP + size_t cnt; /* The maximum number of keepalive probes TCP * should send before dropping the connection * (TCP_KEEPCNT socket option). */ ucs_time_t intvl; /* The time between individual keepalive @@ -440,7 +440,7 @@ typedef struct uct_tcp_iface_config { ucs_range_spec_t port_range; struct { ucs_time_t idle; - unsigned cnt; + size_t cnt; ucs_time_t intvl; } keepalive; } uct_tcp_iface_config_t; diff --git a/src/uct/tcp/tcp_ep.c b/src/uct/tcp/tcp_ep.c index 22d1d5f85e03..69fdeb8e2dc3 100644 --- a/src/uct/tcp/tcp_ep.c +++ b/src/uct/tcp/tcp_ep.c @@ -526,11 +526,13 @@ static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep) return status; } - status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPCNT, - &iface->config.keepalive.cnt, - sizeof(iface->config.keepalive.cnt)); - if (status != UCS_OK) { - return status; + if (iface->config.keepalive.cnt != UCS_ULUNITS_AUTO) { + status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPCNT, + &iface->config.keepalive.cnt, + sizeof(iface->config.keepalive.cnt)); + if (status != UCS_OK) { + return status; + } } status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPIDLE, diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c index 2fe30597c713..814f8e38b05b 100644 --- a/src/uct/tcp/tcp_iface.c +++ b/src/uct/tcp/tcp_iface.c @@ -93,11 +93,11 @@ static ucs_config_field_t uct_tcp_iface_config_table[] = { 
ucs_offsetof(uct_tcp_iface_config_t, keepalive.idle), UCS_CONFIG_TYPE_TIME_UNITS}, - {"KEEPCNT", "3", + {"KEEPCNT", "auto", "The maximum number of keepalive probes TCP should send before " "dropping the connection.", ucs_offsetof(uct_tcp_iface_config_t, keepalive.cnt), - UCS_CONFIG_TYPE_UINT}, + UCS_CONFIG_TYPE_ULUNITS}, {"KEEPINTVL", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL) "s", "The time between individual keepalive probes.", @@ -872,7 +872,7 @@ int uct_tcp_keepalive_is_enabled(uct_tcp_iface_t *iface) { #ifdef UCT_TCP_EP_KEEPALIVE return (iface->config.keepalive.idle != UCS_TIME_INFINITY) && - (iface->config.keepalive.cnt != 0) && + (iface->config.keepalive.cnt != UCS_ULUNITS_INF) && (iface->config.keepalive.intvl != UCS_TIME_INFINITY); #else /* UCT_TCP_EP_KEEPALIVE */ return 0;