Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UCT/TCP: Increase keepalive interval from 1 to 2 seconds and use system default for keepalive retries count [v1.11.x] #7117

Merged
merged 1 commit into from
Jul 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
* Fixes in reachability of loopback ifaces
* Fixes addressing possible uninitialized memory accesses
* Fixes in error flow for endpoints created upon receiving connection request
* Fixes in TCP keepalive to avoid false-positive error detection
#### UCM
* Fixes addressing heap corruption caused by ucp_set_event_handler()
* Fixes in mmap events test
Expand Down
6 changes: 3 additions & 3 deletions src/uct/tcp/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
#define UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE 10

/* The seconds between individual keepalive probes */
#define UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL 1
#define UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL 2


/**
Expand Down Expand Up @@ -402,7 +402,7 @@ typedef struct uct_tcp_iface {
ucs_time_t idle; /* The time the connection needs to remain
* idle before TCP starts sending keepalive
* probes (TCP_KEEPIDLE socket option) */
unsigned cnt; /* The maximum number of keepalive probes TCP
unsigned long cnt; /* The maximum number of keepalive probes TCP
* should send before dropping the connection
* (TCP_KEEPCNT socket option). */
ucs_time_t intvl; /* The time between individual keepalive
Expand Down Expand Up @@ -440,7 +440,7 @@ typedef struct uct_tcp_iface_config {
ucs_range_spec_t port_range;
struct {
ucs_time_t idle;
unsigned cnt;
unsigned long cnt;
ucs_time_t intvl;
} keepalive;
} uct_tcp_iface_config_t;
Expand Down
31 changes: 22 additions & 9 deletions src/uct/tcp/tcp_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -492,13 +492,16 @@ static inline void uct_tcp_ep_ctx_move(uct_tcp_ep_ctx_t *to_ctx,
memset(from_ctx, 0, sizeof(*from_ctx));
}

static int uct_tcp_ep_time_seconds(ucs_time_t time_val, int auto_val)
static int uct_tcp_ep_time_seconds(ucs_time_t time_val, int auto_val,
int max_val)
{
if (time_val == UCS_TIME_AUTO) {
return auto_val;
} else if (time_val == UCS_TIME_INFINITY) {
return max_val;
}

return ucs_max(1, (int)ucs_time_to_sec(time_val));
return ucs_min(max_val, ucs_max(1l, ucs_time_to_sec(time_val)));
}

static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep)
Expand All @@ -509,28 +512,38 @@ static ucs_status_t uct_tcp_ep_keepalive_enable(uct_tcp_ep_t *ep)
const int optval = 1;
int idle_sec;
int intvl_sec;
int keepalive_cnt;
ucs_status_t status;

if (!uct_tcp_keepalive_is_enabled(iface)) {
return UCS_OK;
}

idle_sec = uct_tcp_ep_time_seconds(iface->config.keepalive.idle,
UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE);
UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE,
INT16_MAX);
intvl_sec = uct_tcp_ep_time_seconds(iface->config.keepalive.intvl,
UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL);
UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL,
INT16_MAX);

status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPINTVL,
&intvl_sec, sizeof(intvl_sec));
if (status != UCS_OK) {
return status;
}

status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPCNT,
&iface->config.keepalive.cnt,
sizeof(iface->config.keepalive.cnt));
if (status != UCS_OK) {
return status;
if (iface->config.keepalive.cnt != UCS_ULUNITS_AUTO) {
if (iface->config.keepalive.cnt == UCS_ULUNITS_INF) {
keepalive_cnt = INT8_MAX;
} else {
keepalive_cnt = ucs_min(INT8_MAX, iface->config.keepalive.cnt);
}

status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPCNT,
&keepalive_cnt, sizeof(keepalive_cnt));
if (status != UCS_OK) {
return status;
}
}

status = ucs_socket_setopt(ep->fd, IPPROTO_TCP, TCP_KEEPIDLE,
Expand Down
13 changes: 7 additions & 6 deletions src/uct/tcp/tcp_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,19 @@ static ucs_config_field_t uct_tcp_iface_config_table[] = {
#ifdef UCT_TCP_EP_KEEPALIVE
{"KEEPIDLE", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_IDLE) "s",
"The time the connection needs to remain idle before TCP starts sending "
"keepalive probes.",
"keepalive probes. Specifying \"inf\" disables keepalive.",
ucs_offsetof(uct_tcp_iface_config_t, keepalive.idle),
UCS_CONFIG_TYPE_TIME_UNITS},

{"KEEPCNT", "3",
{"KEEPCNT", "auto",
"The maximum number of keepalive probes TCP should send before "
"dropping the connection.",
"dropping the connection. Specifying \"inf\" disables keepalive.",
ucs_offsetof(uct_tcp_iface_config_t, keepalive.cnt),
UCS_CONFIG_TYPE_UINT},
UCS_CONFIG_TYPE_ULUNITS},

{"KEEPINTVL", UCS_PP_MAKE_STRING(UCT_TCP_EP_DEFAULT_KEEPALIVE_INTVL) "s",
"The time between individual keepalive probes.",
"The time between individual keepalive probes. Specifying \"inf\" disables"
" keepalive.",
ucs_offsetof(uct_tcp_iface_config_t, keepalive.intvl),
UCS_CONFIG_TYPE_TIME_UNITS},
#endif /* UCT_TCP_EP_KEEPALIVE */
Expand Down Expand Up @@ -872,7 +873,7 @@ int uct_tcp_keepalive_is_enabled(uct_tcp_iface_t *iface)
{
#ifdef UCT_TCP_EP_KEEPALIVE
return (iface->config.keepalive.idle != UCS_TIME_INFINITY) &&
(iface->config.keepalive.cnt != 0) &&
(iface->config.keepalive.cnt != UCS_ULUNITS_INF) &&
(iface->config.keepalive.intvl != UCS_TIME_INFINITY);
#else /* UCT_TCP_EP_KEEPALIVE */
return 0;
Expand Down