diff --git a/src/uct/ib/rc/accel/rc_mlx5_ep.c b/src/uct/ib/rc/accel/rc_mlx5_ep.c index db6d6b9cd9e..a0cbe3b773d 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_ep.c +++ b/src/uct/ib/rc/accel/rc_mlx5_ep.c @@ -589,7 +589,7 @@ ucs_status_t uct_rc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op, NULL, 0, UCT_RC_EP_FC_PURE_GRANT, 0, 0, 0, 0, - NULL, NULL, 0, 0, + NULL, NULL, 0, MLX5_WQE_CTRL_CQ_UPDATE, INT_MAX); return UCS_OK; } diff --git a/src/uct/ib/rc/accel/rc_mlx5_iface.c b/src/uct/ib/rc/accel/rc_mlx5_iface.c index ce8082e2a80..ddb8a6685a8 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_iface.c +++ b/src/uct/ib/rc/accel/rc_mlx5_iface.c @@ -173,10 +173,13 @@ uct_rc_mlx5_common_ka_progress(uct_rc_mlx5_iface_common_t *iface) ucs_spin_lock(&iface->super.ep_list_lock); ucs_list_for_each(ep, &iface->super.ep_list, super.list) { - if (ep->super.txqp.available < ep->tx.wq.bb_max) { - /* have outstanding operations */ + if ((ep->super.txqp.available < ep->tx.wq.bb_max) && + (ep->super.txqp.unsignaled == 0)) { + /* Have outstanding uncompleted operations - no need to send + keepalive message */ continue; } + ucs_trace("send keepalive grant on ep %p", ep); uct_rc_ep_fc_send_grant(&ep->super); }