Skip to content

Commit

Permalink
UCT/IB/DC: Always schedule DCI allocation during FC_HARD_REQ progress
Browse files Browse the repository at this point in the history
  • Loading branch information
dmitrygx committed Jun 7, 2022
1 parent a9f2569 commit 28b0f30
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 10 deletions.
7 changes: 4 additions & 3 deletions src/uct/ib/dc/dc_mlx5.inl
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,16 @@ uct_dc_mlx5_get_arbiter_params(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
}

static UCS_F_ALWAYS_INLINE void
uct_dc_mlx5_ep_schedule(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
uct_dc_mlx5_ep_schedule(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
int force)
{
if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) {
/* no dci:
* Do not grab dci here. Instead put the group on dci allocation
* arbiter. This way we can assure fairness between all eps waiting for
* dci allocation. Relevant for dcs and dcs_quota policies.
*/
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep, force);
} else {
uct_dc_mlx5_iface_dci_sched_tx(iface, ep);
}
Expand Down Expand Up @@ -83,5 +84,5 @@ uct_dc_mlx5_ep_pending_common(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
return;
}

uct_dc_mlx5_ep_schedule(iface, ep);
uct_dc_mlx5_ep_schedule(iface, ep, 0);
}
14 changes: 12 additions & 2 deletions src/uct/ib/dc/dc_mlx5_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,7 @@ static unsigned uct_dc_mlx5_ep_fc_hard_req_progress(void *arg)
ucs_time_t now = ucs_get_time();
uint64_t ep_key;
uct_dc_mlx5_ep_t *ep;
ucs_status_t UCS_V_UNUSED status;

if (ucs_likely(now < iface->tx.fc_hard_req_resend_time)) {
return 0;
Expand All @@ -1516,7 +1517,16 @@ static unsigned uct_dc_mlx5_ep_fc_hard_req_progress(void *arg)
* packet in case of failure on the remote FC endpoint */
kh_foreach_key(&iface->tx.fc_hash, ep_key, {
ep = (uct_dc_mlx5_ep_t*)ep_key;
uct_dc_mlx5_ep_schedule(iface, ep);

/* Try to allocate a DCI for the endpoint so that, if a free DCI is
* available, the endpoint can be scheduled on the DCI wait queue */
status = uct_dc_mlx5_iface_dci_get(iface, ep);
ucs_assertv((status == UCS_OK) || (status == UCS_ERR_NO_RESOURCE),
"%s", ucs_status_string(status));

/* Force DCI scheduling, since FC resources may never become available
* unless we send an FC_HARD_REQ packet */
uct_dc_mlx5_ep_schedule(iface, ep, 1);
})

return 1;
Expand Down Expand Up @@ -1636,7 +1646,7 @@ void uct_dc_mlx5_ep_handle_failure(uct_dc_mlx5_ep_t *ep, void *arg,
/* Since DCI isn't assigned for the FC endpoint, schedule DCI
* allocation for progressing possible FC_PURE_GRANT re-sending
* operations which are scheduled on the pending queue */
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep, 0);
}
}

Expand Down
11 changes: 6 additions & 5 deletions src/uct/ib/dc/dc_mlx5_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,14 @@ static inline int uct_dc_mlx5_iface_dci_ep_can_send(uct_dc_mlx5_ep_t *ep)
}

static UCS_F_ALWAYS_INLINE
void uct_dc_mlx5_iface_schedule_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
void uct_dc_mlx5_iface_schedule_dci_alloc(uct_dc_mlx5_iface_t *iface,
uct_dc_mlx5_ep_t *ep, int force)
{
ucs_arbiter_t *waitq;

/* If FC window is empty the group will be scheduled when
* grant is received */
if (uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
/* If the FC window is empty and forced scheduling wasn't requested, the
* group will be scheduled when a grant is received */
if (force || uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
waitq = uct_dc_mlx5_iface_dci_waitq(iface, uct_dc_mlx5_ep_pool_index(ep));
ucs_arbiter_group_schedule(waitq, &ep->arb_group);
}
Expand Down Expand Up @@ -475,7 +476,7 @@ uct_dc_mlx5_iface_dci_put(uct_dc_mlx5_iface_t *iface, uint8_t dci_index)
* move the group to the 'wait for dci alloc' state
*/
ucs_arbiter_group_desched(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group);
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep, 0);
}

static inline void uct_dc_mlx5_iface_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
Expand Down

0 comments on commit 28b0f30

Please sign in to comment.