Skip to content

Commit

Permalink
UD: Fix hang during connect in case of CREP retransmission.
Browse files Browse the repository at this point in the history
 Fix an issue introduced by openucx#953: when a duplicate CREP with a lower
PSN is received, the current expected PSN should not be affected. Otherwise,
packets that were already acknowledged would be considered missing.
  • Loading branch information
yosefe committed Sep 11, 2016
1 parent eda2c58 commit ae3c786
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
3 changes: 2 additions & 1 deletion src/uct/ib/base/ib_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ static void uct_ib_dump_wr(struct ibv_qp *qp, uct_ib_opcode_t *op,
char *ends = buf + max;

if (qp->qp_type == IBV_QPT_UD) {
snprintf(s, ends - s, " [rqpn 0x%x]", wr->wr.ud.remote_qpn);
snprintf(s, ends - s, " [rqpn 0x%x ah %p]", wr->wr.ud.remote_qpn,
wr->wr.ud.ah);
s += strlen(s);
}

Expand Down
20 changes: 13 additions & 7 deletions src/uct/ib/ud/base/ud_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -478,18 +478,25 @@ static void uct_ud_ep_rx_creq(uct_ud_iface_t *iface, uct_ud_neth_t *neth)
uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREQ_RCVD);
}

static void uct_ud_ep_rx_ctl(uct_ud_iface_t *iface, uct_ud_ep_t *ep, uct_ud_neth_t *neth)
static void uct_ud_ep_rx_ctl(uct_ud_iface_t *iface, uct_ud_ep_t *ep,
uct_ud_neth_t *neth, uct_ud_recv_skb_t *skb)
{
uct_ud_ctl_hdr_t *ctl = (uct_ud_ctl_hdr_t*)(neth + 1);
ucs_frag_list_ooo_type_t ooo_type;

ucs_trace_func("");
ucs_assert_always(ctl->type == UCT_UD_PACKET_CREP);
/* note that duplicate creps are discarded earlier */
ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID ||
ep->dest_ep_id == ctl->conn_rep.src_ep_id);
ep->dest_ep_id = ctl->conn_rep.src_ep_id;

/* No need to track duplications, CREP always goes
* with ACK_REQ flag */
/* Discard duplicate CREP */
ooo_type = ucs_frag_list_insert(&ep->rx.ooo_pkts, &skb->u.ooo.elem, neth->psn);
if (ooo_type != UCS_FRAG_LIST_INSERT_FAST) {
ucs_assertv(ooo_type == UCS_FRAG_LIST_INSERT_DUP, "OOO unsupported");
return;
}

ep->dest_ep_id = ctl->conn_rep.src_ep_id;
ep->rx.ooo_pkts.head_sn = neth->psn;
ucs_arbiter_group_schedule(&iface->tx.pending_q, &ep->tx.pending.group);
uct_ud_peer_copy(&ep->peer, &ctl->peer);
Expand Down Expand Up @@ -584,7 +591,7 @@ void uct_ud_ep_process_rx(uct_ud_iface_t *iface, uct_ud_neth_t *neth, unsigned b
goto out;
}
if (neth->packet_type & UCT_UD_PACKET_FLAG_CTL) {
uct_ud_ep_rx_ctl(iface, ep, neth);
uct_ud_ep_rx_ctl(iface, ep, neth, skb);
goto out;
}
}
Expand All @@ -600,7 +607,6 @@ void uct_ud_ep_process_rx(uct_ud_iface_t *iface, uct_ud_neth_t *neth, unsigned b
uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_ACK);
goto out;
}


if (ucs_unlikely(!is_am && (neth->packet_type & UCT_UD_PACKET_FLAG_PUT))) {
/* TODO: remove once ucp implements put */
Expand Down

0 comments on commit ae3c786

Please sign in to comment.