Skip to content

Commit

Permalink
Merge pull request #3677 from hoopoepg/topic/add-support-max-inline-v1.5
Browse files Browse the repository at this point in the history
UD/RC VERBS: added support of max_inline==0 - v1.5
  • Loading branch information
yosefe authored Jun 10, 2019
2 parents b03b274 + 5cfec83 commit f4e3e2b
Show file tree
Hide file tree
Showing 19 changed files with 221 additions and 39 deletions.
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#

## 1.5.2 (TBD)
Features:
- Added support for OmniPath (using Verbs)

Bugfixes:
- Fix segfault when libuct.so is reloaded - issue #3558
- Fix ucx_info crash when printing configuration alias
Expand Down
4 changes: 3 additions & 1 deletion contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ echo "==== Running on $(hostname), worker $worker / $nworkers ===="
module_load() {
set +x
module=$1
if [ -n "$(module avail $module 2>&1)" ]
m_avail="$(module avail $module 2>&1)" || true

if module avail -t 2>&1 | grep -q "^$module\$"
then
module load $module
set -x
Expand Down
11 changes: 11 additions & 0 deletions src/uct/ib/base/ib_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,11 @@ static size_t uct_ib_device_get_ib_gid_index(uct_ib_md_t *md)
}
}

/**
 * Check whether an IB device uses the iWARP transport.
 *
 * @param dev  Device whose verbs context is inspected.
 *
 * @return 1 if the transport type recorded for the underlying verbs device
 *         equals IBV_TRANSPORT_IWARP, 0 otherwise.
 */
static int uct_ib_device_is_iwarp(uct_ib_device_t *dev)
{
    if (IBV_TRANSPORT_IWARP == dev->ibv_context->device->transport_type) {
        return 1;
    }

    return 0;
}

ucs_status_t uct_ib_device_port_check(uct_ib_device_t *dev, uint8_t port_num,
unsigned flags)
{
Expand All @@ -433,6 +438,12 @@ ucs_status_t uct_ib_device_port_check(uct_ib_device_t *dev, uint8_t port_num,
return UCS_ERR_UNREACHABLE;
}

if (uct_ib_device_is_iwarp(dev)) {
/* TODO: enable it when support is ready */
ucs_debug("iWarp device %s is not supported", uct_ib_device_name(dev));
return UCS_ERR_UNSUPPORTED;
}

if (!uct_ib_device_is_port_ib(dev, port_num) && (flags & UCT_IB_DEVICE_FLAG_LINK_IB)) {
ucs_debug("%s:%d is not IB link layer", uct_ib_device_name(dev),
port_num);
Expand Down
6 changes: 6 additions & 0 deletions src/uct/ib/base/ib_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,4 +546,10 @@ struct ibv_pd *uct_ib_iface_qp_pd(uct_ib_iface_t *iface)
return pd;
}

/**
 * Compute the space left in an inline buffer after reserving room for a
 * mandatory part, clamped at zero.
 *
 * @param max_inline  Total number of bytes available.
 * @param min_size    Number of bytes that must be reserved out of
 *                    @a max_inline.
 *
 * @return @a max_inline - @a min_size when @a max_inline is larger than
 *         @a min_size, otherwise 0.
 */
static UCS_F_ALWAYS_INLINE
size_t uct_ib_iface_hdr_size(size_t max_inline, size_t min_size)
{
    /* Compare before subtracting: an unsigned subtraction would wrap around
     * when min_size exceeds max_inline, and detecting that through a cast to
     * a signed type depends on implementation-defined conversion rules. */
    return (max_inline > min_size) ? (max_inline - min_size) : 0;
}

#endif
30 changes: 18 additions & 12 deletions src/uct/ib/rc/base/rc_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,17 +214,15 @@ ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface,
iface_attr->iface_addr_len = 0;
iface_attr->ep_addr_len = sizeof(uct_rc_ep_address_t);
iface_attr->max_conn_priv = 0;
iface_attr->cap.flags = UCT_IFACE_FLAG_AM_SHORT |
UCT_IFACE_FLAG_AM_BCOPY |
UCT_IFACE_FLAG_AM_ZCOPY |
UCT_IFACE_FLAG_PUT_SHORT |
UCT_IFACE_FLAG_PUT_BCOPY |
UCT_IFACE_FLAG_PUT_ZCOPY |
UCT_IFACE_FLAG_GET_BCOPY |
UCT_IFACE_FLAG_GET_ZCOPY |
UCT_IFACE_FLAG_PENDING |
UCT_IFACE_FLAG_CONNECT_TO_EP |
UCT_IFACE_FLAG_CB_SYNC |
iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY |
UCT_IFACE_FLAG_AM_ZCOPY |
UCT_IFACE_FLAG_PUT_BCOPY |
UCT_IFACE_FLAG_PUT_ZCOPY |
UCT_IFACE_FLAG_GET_BCOPY |
UCT_IFACE_FLAG_GET_ZCOPY |
UCT_IFACE_FLAG_PENDING |
UCT_IFACE_FLAG_CONNECT_TO_EP |
UCT_IFACE_FLAG_CB_SYNC |
UCT_IFACE_FLAG_EVENT_SEND_COMP |
UCT_IFACE_FLAG_EVENT_RECV;

Expand Down Expand Up @@ -278,7 +276,7 @@ ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface,
iface_attr->cap.get.max_iov = uct_ib_iface_get_max_iov(&iface->super);

/* AM */
iface_attr->cap.am.max_short = max_inline - sizeof(uct_rc_hdr_t);
iface_attr->cap.am.max_short = uct_ib_iface_hdr_size(max_inline, sizeof(uct_rc_hdr_t));
iface_attr->cap.am.max_bcopy = iface->super.config.seg_size - sizeof(uct_rc_hdr_t);
iface_attr->cap.am.min_zcopy = 0;
iface_attr->cap.am.max_zcopy = iface->super.config.seg_size - sizeof(uct_rc_hdr_t);
Expand All @@ -291,6 +289,14 @@ ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface,
/* Tag Offload */
uct_rc_iface_tag_query(iface, iface_attr, max_inline, tag_max_iov);

if (iface_attr->cap.am.max_short) {
iface_attr->cap.flags |= UCT_IFACE_FLAG_AM_SHORT;
}

if (iface_attr->cap.put.max_short) {
iface_attr->cap.flags |= UCT_IFACE_FLAG_PUT_SHORT;
}

return UCS_OK;
}

Expand Down
1 change: 1 addition & 0 deletions src/uct/ib/rc/verbs/rc_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ typedef struct uct_rc_verbs_iface {
struct ibv_send_wr inl_am_wr;
struct ibv_send_wr inl_rwrite_wr;
uct_rc_verbs_iface_common_t verbs_common;
uct_rc_iface_send_desc_t *fc_desc; /* used when max_inline is zero */
struct {
unsigned tx_max_wr;
} config;
Expand Down
28 changes: 20 additions & 8 deletions src/uct/ib/rc/verbs/rc_verbs_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,27 +537,39 @@ ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
uct_rc_verbs_iface_t);
uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
uct_rc_hdr_t *hdr = &iface->verbs_common.am_inl_hdr.rc_hdr;
uct_rc_hdr_t *hdr;
struct ibv_sge sge;
int flags;

if (!iface->fc_desc) {
hdr = &iface->verbs_common.am_inl_hdr.rc_hdr;
flags = IBV_SEND_INLINE;
hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;
fc_wr.sg_list = iface->verbs_common.inl_sge;
iface->verbs_common.inl_sge[0].addr = (uintptr_t)hdr;
iface->verbs_common.inl_sge[0].length = sizeof(*hdr);
} else {
hdr = (uct_rc_hdr_t*)(iface->fc_desc + 1);
sge.addr = (uintptr_t)hdr;
sge.length = sizeof(*hdr);
sge.lkey = iface->fc_desc->lkey;
fc_wr.sg_list = &sge;
flags = 0;
}

/* In RC only PURE grant is sent as a separate message. Other FC
* messages are bundled with AM. */
ucs_assert(op == UCT_RC_EP_FC_PURE_GRANT);

/* Do not check FC WND here to avoid head-to-head deadlock.
* Credits grant should be sent regardless of FC wnd state. */
ucs_assert(sizeof(*hdr) <= iface->verbs_common.config.max_inline);
UCT_RC_CHECK_RES(&iface->super, &ep->super);

hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;
fc_wr.sg_list = iface->verbs_common.inl_sge;
fc_wr.opcode = IBV_WR_SEND;
fc_wr.next = NULL;
fc_wr.num_sge = 1;

iface->verbs_common.inl_sge[0].addr = (uintptr_t)hdr;
iface->verbs_common.inl_sge[0].length = sizeof(*hdr);

uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, IBV_SEND_INLINE, INT_MAX);
uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, flags, INT_MAX);
return UCS_OK;
}

Expand Down
12 changes: 12 additions & 0 deletions src/uct/ib/rc/verbs/rc_verbs_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worke
uct_ib_iface_init_attr_t init_attr = {};
struct ibv_qp_cap cap;
struct ibv_qp *qp;
uct_rc_hdr_t *hdr;

init_attr.res_domain_key = UCT_IB_IFACE_NULL_RES_DOMAIN_KEY;
init_attr.tm_cap_bit = IBV_EXP_TM_CAP_RC;
Expand Down Expand Up @@ -270,6 +271,14 @@ static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worke
self->verbs_common.config.max_inline = cap.max_inline_data;
uct_ib_iface_set_max_iov(&self->super.super, cap.max_send_sge);

if (self->verbs_common.config.max_inline < sizeof(*hdr)) {
self->fc_desc = ucs_mpool_get(&self->verbs_common.short_desc_mp);
ucs_assert_always(self->fc_desc != NULL);
hdr = (uct_rc_hdr_t*)(self->fc_desc + 1);
hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;
} else {
self->fc_desc = NULL;
}

return UCS_OK;

Expand All @@ -283,6 +292,9 @@ static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worke

static UCS_CLASS_CLEANUP_FUNC(uct_rc_verbs_iface_t)
{
if (self->fc_desc != NULL) {
ucs_mpool_put(self->fc_desc);
}
uct_base_iface_progress_disable(&self->super.super.super.super,
UCT_PROGRESS_SEND | UCT_PROGRESS_RECV);
uct_rc_verbs_iface_common_cleanup(&self->verbs_common);
Expand Down
6 changes: 6 additions & 0 deletions src/uct/ib/rdmacm/rdmacm_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ static int uct_rdmacm_is_addr_route_resolved(struct rdma_cm_id *cm_id,
return 0;
}

if (cm_id->verbs->device->transport_type == IBV_TRANSPORT_IWARP) {
ucs_debug("%s: iWarp support is not implemented",
ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN));
return 0;
}

if (rdma_resolve_route(cm_id, timeout_ms)) {
ucs_debug("rdma_resolve_route(addr = %s) failed: %m",
ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN));
Expand Down
14 changes: 12 additions & 2 deletions src/uct/ib/ud/base/ud_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,12 @@ static void uct_ud_ep_do_pending_ctl(uct_ud_ep_t *ep, uct_ud_iface_t *iface)
skb = uct_ud_ep_resend(ep);
} else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK)) {
if (uct_ud_ep_is_connected(ep)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
if (iface->config.max_inline >= sizeof(uct_ud_neth_t)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
} else {
skb = uct_ud_iface_resend_skb_get(iface);
skb->len = sizeof(uct_ud_neth_t);
}
uct_ud_neth_ctl_ack(ep, skb->neth);
} else {
/* Do not send ACKs if not connected yet. It may happen if
Expand All @@ -1013,7 +1018,12 @@ static void uct_ud_ep_do_pending_ctl(uct_ud_ep_t *ep, uct_ud_iface_t *iface)
}
uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK);
} else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK_REQ)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
if (iface->config.max_inline >= sizeof(uct_ud_neth_t)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
} else {
skb = uct_ud_iface_resend_skb_get(iface);
skb->len = sizeof(uct_ud_neth_t);
}
uct_ud_neth_ctl_ack_req(ep, skb->neth);
uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK_REQ);
} else if (uct_ud_ep_ctl_op_isany(ep)) {
Expand Down
21 changes: 13 additions & 8 deletions src/uct/ib/ud/base/ud_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ uct_ud_iface_create_qp(uct_ud_iface_t *self, const uct_ud_iface_config_t *config
}

self->config.max_inline = qp_init_attr.cap.max_inline_data;
ucs_assert_always(qp_init_attr.cap.max_inline_data >= UCT_UD_MIN_INLINE);
uct_ib_iface_set_max_iov(&self->super, qp_init_attr.cap.max_send_sge);

memset(&qp_attr, 0, sizeof(qp_attr));
Expand Down Expand Up @@ -474,7 +473,9 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
goto err_rx_mpool;
}

self->tx.skb = NULL;
ucs_assert_always(data_size >= UCT_UD_MIN_INLINE);

self->tx.skb = NULL;
self->tx.skb_inl.super.len = sizeof(uct_ud_neth_t);

ucs_queue_head_init(&self->tx.resend_skbs);
Expand Down Expand Up @@ -564,8 +565,7 @@ ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_a
return status;
}

iface_attr->cap.flags = UCT_IFACE_FLAG_AM_SHORT |
UCT_IFACE_FLAG_AM_BCOPY |
iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY |
UCT_IFACE_FLAG_AM_ZCOPY |
UCT_IFACE_FLAG_CONNECT_TO_EP |
UCT_IFACE_FLAG_CONNECT_TO_IFACE |
Expand All @@ -576,18 +576,19 @@ ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_a
UCT_IFACE_FLAG_EVENT_RECV |
UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE;

iface_attr->cap.am.max_short = iface->config.max_inline - sizeof(uct_ud_neth_t);
iface_attr->cap.am.max_short = uct_ib_iface_hdr_size(iface->config.max_inline,
sizeof(uct_ud_neth_t));
iface_attr->cap.am.max_bcopy = iface->super.config.seg_size - sizeof(uct_ud_neth_t);
iface_attr->cap.am.min_zcopy = 0;
iface_attr->cap.am.max_zcopy = iface->super.config.seg_size - sizeof(uct_ud_neth_t);
iface_attr->cap.am.align_mtu = uct_ib_mtu_value(uct_ib_iface_port_attr(&iface->super)->active_mtu);
iface_attr->cap.am.opt_zcopy_align = UCS_SYS_PCI_MAX_PAYLOAD;
iface_attr->cap.am.max_hdr = iface->config.max_inline - sizeof(uct_ud_neth_t);
/* The first iov is reserved for the header */
iface_attr->cap.am.max_iov = uct_ib_iface_get_max_iov(&iface->super) - 1;

iface_attr->cap.put.max_short = iface->config.max_inline -
sizeof(uct_ud_neth_t) - sizeof(uct_ud_put_hdr_t);
iface_attr->cap.put.max_short = uct_ib_iface_hdr_size(iface->config.max_inline,
sizeof(uct_ud_neth_t) +
sizeof(uct_ud_put_hdr_t));

iface_attr->iface_addr_len = sizeof(uct_ud_iface_addr_t);
iface_attr->ep_addr_len = sizeof(uct_ud_ep_addr_t);
Expand All @@ -596,6 +597,10 @@ ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_a
/* UD lacks scatter-to-CQE support */
iface_attr->latency.overhead += 10e-9;

if (iface_attr->cap.am.max_short) {
iface_attr->cap.flags |= UCT_IFACE_FLAG_AM_SHORT;
}

return UCS_OK;
}

Expand Down
10 changes: 7 additions & 3 deletions src/uct/ib/ud/verbs/ud_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ uct_ud_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,

UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super) - 1,
"uct_ud_verbs_ep_am_zcopy");
UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + header_length,
0, iface->super.config.max_inline, "am_zcopy header");

UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(uct_ud_zcopy_desc_t) + header_length,
0, iface->super.super.config.seg_size, "am_zcopy header");

UCT_UD_CHECK_ZCOPY_LENGTH(&iface->super, header_length,
uct_iov_total_length(iov, iovcnt));
Expand Down Expand Up @@ -402,7 +403,10 @@ uct_ud_verbs_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr)
return status;
}

iface_attr->overhead = 105e-9; /* Software overhead */
iface_attr->overhead = 105e-9; /* Software overhead */
iface_attr->cap.am.max_hdr = uct_ib_iface_hdr_size(iface->super.config.seg_size,
sizeof(uct_ud_neth_t) +
sizeof(uct_ud_zcopy_desc_t));

return UCS_OK;
}
Expand Down
Loading

0 comments on commit f4e3e2b

Please sign in to comment.