Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UD/RC VERBS: added support of max_inline==0 - v1.5 #3677

Merged
merged 9 commits into from
Jun 10, 2019
11 changes: 11 additions & 0 deletions src/uct/ib/base/ib_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,11 @@ static size_t uct_ib_device_get_ib_gid_index(uct_ib_md_t *md)
}
}

/* Return non-zero if the verbs device behind 'dev' reports the iWARP
 * transport type, and zero for any other transport type. */
static int uct_ib_device_is_iwarp(uct_ib_device_t *dev)
{
    if (dev->ibv_context->device->transport_type != IBV_TRANSPORT_IWARP) {
        return 0;
    }

    return 1;
}

ucs_status_t uct_ib_device_port_check(uct_ib_device_t *dev, uint8_t port_num,
unsigned flags)
{
Expand All @@ -433,6 +438,12 @@ ucs_status_t uct_ib_device_port_check(uct_ib_device_t *dev, uint8_t port_num,
return UCS_ERR_UNREACHABLE;
}

if (uct_ib_device_is_iwarp(dev)) {
/* TODO: enable it when support is ready */
ucs_debug("iWarp device %s is not supported", uct_ib_device_name(dev));
return UCS_ERR_UNSUPPORTED;
}

if (!uct_ib_device_is_port_ib(dev, port_num) && (flags & UCT_IB_DEVICE_FLAG_LINK_IB)) {
ucs_debug("%s:%d is not IB link layer", uct_ib_device_name(dev),
port_num);
Expand Down
6 changes: 6 additions & 0 deletions src/uct/ib/base/ib_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,4 +546,10 @@ struct ibv_pd *uct_ib_iface_qp_pd(uct_ib_iface_t *iface)
return pd;
}

/*
 * Return how many bytes are left of a capacity after reserving 'min_size'
 * bytes, clamped at zero.
 *
 * max_inline - total capacity in bytes.
 * min_size   - number of bytes to reserve out of that capacity.
 *
 * Returns (max_inline - min_size) when max_inline is larger than min_size,
 * and 0 otherwise.
 */
static UCS_F_ALWAYS_INLINE
size_t uct_ib_iface_hdr_size(size_t max_inline, size_t min_size)
{
    /* Compare before subtracting: both operands are unsigned, so a plain
     * subtraction would wrap around instead of going negative, and turning
     * the wrapped value into a signed type is implementation-defined. */
    return (max_inline > min_size) ? (max_inline - min_size) : 0;
}

#endif
30 changes: 18 additions & 12 deletions src/uct/ib/rc/base/rc_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,17 +214,15 @@ ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface,
iface_attr->iface_addr_len = 0;
iface_attr->ep_addr_len = sizeof(uct_rc_ep_address_t);
iface_attr->max_conn_priv = 0;
iface_attr->cap.flags = UCT_IFACE_FLAG_AM_SHORT |
UCT_IFACE_FLAG_AM_BCOPY |
UCT_IFACE_FLAG_AM_ZCOPY |
UCT_IFACE_FLAG_PUT_SHORT |
UCT_IFACE_FLAG_PUT_BCOPY |
UCT_IFACE_FLAG_PUT_ZCOPY |
UCT_IFACE_FLAG_GET_BCOPY |
UCT_IFACE_FLAG_GET_ZCOPY |
UCT_IFACE_FLAG_PENDING |
UCT_IFACE_FLAG_CONNECT_TO_EP |
UCT_IFACE_FLAG_CB_SYNC |
iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY |
UCT_IFACE_FLAG_AM_ZCOPY |
UCT_IFACE_FLAG_PUT_BCOPY |
UCT_IFACE_FLAG_PUT_ZCOPY |
UCT_IFACE_FLAG_GET_BCOPY |
UCT_IFACE_FLAG_GET_ZCOPY |
UCT_IFACE_FLAG_PENDING |
UCT_IFACE_FLAG_CONNECT_TO_EP |
UCT_IFACE_FLAG_CB_SYNC |
UCT_IFACE_FLAG_EVENT_SEND_COMP |
UCT_IFACE_FLAG_EVENT_RECV;

Expand Down Expand Up @@ -278,7 +276,7 @@ ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface,
iface_attr->cap.get.max_iov = uct_ib_iface_get_max_iov(&iface->super);

/* AM */
iface_attr->cap.am.max_short = max_inline - sizeof(uct_rc_hdr_t);
iface_attr->cap.am.max_short = uct_ib_iface_hdr_size(max_inline, sizeof(uct_rc_hdr_t));
iface_attr->cap.am.max_bcopy = iface->super.config.seg_size - sizeof(uct_rc_hdr_t);
iface_attr->cap.am.min_zcopy = 0;
iface_attr->cap.am.max_zcopy = iface->super.config.seg_size - sizeof(uct_rc_hdr_t);
Expand All @@ -291,6 +289,14 @@ ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface,
/* Tag Offload */
uct_rc_iface_tag_query(iface, iface_attr, max_inline, tag_max_iov);

if (iface_attr->cap.am.max_short) {
iface_attr->cap.flags |= UCT_IFACE_FLAG_AM_SHORT;
}

if (iface_attr->cap.put.max_short) {
iface_attr->cap.flags |= UCT_IFACE_FLAG_PUT_SHORT;
}

return UCS_OK;
}

Expand Down
1 change: 1 addition & 0 deletions src/uct/ib/rc/verbs/rc_verbs.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ typedef struct uct_rc_verbs_iface {
struct ibv_send_wr inl_am_wr;
struct ibv_send_wr inl_rwrite_wr;
uct_rc_verbs_iface_common_t verbs_common;
uct_rc_iface_send_desc_t *fc_desc; /* used when max_inline is zero */
struct {
unsigned tx_max_wr;
} config;
Expand Down
28 changes: 20 additions & 8 deletions src/uct/ib/rc/verbs/rc_verbs_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,27 +537,39 @@ ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
uct_rc_verbs_iface_t);
uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
uct_rc_hdr_t *hdr = &iface->verbs_common.am_inl_hdr.rc_hdr;
uct_rc_hdr_t *hdr;
struct ibv_sge sge;
int flags;

if (!iface->fc_desc) {
hdr = &iface->verbs_common.am_inl_hdr.rc_hdr;
flags = IBV_SEND_INLINE;
hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;
fc_wr.sg_list = iface->verbs_common.inl_sge;
iface->verbs_common.inl_sge[0].addr = (uintptr_t)hdr;
iface->verbs_common.inl_sge[0].length = sizeof(*hdr);
} else {
hdr = (uct_rc_hdr_t*)(iface->fc_desc + 1);
sge.addr = (uintptr_t)hdr;
sge.length = sizeof(*hdr);
sge.lkey = iface->fc_desc->lkey;
fc_wr.sg_list = &sge;
flags = 0;
}

/* In RC only PURE grant is sent as a separate message. Other FC
* messages are bundled with AM. */
ucs_assert(op == UCT_RC_EP_FC_PURE_GRANT);

/* Do not check FC WND here to avoid head-to-head deadlock.
* Credits grant should be sent regardless of FC wnd state. */
ucs_assert(sizeof(*hdr) <= iface->verbs_common.config.max_inline);
UCT_RC_CHECK_RES(&iface->super, &ep->super);

hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;
fc_wr.sg_list = iface->verbs_common.inl_sge;
fc_wr.opcode = IBV_WR_SEND;
fc_wr.next = NULL;
fc_wr.num_sge = 1;

iface->verbs_common.inl_sge[0].addr = (uintptr_t)hdr;
iface->verbs_common.inl_sge[0].length = sizeof(*hdr);

uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, IBV_SEND_INLINE, INT_MAX);
uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, flags, INT_MAX);
return UCS_OK;
}

Expand Down
12 changes: 12 additions & 0 deletions src/uct/ib/rc/verbs/rc_verbs_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worke
uct_ib_iface_init_attr_t init_attr = {};
struct ibv_qp_cap cap;
struct ibv_qp *qp;
uct_rc_hdr_t *hdr;

init_attr.res_domain_key = UCT_IB_IFACE_NULL_RES_DOMAIN_KEY;
init_attr.tm_cap_bit = IBV_EXP_TM_CAP_RC;
Expand Down Expand Up @@ -270,6 +271,14 @@ static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worke
self->verbs_common.config.max_inline = cap.max_inline_data;
uct_ib_iface_set_max_iov(&self->super.super, cap.max_send_sge);

if (self->verbs_common.config.max_inline < sizeof(*hdr)) {
self->fc_desc = ucs_mpool_get(&self->verbs_common.short_desc_mp);
ucs_assert_always(self->fc_desc != NULL);
hdr = (uct_rc_hdr_t*)(self->fc_desc + 1);
hdr->am_id = UCT_RC_EP_FC_PURE_GRANT;
} else {
self->fc_desc = NULL;
}

return UCS_OK;

Expand All @@ -283,6 +292,9 @@ static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worke

static UCS_CLASS_CLEANUP_FUNC(uct_rc_verbs_iface_t)
{
if (self->fc_desc != NULL) {
ucs_mpool_put(self->fc_desc);
}
uct_base_iface_progress_disable(&self->super.super.super.super,
UCT_PROGRESS_SEND | UCT_PROGRESS_RECV);
uct_rc_verbs_iface_common_cleanup(&self->verbs_common);
Expand Down
6 changes: 6 additions & 0 deletions src/uct/ib/rdmacm/rdmacm_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ static int uct_rdmacm_is_addr_route_resolved(struct rdma_cm_id *cm_id,
return 0;
}

if (cm_id->verbs->device->transport_type == IBV_TRANSPORT_IWARP) {
ucs_debug("%s: iWarp support is not implemented",
ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN));
return 0;
}

if (rdma_resolve_route(cm_id, timeout_ms)) {
ucs_debug("rdma_resolve_route(addr = %s) failed: %m",
ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN));
Expand Down
14 changes: 12 additions & 2 deletions src/uct/ib/ud/base/ud_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,12 @@ static void uct_ud_ep_do_pending_ctl(uct_ud_ep_t *ep, uct_ud_iface_t *iface)
skb = uct_ud_ep_resend(ep);
} else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK)) {
if (uct_ud_ep_is_connected(ep)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
if (iface->config.max_inline >= sizeof(uct_ud_neth_t)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
} else {
skb = uct_ud_iface_resend_skb_get(iface);
skb->len = sizeof(uct_ud_neth_t);
}
uct_ud_neth_ctl_ack(ep, skb->neth);
} else {
/* Do not send ACKs if not connected yet. It may happen if
Expand All @@ -1013,7 +1018,12 @@ static void uct_ud_ep_do_pending_ctl(uct_ud_ep_t *ep, uct_ud_iface_t *iface)
}
uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK);
} else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK_REQ)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
if (iface->config.max_inline >= sizeof(uct_ud_neth_t)) {
skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super);
} else {
skb = uct_ud_iface_resend_skb_get(iface);
skb->len = sizeof(uct_ud_neth_t);
}
uct_ud_neth_ctl_ack_req(ep, skb->neth);
uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK_REQ);
} else if (uct_ud_ep_ctl_op_isany(ep)) {
Expand Down
21 changes: 13 additions & 8 deletions src/uct/ib/ud/base/ud_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ uct_ud_iface_create_qp(uct_ud_iface_t *self, const uct_ud_iface_config_t *config
}

self->config.max_inline = qp_init_attr.cap.max_inline_data;
ucs_assert_always(qp_init_attr.cap.max_inline_data >= UCT_UD_MIN_INLINE);
uct_ib_iface_set_max_iov(&self->super, qp_init_attr.cap.max_send_sge);

memset(&qp_attr, 0, sizeof(qp_attr));
Expand Down Expand Up @@ -474,7 +473,9 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
goto err_rx_mpool;
}

self->tx.skb = NULL;
ucs_assert_always(data_size >= UCT_UD_MIN_INLINE);

self->tx.skb = NULL;
self->tx.skb_inl.super.len = sizeof(uct_ud_neth_t);

ucs_queue_head_init(&self->tx.resend_skbs);
Expand Down Expand Up @@ -564,8 +565,7 @@ ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_a
return status;
}

iface_attr->cap.flags = UCT_IFACE_FLAG_AM_SHORT |
UCT_IFACE_FLAG_AM_BCOPY |
iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY |
UCT_IFACE_FLAG_AM_ZCOPY |
UCT_IFACE_FLAG_CONNECT_TO_EP |
UCT_IFACE_FLAG_CONNECT_TO_IFACE |
Expand All @@ -576,18 +576,19 @@ ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_a
UCT_IFACE_FLAG_EVENT_RECV |
UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE;

iface_attr->cap.am.max_short = iface->config.max_inline - sizeof(uct_ud_neth_t);
iface_attr->cap.am.max_short = uct_ib_iface_hdr_size(iface->config.max_inline,
sizeof(uct_ud_neth_t));
iface_attr->cap.am.max_bcopy = iface->super.config.seg_size - sizeof(uct_ud_neth_t);
iface_attr->cap.am.min_zcopy = 0;
iface_attr->cap.am.max_zcopy = iface->super.config.seg_size - sizeof(uct_ud_neth_t);
iface_attr->cap.am.align_mtu = uct_ib_mtu_value(uct_ib_iface_port_attr(&iface->super)->active_mtu);
iface_attr->cap.am.opt_zcopy_align = UCS_SYS_PCI_MAX_PAYLOAD;
iface_attr->cap.am.max_hdr = iface->config.max_inline - sizeof(uct_ud_neth_t);
/* The first iov is reserved for the header */
iface_attr->cap.am.max_iov = uct_ib_iface_get_max_iov(&iface->super) - 1;

iface_attr->cap.put.max_short = iface->config.max_inline -
sizeof(uct_ud_neth_t) - sizeof(uct_ud_put_hdr_t);
iface_attr->cap.put.max_short = uct_ib_iface_hdr_size(iface->config.max_inline,
sizeof(uct_ud_neth_t) +
sizeof(uct_ud_put_hdr_t));

iface_attr->iface_addr_len = sizeof(uct_ud_iface_addr_t);
iface_attr->ep_addr_len = sizeof(uct_ud_ep_addr_t);
Expand All @@ -596,6 +597,10 @@ ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_a
/* UD lacks scatter-to-CQE support */
iface_attr->latency.overhead += 10e-9;

if (iface_attr->cap.am.max_short) {
iface_attr->cap.flags |= UCT_IFACE_FLAG_AM_SHORT;
}

return UCS_OK;
}

Expand Down
10 changes: 7 additions & 3 deletions src/uct/ib/ud/verbs/ud_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ uct_ud_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,

UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super) - 1,
"uct_ud_verbs_ep_am_zcopy");
UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + header_length,
0, iface->super.config.max_inline, "am_zcopy header");

UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(uct_ud_zcopy_desc_t) + header_length,
0, iface->super.super.config.seg_size, "am_zcopy header");

UCT_UD_CHECK_ZCOPY_LENGTH(&iface->super, header_length,
uct_iov_total_length(iov, iovcnt));
Expand Down Expand Up @@ -402,7 +403,10 @@ uct_ud_verbs_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr)
return status;
}

iface_attr->overhead = 105e-9; /* Software overhead */
iface_attr->overhead = 105e-9; /* Software overhead */
iface_attr->cap.am.max_hdr = uct_ib_iface_hdr_size(iface->super.config.seg_size,
sizeof(uct_ud_neth_t) +
sizeof(uct_ud_zcopy_desc_t));

return UCS_OK;
}
Expand Down
19 changes: 17 additions & 2 deletions test/gtest/uct/ib/test_rc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class test_rc_max_wr : public test_rc {
/* Check that max_wr stops from sending */
UCS_TEST_P(test_rc_max_wr, send_limit)
{
check_caps(UCT_IFACE_FLAG_AM_SHORT);

/* first 32 messages should be OK */
send_am_messages(m_e1, 32, UCS_OK);

Expand Down Expand Up @@ -162,6 +164,8 @@ void test_rc_flow_control::validate_grant(entity *e)
void test_rc_flow_control::test_general(int wnd, int soft_thresh,
int hard_thresh, bool is_fc_enabled)
{
check_caps(UCT_IFACE_FLAG_AM_SHORT);

set_fc_attributes(m_e1, is_fc_enabled, wnd, soft_thresh, hard_thresh);

send_am_messages(m_e1, wnd, UCS_OK);
Expand All @@ -179,6 +183,8 @@ void test_rc_flow_control::test_general(int wnd, int soft_thresh,

void test_rc_flow_control::test_pending_grant(int wnd)
{
check_caps(UCT_IFACE_FLAG_AM_SHORT);

/* Block send capabilities of m_e2 for fc grant to be
* added to the pending queue. */
disable_entity(m_e2);
Expand All @@ -204,6 +210,8 @@ void test_rc_flow_control::test_pending_grant(int wnd)

void test_rc_flow_control::test_flush_fc_disabled()
{
check_caps(UCT_IFACE_FLAG_AM_SHORT);

set_fc_disabled(m_e1);
ucs_status_t status;

Expand All @@ -214,8 +222,7 @@ void test_rc_flow_control::test_flush_fc_disabled()

/* send active message should be OK */
get_fc_ptr(m_e1)->fc_wnd = 1;
status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0);
EXPECT_EQ(UCS_OK, status);
send_am_message(m_e1, 1, UCS_OK);
EXPECT_EQ(0, get_fc_ptr(m_e1)->fc_wnd);

/* flush must have resources */
Expand All @@ -227,6 +234,8 @@ void test_rc_flow_control::test_pending_purge(int wnd, int num_pend_sends)
{
pending_send_request_t reqs[num_pend_sends];

check_caps(UCT_IFACE_FLAG_AM_SHORT);

disable_entity(m_e2);
set_fc_attributes(m_e1, true, wnd, wnd, 1);

Expand Down Expand Up @@ -264,6 +273,8 @@ UCS_TEST_P(test_rc_flow_control, pending_only_fc)
{
int wnd = 2;

check_caps(UCT_IFACE_FLAG_AM_SHORT);

disable_entity(m_e2);
set_fc_attributes(m_e1, true, wnd, wnd, 1);

Expand Down Expand Up @@ -300,6 +311,8 @@ void test_rc_flow_control_stats::test_general(int wnd, int soft_thresh,
{
uint64_t v;

check_caps(UCT_IFACE_FLAG_AM_SHORT);

set_fc_attributes(m_e1, true, wnd, soft_thresh, hard_thresh);

send_am_messages(m_e1, wnd, UCS_OK);
Expand Down Expand Up @@ -332,6 +345,8 @@ UCS_TEST_P(test_rc_flow_control_stats, soft_request)
int s_thresh = 4;
int h_thresh = 1;

check_caps(UCT_IFACE_FLAG_AM_SHORT);

set_fc_attributes(m_e1, true, wnd, s_thresh, h_thresh);
send_am_and_flush(m_e1, wnd - (s_thresh - 1));

Expand Down
2 changes: 1 addition & 1 deletion test/gtest/uct/ib/test_rc.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class test_rc : public uct_test {
void send_am_messages(entity *e, int wnd, ucs_status_t expected,
uint8_t am_id = 0, int ep_idx = 0) {
for (int i = 0; i < wnd; i++) {
EXPECT_EQ(expected, uct_ep_am_short(e->ep(ep_idx), am_id, 0, NULL, 0));
EXPECT_EQ(expected, send_am_message(e, wnd, am_id, ep_idx));
}
}

Expand Down
Loading