Skip to content

Commit

Permalink
UCT/IB: Fixes for SL selection
Browse files Browse the repository at this point in the history
  • Loading branch information
dmitrygx committed Dec 16, 2020
1 parent 1aa7505 commit 7c81ab1
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 39 deletions.
14 changes: 9 additions & 5 deletions src/uct/ib/base/ib_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ void uct_ib_iface_fill_ah_attr_from_gid_lid(uct_ib_iface_t *iface, uint16_t lid,

memset(ah_attr, 0, sizeof(*ah_attr));

ucs_assert(iface->config.sl != UCT_IB_SL_INVALID);
ucs_assert(iface->config.sl < UCT_IB_SL_NUM);

ah_attr->sl = iface->config.sl;
ah_attr->port_num = iface->config.port_num;
Expand Down Expand Up @@ -1179,9 +1179,12 @@ static void uct_ib_iface_set_path_mtu(uct_ib_iface_t *iface,

uint8_t uct_ib_iface_config_select_sl(const uct_ib_iface_config_t *ib_config)
{
ucs_assert((ib_config->sl <= UCT_IB_SL_MAX) ||
(ib_config->sl == UCS_ULUNITS_AUTO));
return (ib_config->sl == UCS_ULUNITS_AUTO) ? 0 : (uint8_t)ib_config->sl;
if (ib_config->sl == UCS_ULUNITS_AUTO) {
return 0;
}

ucs_assert(ib_config->sl < UCT_IB_SL_NUM);
return (uint8_t)ib_config->sl;
}

UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
Expand Down Expand Up @@ -1244,7 +1247,8 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
self->config.rx_max_batch = ucs_min(config->rx.max_batch,
config->rx.queue_len / 4);
self->config.port_num = port_num;
self->config.sl = UCT_IB_SL_INVALID;
/* initialize to invalid value */
self->config.sl = UCT_IB_SL_NUM;
self->config.hop_limit = config->hop_limit;
self->release_desc.cb = uct_ib_iface_release_desc;
self->config.enable_res_domain = config->enable_res_domain;
Expand Down
3 changes: 1 addition & 2 deletions src/uct/ib/base/ib_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
#define UCT_IB_ADDRESS_INVALID_PATH_MTU ((enum ibv_mtu)0)
#define UCT_IB_ADDRESS_INVALID_PKEY 0
#define UCT_IB_ADDRESS_DEFAULT_PKEY 0xffff
#define UCT_IB_SL_MAX 15
#define UCT_IB_SL_INVALID (UCT_IB_SL_MAX + 1)
#define UCT_IB_SL_NUM 16

/* Forward declarations */
typedef struct uct_ib_iface_config uct_ib_iface_config_t;
Expand Down
24 changes: 19 additions & 5 deletions src/uct/ib/mlx5/ib_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,7 @@ uct_ib_mlx5_select_sl(const uct_ib_iface_config_t *ib_config,

/* which SLs are allowed by user config */
sl_allow_mask = (ib_config->sl == UCS_ULUNITS_AUTO) ?
UCS_MASK(UCT_IB_SL_MAX) : UCS_BIT(ib_config->sl);
UCS_MASK(UCT_IB_SL_NUM) : UCS_BIT(ib_config->sl);

if (have_sl_mask_cap) {
sls_with_ar = sl_allow_mask & hw_sl_mask;
Expand Down Expand Up @@ -814,8 +814,19 @@ uct_ib_mlx5_iface_select_sl(uct_ib_iface_t *iface,
#endif
uint16_t ooo_sl_mask = 0;
ucs_status_t status;
ucs_ternary_auto_value_t ar_enable;
int have_ooo_sl_mask;

ucs_assert(iface->config.sl == UCT_IB_SL_INVALID);
ucs_assert(iface->config.sl == UCT_IB_SL_NUM);

if (uct_ib_device_is_port_roce(uct_ib_iface_device(iface),
iface->config.port_num)) {
/* Ethernet priority for RoCE devices can't be selected regardless
* AR support requested by user, pass empty ooo_sl_mask */
ar_enable = UCS_NO;
have_ooo_sl_mask = 1;
goto select_sl;
}

#if HAVE_DEVX
status = uct_ib_mlx5_devx_query_ooo_sl_mask(md, iface->config.port_num,
Expand All @@ -827,8 +838,11 @@ uct_ib_mlx5_iface_select_sl(uct_ib_iface_t *iface,
status = UCS_ERR_UNSUPPORTED;
#endif

return uct_ib_mlx5_select_sl(ib_config, ib_mlx5_config->ar_enable,
ooo_sl_mask, status == UCS_OK,
UCT_IB_IFACE_ARG(iface),
ar_enable = ib_mlx5_config->ar_enable;
have_ooo_sl_mask = status == UCS_OK;

select_sl:
return uct_ib_mlx5_select_sl(ib_config, ar_enable, ooo_sl_mask,
have_ooo_sl_mask, UCT_IB_IFACE_ARG(iface),
&iface->config.sl);
}
105 changes: 78 additions & 27 deletions test/gtest/uct/ib/test_ib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,39 @@ class test_uct_ib_sl_utils : public test_uct_ib_utils {
return status;
}

static ucs_log_func_rc_t
wrap_errors_check_sl_masks_logger(const char *file, unsigned line,
const char *function,
ucs_log_level_t level,
const ucs_log_component_config_t *
comp_conf,
const char *message, va_list ap)
{
if (level == UCS_LOG_LEVEL_ERROR) {
std::string err_str = format_message(message, ap);

for (uint8_t sl = 0; sl < UCT_IB_SL_NUM; ++sl) {
std::string sl_val = ucs::to_string(static_cast<uint16_t>(sl));

if ((err_str.find(sl_val + ", ") == std::string::npos) &&
(err_str.find(sl_val + " }") == std::string::npos)) {
return UCS_LOG_FUNC_RC_CONTINUE;
}
}

return UCS_LOG_FUNC_RC_STOP;
}

return UCS_LOG_FUNC_RC_CONTINUE;
}

ucs_status_t select_sl_nok(ucs_ternary_auto_value_t ar_enable,
uint64_t ooo_sl_mask,
unsigned long config_sl, uint64_t ooo_sl_mask,
const uct_ib_iface_config_t &config) const {
scoped_log_handler slh(wrap_errors_logger);
scoped_log_handler slh(((ooo_sl_mask != m_ooo_sl_mask_not_detected) &&
(config_sl == UCS_ULUNITS_AUTO)) ?
wrap_errors_check_sl_masks_logger :
wrap_errors_logger);
uint8_t sl;

EXPECT_NE(UCS_AUTO, ar_enable);
Expand All @@ -540,7 +569,7 @@ class test_uct_ib_sl_utils : public test_uct_ib_utils {
if (exp_status == UCS_OK) {
status = select_sl_ok(ar_enable, config_sl, ooo_sl_mask, config);
} else {
status = select_sl_nok(ar_enable, ooo_sl_mask, config);
status = select_sl_nok(ar_enable, config_sl, ooo_sl_mask, config);
}
EXPECT_EQ(exp_status, status);
}
Expand Down Expand Up @@ -574,30 +603,52 @@ UCS_TEST_F(test_uct_ib_sl_utils, sl_selection) {
/* select the default SL, with OOO SL mask { 4 } */
select_sl(ar_enable, UCS_ULUNITS_AUTO, UCS_BIT(4), UCS_OK);

/* select SL=8, with empty OOO SL mask */
select_sl(ar_enable, 8, 0,
(ar_enable == UCS_YES) ? err_status : UCS_OK);

/* select SL=8, without OOO SL mask (not detected) */
select_sl(ar_enable, 8, m_ooo_sl_mask_not_detected,
((ar_enable != UCS_TRY) && (ar_enable != UCS_AUTO)) ?
err_status : UCS_OK);

/* select SL=8, with OOO SL mask { 8 } */
select_sl(ar_enable, 8, UCS_BIT(8),
(ar_enable == UCS_NO) ? err_status : UCS_OK);

/* select SL=8, with OOO SL mask { 0 } */
select_sl(ar_enable, 8, UCS_BIT(0),
(ar_enable == UCS_YES) ? err_status : UCS_OK);

/* select SL=8, with OOO SL mask { 0, 4, 8 } */
select_sl(ar_enable, 8, UCS_BIT(0) | UCS_BIT(4) | UCS_BIT(8),
(ar_enable == UCS_NO) ? err_status : UCS_OK);

/* select SL=8, with OOO SL mask { 0, 8, 11 } */
select_sl(ar_enable, 8, UCS_BIT(0) | UCS_BIT(4) | UCS_BIT(11),
(ar_enable == UCS_YES) ? err_status : UCS_OK);
for (uint8_t sl = 0; sl < UCT_IB_SL_NUM; ++sl) {
/* select SL=<sl>, with empty OOO SL mask */
select_sl(ar_enable, sl, 0,
(ar_enable == UCS_YES) ? err_status : UCS_OK);

/* select SL=<sl>, without OOO SL mask (not detected) */
select_sl(ar_enable, sl, m_ooo_sl_mask_not_detected,
((ar_enable != UCS_TRY) && (ar_enable != UCS_AUTO)) ?
err_status : UCS_OK);

/* select SL=<sl>, with OOO SL mask { sl } */
select_sl(ar_enable, sl, UCS_BIT(sl),
(ar_enable == UCS_NO) ? err_status : UCS_OK);

/* select SL=<sl>, with OOO SL mask { UCT_IB_SL_NUM - 1 - sl },
* i.e. OOO SL mask that doesn't contain <sl> */
select_sl(ar_enable, sl, UCS_BIT(UCT_IB_SL_NUM - 1 - sl),
(ar_enable == UCS_YES) ? err_status : UCS_OK);

/* select SL=<sl>, with OOO SL mask { (sl - 1) % UCT_IB_SL_NUM,
sl,
(sl + 1) % UCT_IB_SL_NUM } */
select_sl(ar_enable, sl,
UCS_BIT((sl - 1) % UCT_IB_SL_NUM) |
UCS_BIT(sl) |
UCS_BIT((sl + 1) % UCT_IB_SL_NUM),
(ar_enable == UCS_NO) ? err_status : UCS_OK);

/* select SL=<sl>, with OOO SL mask { (sl - 1) % UCT_IB_SL_NUM,
UCT_IB_SL_NUM - 1 - sl,
(sl + 1) % UCT_IB_SL_NUM },
* i.e. OOO SL mask that doesn't contain <sl> */
select_sl(ar_enable, sl,
UCS_BIT((sl - 1) % UCT_IB_SL_NUM) |
UCS_BIT(UCT_IB_SL_NUM - 1 - sl) |
UCS_BIT((sl + 1) % UCT_IB_SL_NUM),
(ar_enable == UCS_YES) ? err_status : UCS_OK);

/* select SL=<sl>, with full OOO SL mask */
select_sl(ar_enable, sl, UCS_MASK(UCT_IB_SL_NUM),
(ar_enable == UCS_NO) ? err_status : UCS_OK);

/* select SL=<sl>, with full OOO SL mask, except <sl> */
select_sl(ar_enable, sl, UCS_MASK(UCT_IB_SL_NUM) & ~UCS_BIT(sl),
(ar_enable == UCS_YES) ? err_status : UCS_OK);
}
}
}

Expand Down

0 comments on commit 7c81ab1

Please sign in to comment.