Skip to content

Commit

Permalink
UCT/IB/BASE: Use a random RoCE path factor to achieve high reliability.
Browse files Browse the repository at this point in the history
  • Loading branch information
binl committed May 12, 2021
1 parent 311cdd0 commit e717c86
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/uct/ib/base/ib_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#define UCT_IB_SITE_LOCAL_MASK be64toh(0xffffffffffff0000ul) /* IBTA 4.1.1 12b */
#define UCT_IB_DEFAULT_ROCEV2_DSCP 106 /* Default DSCP for RoCE v2 */
/* Base value that is OR-ed with the per-path UDP source port offset when
 * filling the address handle for a RoCE interface */
#define UCT_IB_ROCE_UDP_SRC_PORT_BASE 0xC000
/* Largest roce_path_factor accepted at interface init (0x400 == 1024); a
 * larger configured value is rejected with UCS_ERR_INVALID_PARAM. Also used
 * to bound the random offset added when random RoCE paths are enabled. */
#define UCT_IB_ROCE_MAX_PATH_FACTOR 0x400
/* printf-style templates for sysfs paths.
 * NOTE(review): the first %s is presumably the IB device name and %d the port
 * number - confirm against the callers. */
#define UCT_IB_DEVICE_SYSFS_PFX "/sys/class/infiniband/%s"
#define UCT_IB_DEVICE_SYSFS_FMT UCT_IB_DEVICE_SYSFS_PFX "/device/%s"
#define UCT_IB_DEVICE_SYSFS_GID_ATTR_PFX UCT_IB_DEVICE_SYSFS_PFX "/ports/%d/gid_attrs"
Expand Down
26 changes: 25 additions & 1 deletion src/uct/ib/base/ib_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ ucs_config_field_t uct_ib_iface_config_table[] = {
"path for the same pair of endpoints.",
ucs_offsetof(uct_ib_iface_config_t, roce_path_factor), UCS_CONFIG_TYPE_UINT},

{"ROCE_RANDOM_PATH", "n",
"Enable/Disable random RoCE path generation.",
ucs_offsetof(uct_ib_iface_config_t, roce_random_path), UCS_CONFIG_TYPE_BOOL},

{"LID_PATH_BITS", "0",
"List of IB Path bits separated by comma (a,b,c) "
"which will be the low portion of the LID, according to the LMC in the fabric.",
Expand Down Expand Up @@ -529,7 +533,17 @@ void uct_ib_iface_fill_ah_attr_from_gid_lid(uct_ib_iface_t *iface, uint16_t lid,
ah_attr->grh.traffic_class = iface->config.traffic_class;

if (uct_ib_iface_is_roce(iface)) {
udp_sport = iface->config.roce_path_factor * path_index;
if (iface->config.roce_random_path) {
if (path_index == 0) {
rand_r(&iface->rand_value);
}
udp_sport = (iface->rand_value %
(UCT_IB_ROCE_MAX_PATH_FACTOR + 1 -
iface->config.roce_path_factor)) +
iface->config.roce_path_factor * path_index;
} else {
udp_sport = iface->config.roce_path_factor * path_index;
}
/* older drivers use dlid for udp.sport, new drivers use flow_label when
it's nonzero */
ah_attr->dlid = UCT_IB_ROCE_UDP_SRC_PORT_BASE | udp_sport;
Expand Down Expand Up @@ -1003,6 +1017,11 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
uint8_t port_num;
size_t inl;

if (config->roce_path_factor > UCT_IB_ROCE_MAX_PATH_FACTOR) {
status = UCS_ERR_INVALID_PARAM;
goto err;
}

if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) {
return UCS_ERR_UNSUPPORTED;
}
Expand Down Expand Up @@ -1041,6 +1060,7 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
self->config.rx_headroom_offset = self->config.rx_payload_offset -
rx_headroom;
self->config.seg_size = init_attr->seg_size;
self->config.roce_random_path = config->roce_random_path;
self->config.roce_path_factor = config->roce_path_factor;
self->config.tx_max_poll = config->tx.max_poll;
self->config.rx_max_poll = config->rx.max_poll;
Expand All @@ -1053,6 +1073,10 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
self->config.enable_res_domain = config->enable_res_domain;
self->config.qp_type = init_attr->qp_type;

if (config->roce_random_path) {
self->rand_value = ucs_generate_uuid(0);
}

if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) {
ucs_error("IB transports do not support multi-threaded worker");
return UCS_ERR_INVALID_PARAM;
Expand Down
5 changes: 5 additions & 0 deletions src/uct/ib/base/ib_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ struct uct_ib_iface_config {
/* Multiplier for RoCE LAG UDP source port calculation */
unsigned roce_path_factor;

/* Enable random path factor for RoCE LAG UDP source port calculation */
int roce_random_path;

/* Ranges of path bits */
UCS_CONFIG_ARRAY_FIELD(ucs_range_spec_t, ranges) lid_path_bits;

Expand Down Expand Up @@ -195,6 +198,7 @@ struct uct_ib_iface {
uint16_t pkey_value;
uint8_t addr_size;
uct_ib_device_gid_info_t gid_info;
unsigned rand_value;

struct {
unsigned rx_payload_offset; /* offset from desc to payload */
Expand All @@ -205,6 +209,7 @@ struct uct_ib_iface {
unsigned tx_max_poll;
unsigned seg_size;
unsigned roce_path_factor;
uint8_t roce_random_path;
uint8_t max_inl_resp;
uint8_t port_num;
uint8_t sl;
Expand Down

0 comments on commit e717c86

Please sign in to comment.