Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UCT/IB/BASE: use random roce path factor to achieve high reliability. #127

Merged
merged 1 commit into from
May 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/uct/ib/base/ib_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#define UCT_IB_SITE_LOCAL_MASK be64toh(0xffffffffffff0000ul) /* IBTA 4.1.1 12b */
#define UCT_IB_DEFAULT_ROCEV2_DSCP 106 /* Default DSCP for RoCE v2 */
/* Base value OR-ed with the computed UDP source port offset when filling the
 * address handle's dlid for RoCE */
#define UCT_IB_ROCE_UDP_SRC_PORT_BASE 0xC000
/* Upper bound accepted for the roce_path_factor configuration value; also
 * bounds the random offset used when random RoCE path selection is enabled */
#define UCT_IB_ROCE_MAX_PATH_FACTOR 0x400
/* printf-style sysfs path templates; the first %s is presumably the IB device
 * name and %d the port number - confirm against callers */
#define UCT_IB_DEVICE_SYSFS_PFX "/sys/class/infiniband/%s"
#define UCT_IB_DEVICE_SYSFS_FMT UCT_IB_DEVICE_SYSFS_PFX "/device/%s"
#define UCT_IB_DEVICE_SYSFS_GID_ATTR_PFX UCT_IB_DEVICE_SYSFS_PFX "/ports/%d/gid_attrs"
Expand Down
26 changes: 25 additions & 1 deletion src/uct/ib/base/ib_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ ucs_config_field_t uct_ib_iface_config_table[] = {
"path for the same pair of endpoints.",
ucs_offsetof(uct_ib_iface_config_t, roce_path_factor), UCS_CONFIG_TYPE_UINT},

{"ROCE_RANDOM_PATH", "n",
"Enable/Disable random RoCE path generation.",
ucs_offsetof(uct_ib_iface_config_t, roce_random_path), UCS_CONFIG_TYPE_BOOL},

{"LID_PATH_BITS", "0",
"List of IB Path bits separated by comma (a,b,c) "
"which will be the low portion of the LID, according to the LMC in the fabric.",
Expand Down Expand Up @@ -529,7 +533,17 @@ void uct_ib_iface_fill_ah_attr_from_gid_lid(uct_ib_iface_t *iface, uint16_t lid,
ah_attr->grh.traffic_class = iface->config.traffic_class;

if (uct_ib_iface_is_roce(iface)) {
udp_sport = iface->config.roce_path_factor * path_index;
if (iface->config.roce_random_path) {
if (path_index == 0) {
rand_r(&iface->rand_value);
}
udp_sport = (iface->rand_value %
(UCT_IB_ROCE_MAX_PATH_FACTOR + 1 -
iface->config.roce_path_factor)) +
iface->config.roce_path_factor * path_index;
} else {
udp_sport = iface->config.roce_path_factor * path_index;
}
/* older drivers use dlid for udp.sport, new drivers use flow_label when
it is nonzero */
ah_attr->dlid = UCT_IB_ROCE_UDP_SRC_PORT_BASE | udp_sport;
Expand Down Expand Up @@ -1003,6 +1017,11 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
uint8_t port_num;
size_t inl;

if (config->roce_path_factor > UCT_IB_ROCE_MAX_PATH_FACTOR) {
status = UCS_ERR_INVALID_PARAM;
goto err;
}

if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) {
return UCS_ERR_UNSUPPORTED;
}
Expand Down Expand Up @@ -1041,6 +1060,7 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
self->config.rx_headroom_offset = self->config.rx_payload_offset -
rx_headroom;
self->config.seg_size = init_attr->seg_size;
self->config.roce_random_path = config->roce_random_path;
self->config.roce_path_factor = config->roce_path_factor;
self->config.tx_max_poll = config->tx.max_poll;
self->config.rx_max_poll = config->rx.max_poll;
Expand All @@ -1053,6 +1073,10 @@ UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
self->config.enable_res_domain = config->enable_res_domain;
self->config.qp_type = init_attr->qp_type;

if (config->roce_random_path) {
self->rand_value = ucs_generate_uuid(0);
}

if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) {
ucs_error("IB transports do not support multi-threaded worker");
return UCS_ERR_INVALID_PARAM;
Expand Down
5 changes: 5 additions & 0 deletions src/uct/ib/base/ib_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ struct uct_ib_iface_config {
/* Multiplier for RoCE LAG UDP source port calculation */
unsigned roce_path_factor;

/* Enable random path factor for RoCE LAG UDP source port calculation */
int roce_random_path;

/* Ranges of path bits */
UCS_CONFIG_ARRAY_FIELD(ucs_range_spec_t, ranges) lid_path_bits;

Expand Down Expand Up @@ -195,6 +198,7 @@ struct uct_ib_iface {
uint16_t pkey_value;
uint8_t addr_size;
uct_ib_device_gid_info_t gid_info;
unsigned rand_value;

struct {
unsigned rx_payload_offset; /* offset from desc to payload */
Expand All @@ -205,6 +209,7 @@ struct uct_ib_iface {
unsigned tx_max_poll;
unsigned seg_size;
unsigned roce_path_factor;
uint8_t roce_random_path;
uint8_t max_inl_resp;
uint8_t port_num;
uint8_t sl;
Expand Down