From 2546953bea3ee00b6e05ce7d6cd1530ea6b64b6d Mon Sep 17 00:00:00 2001
From: binl
Date: Mon, 7 Dec 2020 15:27:21 +0200
Subject: [PATCH] UCT/IB: get roce ndev name according to right gid but not fixed gid 0

uct_ib_device_get_roce_ndev_name() always read the sysfs "ndev" attribute
of GID index 0. Add a gid_index argument to it and to
uct_ib_device_get_roce_lag_level(), and pass iface->gid_info.gid_index
from uct_ib_iface_set_num_paths(), so the network device name is looked
up for the GID index the interface holds rather than a fixed one.

Format the failure diagnostic with the same gid_index, so the logged
sysfs path is the one that was actually read.
---
 src/uct/ib/base/ib_device.c | 13 +++++++------
 src/uct/ib/base/ib_device.h |  3 ++-
 src/uct/ib/base/ib_iface.c  |  3 ++-
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/uct/ib/base/ib_device.c b/src/uct/ib/base/ib_device.c
index 33528e80341..3a6483e3b1d 100644
--- a/src/uct/ib/base/ib_device.c
+++ b/src/uct/ib/base/ib_device.c
@@ -1329,7 +1329,7 @@ int uct_ib_get_cqe_size(int cqe_size_min)
 
 static ucs_status_t
 uct_ib_device_get_roce_ndev_name(uct_ib_device_t *dev, uint8_t port_num,
-                                 char *ndev_name, size_t max)
+                                 uint8_t gid_index, char *ndev_name, size_t max)
 {
     ssize_t nread;
 
@@ -1338,10 +1338,10 @@ uct_ib_device_get_roce_ndev_name(uct_ib_device_t *dev, uint8_t port_num,
     /* get the network device name which corresponds to a RoCE port */
     nread = ucs_read_file_str(ndev_name, max, 1,
                               UCT_IB_DEVICE_SYSFS_GID_NDEV_FMT,
-                              uct_ib_device_name(dev), port_num, 0);
+                              uct_ib_device_name(dev), port_num, gid_index);
     if (nread < 0) {
         ucs_diag("failed to read " UCT_IB_DEVICE_SYSFS_GID_NDEV_FMT": %m",
-                 uct_ib_device_name(dev), port_num, 0);
+                 uct_ib_device_name(dev), port_num, gid_index);
         return UCS_ERR_NO_DEVICE;
     }
 
@@ -1349,14 +1349,15 @@ uct_ib_device_get_roce_ndev_name(uct_ib_device_t *dev, uint8_t port_num,
     return UCS_OK;
 }
 
-unsigned uct_ib_device_get_roce_lag_level(uct_ib_device_t *dev, uint8_t port_num)
+unsigned uct_ib_device_get_roce_lag_level(uct_ib_device_t *dev, uint8_t port_num,
+                                          uint8_t gid_index)
 {
     char ndev_name[IFNAMSIZ];
     unsigned roce_lag_level;
     ucs_status_t status;
 
-    status = uct_ib_device_get_roce_ndev_name(dev, port_num, ndev_name,
-                                              sizeof(ndev_name));
+    status = uct_ib_device_get_roce_ndev_name(dev, port_num, gid_index,
+                                              ndev_name, sizeof(ndev_name));
     if (status != UCS_OK) {
         return 1;
     }
diff --git a/src/uct/ib/base/ib_device.h b/src/uct/ib/base/ib_device.h
index d11b358cb5e..0f2c25f7f88 100644
--- a/src/uct/ib/base/ib_device.h
+++ b/src/uct/ib/base/ib_device.h
@@ -367,7 +367,8 @@ ucs_status_t uct_ib_device_create_ah_cached(uct_ib_device_t *dev,
 void uct_ib_device_cleanup_ah_cached(uct_ib_device_t *dev);
 
 unsigned uct_ib_device_get_roce_lag_level(uct_ib_device_t *dev,
-                                          uint8_t port_num);
+                                          uint8_t port_num,
+                                          uint8_t gid_index);
 
 
 static inline struct ibv_port_attr*
diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c
index 11a29b50c2d..f998a70184a 100644
--- a/src/uct/ib/base/ib_iface.c
+++ b/src/uct/ib/base/ib_iface.c
@@ -1085,7 +1085,8 @@ static void uct_ib_iface_set_num_paths(uct_ib_iface_t *iface,
     if (uct_ib_iface_is_roce(iface)) {
         /* RoCE - number of paths is RoCE LAG level */
         iface->num_paths =
-            uct_ib_device_get_roce_lag_level(dev, iface->config.port_num);
+            uct_ib_device_get_roce_lag_level(dev, iface->config.port_num,
+                                             iface->gid_info.gid_index);
     } else {
         /* IB - number of paths is LMC level */
         ucs_assert(iface->path_bits_count > 0);