From 412cfc61be80ac201fe773fe68edb669770528c7 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Sun, 23 Jun 2019 18:50:19 +0300 Subject: [PATCH 01/10] UCM/MMAP: Fix race condition updating fired_events from multiple threads --- src/ucm/mmap/install.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/ucm/mmap/install.c b/src/ucm/mmap/install.c index 49fe9c2d05d..a57506b6513 100644 --- a/src/ucm/mmap/install.c +++ b/src/ucm/mmap/install.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -36,11 +37,12 @@ #define UCM_FIRE_EVENT(_event, _mask, _data, _call) \ do { \ + int exp_events = (_event) & (_mask); \ (_data)->fired_events = 0; \ _call; \ - ucm_trace("after %s: fired events = 0x%x", UCS_PP_MAKE_STRING(_call), \ - (_data)->fired_events); \ - (_data)->out_events &= ~((_event) & (_mask)) | (_data)->fired_events; \ + ucm_trace("after %s: got 0x%x/0x%x", UCS_PP_MAKE_STRING(_call), \ + (_data)->fired_events, exp_events); \ + (_data)->out_events &= ~exp_events | (_data)->fired_events; \ } while(0) extern const char *ucm_mmap_hook_modes[]; @@ -59,8 +61,8 @@ typedef struct ucm_mmap_func { } ucm_mmap_func_t; typedef struct ucm_mmap_test_events_data { - int fired_events; - int out_events; + uint32_t fired_events; + int out_events; } ucm_mmap_test_events_data_t; static ucm_mmap_func_t ucm_mmap_funcs[] = { @@ -81,9 +83,16 @@ static pthread_mutex_t ucm_mmap_install_mutex = PTHREAD_MUTEX_INITIALIZER; static int ucm_mmap_installed_events = 0; /* events that were reported as installed */ static void ucm_mmap_event_test_callback(ucm_event_type_t event_type, - ucm_event_t *event, void *fired_events) + ucm_event_t *event, void *arg) { - *(int*)fired_events |= event_type; + ucm_mmap_test_events_data_t *data = arg; + + /* This callback may be called from multiple threads, which are just calling + * memory allocations/release, and not testing mmap hooks at the moment. 
+ * So in order to ensure the thread which tests events sees all fired + * events, use atomic OR operation. + */ + ucs_atomic_or32(&data->fired_events, event_type); } /* Fire events with pre/post action. The problem is in call sequence: we @@ -182,7 +191,7 @@ static ucs_status_t ucm_mmap_test_events(int events) handler.events = events; handler.priority = -1; handler.cb = ucm_mmap_event_test_callback; - handler.arg = &data.fired_events; + handler.arg = &data; data.out_events = events; ucm_event_handler_add(&handler); From 1c47ad5861e62c63dbec1c7352697feb3582e30f Mon Sep 17 00:00:00 2001 From: "Pavel Shamis (Pasha)" Date: Mon, 17 Jun 2019 17:44:35 -0500 Subject: [PATCH 02/10] CONFIG: Fixing broken configure logic The enable/disable logic was busted in quite a few places. It "almost" worked, with the exception that --disable didn't really work. Since all the options are disabled by default, we didn't notice this. --- configure.ac | 62 ++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/configure.ac b/configure.ac index d8046030b1b..a7a3a37fee4 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. # Copyright (C) The University of Tennessee and The University # of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. -# Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016-2019. ALL RIGHTS RESERVED. # See file LICENSE for terms. 
# AC_PREREQ([2.63]) @@ -217,7 +217,6 @@ AS_IF([test "x$with_docs_only" = xyes], [Compile with frame pointer, useful for profiling, default: NO]), [], [enable_frame_pointer=no]) - AS_IF([test "x$enable_frame_pointer" = xyes], [AS_MESSAGE([compiling with frame pointer]) BASE_CFLAGS="$BASE_CFLAGS -fno-omit-frame-pointer"], @@ -235,8 +234,7 @@ AS_IF([test "x$with_docs_only" = xyes], AS_IF([test "x$enable_fault_injection" = xyes], [AS_MESSAGE([enabling with fault injection code]) AC_DEFINE([ENABLE_FAULT_INJECTION], [1], [Enable fault injection code])], - [:] - ) + [:]) # @@ -245,9 +243,11 @@ AS_IF([test "x$with_docs_only" = xyes], AC_ARG_ENABLE([params-check], AS_HELP_STRING([--disable-params-check], [Disable checking user parameters passed to API, default: NO]), - [AC_DEFINE([ENABLE_PARAMS_CHECK], [0])], - [AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])]) - + [], + [enable_params_check=no]) + AS_IF([test "x$enable_param_check" = xyes], + [AC_DEFINE([ENABLE_PARAMS_CHECK], [0])], + [AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])]) # # Enable collecting data to ease debugging @@ -255,12 +255,13 @@ AS_IF([test "x$with_docs_only" = xyes], AC_ARG_ENABLE([debug-data], AS_HELP_STRING([--enable-debug-data], [Enable collecting data to ease debugging, default: NO]), - [ - AC_DEFINE([ENABLE_DEBUG_DATA], [1], [Enable collecting data]) - AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [1], - [Enable packet header inspection/rewriting in UCT/UD]) - ], - [AC_DEFINE([ENABLE_DEBUG_DATA], [0])]) + [], + [enable_debug_data=no]) + AS_IF([test "x$enable_debug_data" = xyes], + [AC_DEFINE([ENABLE_DEBUG_DATA], [1], [Enable collecting data]) + AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [1], + [Enable packet header inspection/rewriting in UCT/UD])], + [AC_DEFINE([ENABLE_DEBUG_DATA], [0])]) # @@ -269,10 +270,13 @@ AS_IF([test "x$with_docs_only" = xyes], AC_ARG_ENABLE([mt], AS_HELP_STRING([--enable-mt], [Enable thread support in UCP and UCT, default: NO]), - 
[AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT]) - mt_enable=Enabled], - [AC_DEFINE([ENABLE_MT], [0]) - mt_enable=Disabled]) + [], + [enable_mt=no]) + AS_IF([test "x$enable_mt" = xyes], + [AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT]) + mt_enable=Enabled], + [AC_DEFINE([ENABLE_MT], [0]) + mt_enable=Disabled]) # @@ -280,8 +284,10 @@ AS_IF([test "x$with_docs_only" = xyes], # AC_ARG_ENABLE([experimental-api], AS_HELP_STRING([--enable-experimental-api], - [Enable installing experimental APIs, default: NO])) - AM_CONDITIONAL([ENABLE_EXPERIMENTAL_API], [test "x$enable_experimental_api" = "xyes"]) + [Enable installing experimental APIs, default: NO]), + [], + [enable_experimental_api=no]) + AM_CONDITIONAL([ENABLE_EXPERIMENTAL_API], [test "x$enable_experimental_api" = "xyes"]) # @@ -289,9 +295,10 @@ AS_IF([test "x$with_docs_only" = xyes], # AC_ARG_ENABLE([devel-headers], AS_HELP_STRING([--enable-devel-headers], - [Enable installing development headers, default: NO])) - AM_CONDITIONAL([INSTALL_DEVEL_HEADERS], - [test "x$enable_devel_headers" = "xyes"]) + [Enable installing development headers, default: NO]), + [], + [enable_devel_headers=no]) + AM_CONDITIONAL([INSTALL_DEVEL_HEADERS], [test "x$enable_devel_headers" = "xyes"]) # @@ -304,17 +311,16 @@ AS_IF([test "x$with_docs_only" = xyes], # Enable examples build # AC_ARG_ENABLE([examples], - [AS_HELP_STRING([--enable-examples], - [Enable examples build])], - [AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"])], - [AM_CONDITIONAL([HAVE_EXAMPLES], [false])]) - + AS_HELP_STRING([--enable-examples], + [Enable examples build, default: NO]), + [], + [enable_examples=no]) + AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"]) ]) # Docs only # # Print which transports are built # -AC_MSG_NOTICE([Supported transports: $transports]) build_modules="${uct_modules}" build_modules+="${uct_ib_modules}" build_modules+="${uct_cuda_modules}" From 
369cf9053862d8926e2debbdc85412b67489d98a Mon Sep 17 00:00:00 2001 From: "Pavel Shamis (Pasha)" Date: Tue, 18 Jun 2019 09:54:51 -0500 Subject: [PATCH 03/10] CONFIG: Addressing reviewer comments - Alignment fixes - Updating default configuration options Signed-off-by: Pavel Shamis (Pasha) --- configure.ac | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index a7a3a37fee4..f6aebc3890e 100644 --- a/configure.ac +++ b/configure.ac @@ -244,7 +244,7 @@ AS_IF([test "x$with_docs_only" = xyes], AS_HELP_STRING([--disable-params-check], [Disable checking user parameters passed to API, default: NO]), [], - [enable_params_check=no]) + [enable_params_check=yes]) AS_IF([test "x$enable_param_check" = xyes], [AC_DEFINE([ENABLE_PARAMS_CHECK], [0])], [AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])]) @@ -261,7 +261,8 @@ AS_IF([test "x$with_docs_only" = xyes], [AC_DEFINE([ENABLE_DEBUG_DATA], [1], [Enable collecting data]) AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [1], [Enable packet header inspection/rewriting in UCT/UD])], - [AC_DEFINE([ENABLE_DEBUG_DATA], [0])]) + [AC_DEFINE([ENABLE_DEBUG_DATA], [0]) + AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [0])]) # @@ -273,10 +274,10 @@ AS_IF([test "x$with_docs_only" = xyes], [], [enable_mt=no]) AS_IF([test "x$enable_mt" = xyes], - [AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT]) + [AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT]) mt_enable=Enabled], [AC_DEFINE([ENABLE_MT], [0]) - mt_enable=Disabled]) + mt_enable=Disabled]) # @@ -312,7 +313,7 @@ AS_IF([test "x$with_docs_only" = xyes], # AC_ARG_ENABLE([examples], AS_HELP_STRING([--enable-examples], - [Enable examples build, default: NO]), + [Enable examples build, default: NO]), [], [enable_examples=no]) AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"]) From ef8d9d4e5b2439e103cff52c7463bf40c0a84495 Mon Sep 17 00:00:00 2001 From: "Pavel Shamis (Pasha)" Date: Wed, 19 Jun 2019 
11:04:47 -0500 Subject: [PATCH 04/10] CONFIG: Fixing parameter check config option Signed-off-by: Pavel Shamis (Pasha) --- configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index f6aebc3890e..c6b97f400a5 100644 --- a/configure.ac +++ b/configure.ac @@ -245,9 +245,9 @@ AS_IF([test "x$with_docs_only" = xyes], [Disable checking user parameters passed to API, default: NO]), [], [enable_params_check=yes]) - AS_IF([test "x$enable_param_check" = xyes], - [AC_DEFINE([ENABLE_PARAMS_CHECK], [0])], - [AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])]) + AS_IF([test "x$enable_params_check" = xyes], + [AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])], + [AC_DEFINE([ENABLE_PARAMS_CHECK], [0])]) # # Enable collecting data to ease debugging From 7158b26c28d420aebb3e12d128dfccd85bf22976 Mon Sep 17 00:00:00 2001 From: "Pavel Shamis (Pasha)" Date: Thu, 20 Jun 2019 15:53:51 -0500 Subject: [PATCH 05/10] IB: Fixing UCT_UD_EP_DEBUG_HOOKS ifdef Replaced ifdef with if Signed-off-by: Pavel Shamis (Pasha) --- src/uct/ib/ud/base/ud_ep.h | 2 +- src/uct/ib/ud/base/ud_iface.h | 2 +- test/gtest/uct/ib/test_ud.cc | 4 ++-- test/gtest/uct/ib/test_ud_slow_timer.cc | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uct/ib/ud/base/ud_ep.h b/src/uct/ib/ud/base/ud_ep.h index f4835a901ac..7100347b9a3 100644 --- a/src/uct/ib/ud/base/ud_ep.h +++ b/src/uct/ib/ud/base/ud_ep.h @@ -20,7 +20,7 @@ #define UCT_UD_EP_ID_MAX UCT_UD_EP_NULL_ID #define UCT_UD_EP_CONN_ID_MAX UCT_UD_EP_ID_MAX -#ifdef UCT_UD_EP_DEBUG_HOOKS +#if UCT_UD_EP_DEBUG_HOOKS /* Hooks that allow packet header inspection and rewriting. 
UCT user can set functions that will be called just before packet is put on wire diff --git a/src/uct/ib/ud/base/ud_iface.h b/src/uct/ib/ud/base/ud_iface.h index 1619a2f5eae..40aaf4aebe3 100644 --- a/src/uct/ib/ud/base/ud_iface.h +++ b/src/uct/ib/ud/base/ud_iface.h @@ -72,7 +72,7 @@ SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_ud_iface_peer_t, UCT_UD_HASH_SIZE, -#ifdef UCT_UD_EP_DEBUG_HOOKS +#if UCT_UD_EP_DEBUG_HOOKS typedef ucs_status_t (*uct_ud_iface_hook_t)(uct_ud_iface_t *iface, uct_ud_neth_t *neth); diff --git a/test/gtest/uct/ib/test_ud.cc b/test/gtest/uct/ib/test_ud.cc index 757db550109..ebe5ad64314 100644 --- a/test/gtest/uct/ib/test_ud.cc +++ b/test/gtest/uct/ib/test_ud.cc @@ -291,7 +291,7 @@ UCS_TEST_P(test_ud, flush_iface) { validate_flush(); } -#ifdef UCT_UD_EP_DEBUG_HOOKS +#if UCT_UD_EP_DEBUG_HOOKS /* disable ack req, * send full window, @@ -929,7 +929,7 @@ UCS_TEST_P(test_ud, res_skb_tx) { } } -#ifdef UCT_UD_EP_DEBUG_HOOKS +#if UCT_UD_EP_DEBUG_HOOKS /* Simulate loss of ctl packets during simultaneous CREQs. * Use-case: CREQ and CREP packets from m_e2 to m_e1 are lost. 
* Check: that both eps (m_e1 and m_e2) are connected finally */ diff --git a/test/gtest/uct/ib/test_ud_slow_timer.cc b/test/gtest/uct/ib/test_ud_slow_timer.cc index 39efbc21936..d2555d6e638 100644 --- a/test/gtest/uct/ib/test_ud_slow_timer.cc +++ b/test/gtest/uct/ib/test_ud_slow_timer.cc @@ -118,7 +118,7 @@ UCS_TEST_P(test_ud_slow_timer, ep_destroy, "UD_TIMEOUT=1s") { EXPECT_FALSE(ucs_ptr_array_lookup(&iface->eps, ep_idx, ud_ep_tmp)); } -#ifdef UCT_UD_EP_DEBUG_HOOKS +#if UCT_UD_EP_DEBUG_HOOKS /* no traffic - no ticks */ UCS_TEST_P(test_ud_slow_timer, tick1) { connect(); From 12fd69e82ab2f07f0d4fae2fc386de8924150aa0 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Thu, 20 Jun 2019 16:40:50 +0300 Subject: [PATCH 06/10] TOOLS/INFO: Fix printing atomic capabilities --- src/tools/info/tl_info.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/src/tools/info/tl_info.c b/src/tools/info/tl_info.c index e25705bad91..b3c97d61470 100644 --- a/src/tools/info/tl_info.c +++ b/src/tools/info/tl_info.c @@ -34,33 +34,13 @@ PRINT_ZCAP_NO_CHECK(_name, _min, _max, _max_iov) \ } -#define PRINT_ATOMIC_CAP(_name, _cap_flags) \ - if ((_cap_flags) & (UCT_IFACE_FLAG_##_name##32 | UCT_IFACE_FLAG_##_name##64)) { \ - char *s = strduplower(#_name); \ - char *domain = ""; \ - if ((_cap_flags) & UCT_IFACE_FLAG_ATOMIC_CPU) { \ - domain = ", cpu"; \ - } else if ((_cap_flags) & UCT_IFACE_FLAG_ATOMIC_DEVICE) { \ - domain = ", device"; \ - } \ - if (ucs_test_all_flags(_cap_flags, \ - UCT_IFACE_FLAG_##_name##32 | UCT_IFACE_FLAG_##_name##64)) \ - { \ - printf("# %12s: 32, 64 bit%s (deprecated)\n", s, domain); \ - } else { \ - printf("# %12s: %d bit%s (deprecated)\n", s, \ - ((_cap_flags) & UCT_IFACE_FLAG_##_name##32) ? 
32 : 64, domain); \ - } \ - free(s); \ - } - #define PRINT_ATOMIC_POST(_name, _cap) \ print_atomic_info(UCT_ATOMIC_OP_##_name, #_name, "", \ - _cap.atomic32.op_flags, _cap.atomic32.op_flags); + _cap.atomic32.op_flags, _cap.atomic64.op_flags); #define PRINT_ATOMIC_FETCH(_name, _cap, _suffix) \ print_atomic_info(UCT_ATOMIC_OP_##_name, #_name, _suffix, \ - _cap.atomic32.fop_flags, _cap.atomic32.fop_flags); + _cap.atomic32.fop_flags, _cap.atomic64.fop_flags); static char *strduplower(const char *str) { @@ -242,7 +222,8 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md, iface_attr.cap.atomic64.fop_flags) { if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_DEVICE) { printf("# domain: device\n"); - } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) { + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) { printf("# domain: cpu\n"); } From 204933f6eac472e924e19756edabcd90cafd312b Mon Sep 17 00:00:00 2001 From: Devendar Bureddy Date: Fri, 21 Jun 2019 20:13:17 +0300 Subject: [PATCH 07/10] PERFTEST: Fix warmup iters to be non-zero --- src/tools/perf/lib/libperf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/perf/lib/libperf.c b/src/tools/perf/lib/libperf.c index 2a956d8ad37..53a5f4ff022 100644 --- a/src/tools/perf/lib/libperf.c +++ b/src/tools/perf/lib/libperf.c @@ -1189,7 +1189,7 @@ static void ucp_perf_test_cleanup_endpoints(ucx_perf_context_t *perf) static void ucx_perf_set_warmup(ucx_perf_context_t* perf, ucx_perf_params_t* params) { - perf->max_iter = ucs_min(params->warmup_iter, params->max_iter / 10); + perf->max_iter = ucs_min(params->warmup_iter, ucs_div_round_up(params->max_iter, 10)); perf->report_interval = -1; } From 324bdf89d14a42ce045ff5d325e8f54ce1730634 Mon Sep 17 00:00:00 2001 From: Devendar Bureddy Date: Fri, 21 Jun 2019 19:42:09 +0300 Subject: [PATCH 08/10] CONFIG: Fix gdrcopy libdir config value --- config/m4/gdrcopy.m4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/config/m4/gdrcopy.m4 b/config/m4/gdrcopy.m4 index 555e731f9cf..40ea489785f 100644 --- a/config/m4/gdrcopy.m4 +++ b/config/m4/gdrcopy.m4 @@ -28,7 +28,7 @@ AS_IF([test "x$with_gdrcopy" != "xno"], LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS" ]) AS_IF([test ! -z "$with_gdrcopy_libdir" -a "x$with_gdrcopy_libdir" != "xyes"], - [ucx_check_gdrcopy_libdir="$with_nccl_libdir" + [ucx_check_gdrcopy_libdir="$with_gdrcopy_libdir" LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS"]) AC_CHECK_HEADERS([gdrapi.h], From aff9c459a466f7dc0caf675c6c186754d9536a38 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Tue, 25 Jun 2019 12:21:24 +0300 Subject: [PATCH 09/10] NEWS: Update with v1.6.0-rc4 --- NEWS | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index c34f677b84b..284507a6c70 100644 --- a/NEWS +++ b/NEWS @@ -7,7 +7,7 @@ ## # -## 1.6.0-rc3 (June 19, 2019) +## 1.6.0-rc4 (June 27, 2019) Features: - Modular architecture for UCT transports - ROCm transport re-design: support for managed memory, direct copy, ROCm GDR @@ -35,6 +35,11 @@ Bugfixes: - Fallback to ibv_create_cq() if ibv_create_cq_ex() returns ENOSYS - Fix malloc hooks test - Fix checking return status in ucp_client_server example +- Fix gdrcopy libdir config value +- Fix printing atomic capabilities in ucx_info +- Fix perftest warmup iterations to be non-zero +- Fixing default values for configure logic +- Fix race condition updating fired_events from multiple threads Tested configurations: - RDMA: MLNX_OFED 4.5, distribution inbox drivers, rdma-core 22.1 From d0bc960c362f01cc43d940ccab6f9257b46454dd Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Mon, 24 Jun 2019 12:02:14 +0300 Subject: [PATCH 10/10] TEST/APPS: Fix extracting driver name for IB device On new kernels, the RDMA device name is not indicative of driver name. 
--- test/apps/test_ucx_tls.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/apps/test_ucx_tls.py b/test/apps/test_ucx_tls.py index 07d2059cde3..cf4d643c440 100755 --- a/test/apps/test_ucx_tls.py +++ b/test/apps/test_ucx_tls.py @@ -112,17 +112,23 @@ def find_am_transport(dev, neps, override = 0) : status, dev_attrs = commands.getstatusoutput("ibv_devinfo -d " + dev + " -i " + port) if dev_attrs.find("PORT_ACTIVE") == -1: continue - + + driver_name = os.path.basename(os.readlink("/sys/class/infiniband/%s/device/driver" % dev)) + dev_name = driver_name.split("_")[0] # should be mlx4 or mlx5 + if not dev_name in ['mlx4', 'mlx5']: + print "Invalid device name: ", dev_name + sys.exit(1) + if dev_attrs.find("Ethernet") == -1: - dev_tl_map = am_tls[dev[0:dev.index('_')]] - dev_tl_override_map = am_tls[dev[0:dev.index('_')] + "_override"] + dev_tl_map = am_tls[dev_name] + dev_tl_override_map = am_tls[dev_name + "_override"] override = 1 else: fw_ver = open("/sys/class/infiniband/%s/fw_ver" % dev).read() if LooseVersion(fw_ver) >= LooseVersion("16.23.0"): - dev_tl_map = am_tls[dev[0:dev.index('_')]+"_roce_dc"] + dev_tl_map = am_tls[dev_name+"_roce_dc"] else: - dev_tl_map = am_tls[dev[0:dev.index('_')]+"_roce_no_dc"] + dev_tl_map = am_tls[dev_name+"_roce_no_dc"] override = 0 for n_eps in sorted(dev_tl_map):