Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port recent bugfixes to v1.6.x and update NEWS to v1.6-rc4 #3770

Merged
merged 10 commits into from
Jun 26, 2019
7 changes: 6 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
##
#

## 1.6.0-rc3 (June 19, 2019)
## 1.6.0-rc4 (June 27, 2019)
Features:
- Modular architecture for UCT transports
- ROCm transport re-design: support for managed memory, direct copy, ROCm GDR
Expand Down Expand Up @@ -35,6 +35,11 @@ Bugfixes:
- Fallback to ibv_create_cq() if ibv_create_cq_ex() returns ENOSYS
- Fix malloc hooks test
- Fix checking return status in ucp_client_server example
- Fix gdrcopy libdir config value
- Fix printing atomic capabilities in ucx_info
- Fix perftest warmup iterations to be non-zero
- Fix default values for configure logic
- Fix race condition updating fired_events from multiple threads

Tested configurations:
- RDMA: MLNX_OFED 4.5, distribution inbox drivers, rdma-core 22.1
Expand Down
2 changes: 1 addition & 1 deletion config/m4/gdrcopy.m4
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ AS_IF([test "x$with_gdrcopy" != "xno"],
LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS"
])
AS_IF([test ! -z "$with_gdrcopy_libdir" -a "x$with_gdrcopy_libdir" != "xyes"],
[ucx_check_gdrcopy_libdir="$with_nccl_libdir"
[ucx_check_gdrcopy_libdir="$with_gdrcopy_libdir"
LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS"])

AC_CHECK_HEADERS([gdrapi.h],
Expand Down
63 changes: 35 additions & 28 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and The University
# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
# Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
# Copyright (C) ARM Ltd. 2016-2019. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#
AC_PREREQ([2.63])
Expand Down Expand Up @@ -217,7 +217,6 @@ AS_IF([test "x$with_docs_only" = xyes],
[Compile with frame pointer, useful for profiling, default: NO]),
[],
[enable_frame_pointer=no])

AS_IF([test "x$enable_frame_pointer" = xyes],
[AS_MESSAGE([compiling with frame pointer])
BASE_CFLAGS="$BASE_CFLAGS -fno-omit-frame-pointer"],
Expand All @@ -235,8 +234,7 @@ AS_IF([test "x$with_docs_only" = xyes],
AS_IF([test "x$enable_fault_injection" = xyes],
[AS_MESSAGE([enabling with fault injection code])
AC_DEFINE([ENABLE_FAULT_INJECTION], [1], [Enable fault injection code])],
[:]
)
[:])


#
Expand All @@ -245,22 +243,26 @@ AS_IF([test "x$with_docs_only" = xyes],
AC_ARG_ENABLE([params-check],
AS_HELP_STRING([--disable-params-check],
[Disable checking user parameters passed to API, default: NO]),
[AC_DEFINE([ENABLE_PARAMS_CHECK], [0])],
[AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])])

[],
[enable_params_check=yes])
AS_IF([test "x$enable_params_check" = xyes],
[AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])],
[AC_DEFINE([ENABLE_PARAMS_CHECK], [0])])

#
# Enable collecting data to ease debugging
#
AC_ARG_ENABLE([debug-data],
AS_HELP_STRING([--enable-debug-data],
[Enable collecting data to ease debugging, default: NO]),
[
AC_DEFINE([ENABLE_DEBUG_DATA], [1], [Enable collecting data])
AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [1],
[Enable packet header inspection/rewriting in UCT/UD])
],
[AC_DEFINE([ENABLE_DEBUG_DATA], [0])])
[],
[enable_debug_data=no])
AS_IF([test "x$enable_debug_data" = xyes],
[AC_DEFINE([ENABLE_DEBUG_DATA], [1], [Enable collecting data])
AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [1],
[Enable packet header inspection/rewriting in UCT/UD])],
[AC_DEFINE([ENABLE_DEBUG_DATA], [0])
AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [0])])


#
Expand All @@ -269,29 +271,35 @@ AS_IF([test "x$with_docs_only" = xyes],
AC_ARG_ENABLE([mt],
AS_HELP_STRING([--enable-mt],
[Enable thread support in UCP and UCT, default: NO]),
[AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT])
mt_enable=Enabled],
[AC_DEFINE([ENABLE_MT], [0])
mt_enable=Disabled])
[],
[enable_mt=no])
AS_IF([test "x$enable_mt" = xyes],
[AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT])
mt_enable=Enabled],
[AC_DEFINE([ENABLE_MT], [0])
mt_enable=Disabled])


#
# Enable experimental header
#
AC_ARG_ENABLE([experimental-api],
AS_HELP_STRING([--enable-experimental-api],
[Enable installing experimental APIs, default: NO]))
AM_CONDITIONAL([ENABLE_EXPERIMENTAL_API], [test "x$enable_experimental_api" = "xyes"])
[Enable installing experimental APIs, default: NO]),
[],
[enable_experimental_api=no])
AM_CONDITIONAL([ENABLE_EXPERIMENTAL_API], [test "x$enable_experimental_api" = "xyes"])


#
# Install development headers
#
AC_ARG_ENABLE([devel-headers],
AS_HELP_STRING([--enable-devel-headers],
[Enable installing development headers, default: NO]))
AM_CONDITIONAL([INSTALL_DEVEL_HEADERS],
[test "x$enable_devel_headers" = "xyes"])
[Enable installing development headers, default: NO]),
[],
[enable_devel_headers=no])
AM_CONDITIONAL([INSTALL_DEVEL_HEADERS], [test "x$enable_devel_headers" = "xyes"])


#
Expand All @@ -304,17 +312,16 @@ AS_IF([test "x$with_docs_only" = xyes],
# Enable examples build
#
AC_ARG_ENABLE([examples],
[AS_HELP_STRING([--enable-examples],
[Enable examples build])],
[AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"])],
[AM_CONDITIONAL([HAVE_EXAMPLES], [false])])

AS_HELP_STRING([--enable-examples],
[Enable examples build, default: NO]),
[],
[enable_examples=no])
AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"])
]) # Docs only

#
# Print which transports are built
#
AC_MSG_NOTICE([Supported transports: $transports])
build_modules="${uct_modules}"
build_modules+="${uct_ib_modules}"
build_modules+="${uct_cuda_modules}"
Expand Down
27 changes: 4 additions & 23 deletions src/tools/info/tl_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,33 +34,13 @@
PRINT_ZCAP_NO_CHECK(_name, _min, _max, _max_iov) \
}

#define PRINT_ATOMIC_CAP(_name, _cap_flags) \
if ((_cap_flags) & (UCT_IFACE_FLAG_##_name##32 | UCT_IFACE_FLAG_##_name##64)) { \
char *s = strduplower(#_name); \
char *domain = ""; \
if ((_cap_flags) & UCT_IFACE_FLAG_ATOMIC_CPU) { \
domain = ", cpu"; \
} else if ((_cap_flags) & UCT_IFACE_FLAG_ATOMIC_DEVICE) { \
domain = ", device"; \
} \
if (ucs_test_all_flags(_cap_flags, \
UCT_IFACE_FLAG_##_name##32 | UCT_IFACE_FLAG_##_name##64)) \
{ \
printf("# %12s: 32, 64 bit%s (deprecated)\n", s, domain); \
} else { \
printf("# %12s: %d bit%s (deprecated)\n", s, \
((_cap_flags) & UCT_IFACE_FLAG_##_name##32) ? 32 : 64, domain); \
} \
free(s); \
}

#define PRINT_ATOMIC_POST(_name, _cap) \
print_atomic_info(UCT_ATOMIC_OP_##_name, #_name, "", \
_cap.atomic32.op_flags, _cap.atomic32.op_flags);
_cap.atomic32.op_flags, _cap.atomic64.op_flags);

#define PRINT_ATOMIC_FETCH(_name, _cap, _suffix) \
print_atomic_info(UCT_ATOMIC_OP_##_name, #_name, _suffix, \
_cap.atomic32.fop_flags, _cap.atomic32.fop_flags);
_cap.atomic32.fop_flags, _cap.atomic64.fop_flags);

static char *strduplower(const char *str)
{
Expand Down Expand Up @@ -242,7 +222,8 @@ static void print_iface_info(uct_worker_h worker, uct_md_h md,
iface_attr.cap.atomic64.fop_flags) {
if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_DEVICE) {
printf("# domain: device\n");
} else if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) {
}
if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) {
printf("# domain: cpu\n");
}

Expand Down
2 changes: 1 addition & 1 deletion src/tools/perf/lib/libperf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1189,7 +1189,7 @@ static void ucp_perf_test_cleanup_endpoints(ucx_perf_context_t *perf)

static void ucx_perf_set_warmup(ucx_perf_context_t* perf, ucx_perf_params_t* params)
{
perf->max_iter = ucs_min(params->warmup_iter, params->max_iter / 10);
perf->max_iter = ucs_min(params->warmup_iter, ucs_div_round_up(params->max_iter, 10));
perf->report_interval = -1;
}

Expand Down
25 changes: 17 additions & 8 deletions src/ucm/mmap/install.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <ucm/util/sys.h>
#include <ucm/bistro/bistro.h>
#include <ucs/sys/preprocessor.h>
#include <ucs/arch/atomic.h>
#include <ucs/sys/math.h>
#include <ucs/sys/checker.h>
#include <ucs/arch/bitops.h>
Expand All @@ -36,11 +37,12 @@

#define UCM_FIRE_EVENT(_event, _mask, _data, _call) \
do { \
int exp_events = (_event) & (_mask); \
(_data)->fired_events = 0; \
_call; \
ucm_trace("after %s: fired events = 0x%x", UCS_PP_MAKE_STRING(_call), \
(_data)->fired_events); \
(_data)->out_events &= ~((_event) & (_mask)) | (_data)->fired_events; \
ucm_trace("after %s: got 0x%x/0x%x", UCS_PP_MAKE_STRING(_call), \
(_data)->fired_events, exp_events); \
(_data)->out_events &= ~exp_events | (_data)->fired_events; \
} while(0)

extern const char *ucm_mmap_hook_modes[];
Expand All @@ -59,8 +61,8 @@ typedef struct ucm_mmap_func {
} ucm_mmap_func_t;

typedef struct ucm_mmap_test_events_data {
int fired_events;
int out_events;
uint32_t fired_events;
int out_events;
} ucm_mmap_test_events_data_t;

static ucm_mmap_func_t ucm_mmap_funcs[] = {
Expand All @@ -81,9 +83,16 @@ static pthread_mutex_t ucm_mmap_install_mutex = PTHREAD_MUTEX_INITIALIZER;
static int ucm_mmap_installed_events = 0; /* events that were reported as installed */

static void ucm_mmap_event_test_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *fired_events)
ucm_event_t *event, void *arg)
{
*(int*)fired_events |= event_type;
ucm_mmap_test_events_data_t *data = arg;

/* This callback may be called from multiple threads, which are just calling
* memory allocations/release, and not testing mmap hooks at the moment.
* So in order to ensure the thread which tests events sees all fired
* events, use atomic OR operation.
*/
ucs_atomic_or32(&data->fired_events, event_type);
}

/* Fire events with pre/post action. The problem is in call sequence: we
Expand Down Expand Up @@ -182,7 +191,7 @@ static ucs_status_t ucm_mmap_test_events(int events)
handler.events = events;
handler.priority = -1;
handler.cb = ucm_mmap_event_test_callback;
handler.arg = &data.fired_events;
handler.arg = &data;
data.out_events = events;

ucm_event_handler_add(&handler);
Expand Down
2 changes: 1 addition & 1 deletion src/uct/ib/ud/base/ud_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#define UCT_UD_EP_ID_MAX UCT_UD_EP_NULL_ID
#define UCT_UD_EP_CONN_ID_MAX UCT_UD_EP_ID_MAX

#ifdef UCT_UD_EP_DEBUG_HOOKS
#if UCT_UD_EP_DEBUG_HOOKS
/*
Hooks that allow packet header inspection and rewriting. UCT user can
set functions that will be called just before packet is put on wire
Expand Down
2 changes: 1 addition & 1 deletion src/uct/ib/ud/base/ud_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_ud_iface_peer_t, UCT_UD_HASH_SIZE,



#ifdef UCT_UD_EP_DEBUG_HOOKS
#if UCT_UD_EP_DEBUG_HOOKS

typedef ucs_status_t (*uct_ud_iface_hook_t)(uct_ud_iface_t *iface, uct_ud_neth_t *neth);

Expand Down
16 changes: 11 additions & 5 deletions test/apps/test_ucx_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,23 @@ def find_am_transport(dev, neps, override = 0) :
status, dev_attrs = commands.getstatusoutput("ibv_devinfo -d " + dev + " -i " + port)
if dev_attrs.find("PORT_ACTIVE") == -1:
continue


driver_name = os.path.basename(os.readlink("/sys/class/infiniband/%s/device/driver" % dev))
dev_name = driver_name.split("_")[0] # should be mlx4 or mlx5
if not dev_name in ['mlx4', 'mlx5']:
print "Invalid device name: ", dev_name
sys.exit(1)

if dev_attrs.find("Ethernet") == -1:
dev_tl_map = am_tls[dev[0:dev.index('_')]]
dev_tl_override_map = am_tls[dev[0:dev.index('_')] + "_override"]
dev_tl_map = am_tls[dev_name]
dev_tl_override_map = am_tls[dev_name + "_override"]
override = 1
else:
fw_ver = open("/sys/class/infiniband/%s/fw_ver" % dev).read()
if LooseVersion(fw_ver) >= LooseVersion("16.23.0"):
dev_tl_map = am_tls[dev[0:dev.index('_')]+"_roce_dc"]
dev_tl_map = am_tls[dev_name+"_roce_dc"]
else:
dev_tl_map = am_tls[dev[0:dev.index('_')]+"_roce_no_dc"]
dev_tl_map = am_tls[dev_name+"_roce_no_dc"]
override = 0

for n_eps in sorted(dev_tl_map):
Expand Down
4 changes: 2 additions & 2 deletions test/gtest/uct/ib/test_ud.cc
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ UCS_TEST_P(test_ud, flush_iface) {
validate_flush();
}

#ifdef UCT_UD_EP_DEBUG_HOOKS
#if UCT_UD_EP_DEBUG_HOOKS

/* disable ack req,
* send full window,
Expand Down Expand Up @@ -929,7 +929,7 @@ UCS_TEST_P(test_ud, res_skb_tx) {
}
}

#ifdef UCT_UD_EP_DEBUG_HOOKS
#if UCT_UD_EP_DEBUG_HOOKS
/* Simulate loss of ctl packets during simultaneous CREQs.
* Use-case: CREQ and CREP packets from m_e2 to m_e1 are lost.
* Check: that both eps (m_e1 and m_e2) are connected finally */
Expand Down
2 changes: 1 addition & 1 deletion test/gtest/uct/ib/test_ud_slow_timer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ UCS_TEST_P(test_ud_slow_timer, ep_destroy, "UD_TIMEOUT=1s") {
EXPECT_FALSE(ucs_ptr_array_lookup(&iface->eps, ep_idx, ud_ep_tmp));
}

#ifdef UCT_UD_EP_DEBUG_HOOKS
#if UCT_UD_EP_DEBUG_HOOKS
/* no traffic - no ticks */
UCS_TEST_P(test_ud_slow_timer, tick1) {
connect();
Expand Down