From 313bfaeb99dd8e54a83b24324deacfff1aed6466 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Thu, 1 Mar 2018 18:27:04 +0200 Subject: [PATCH 1/3] UCS/UCM: Set UCM configuration from UCS during init time libucm should not use libucs symbols, because it's marked as persistent. So in order to set UCM configuration from the config parser in UCS, and add its table to the global list, need to parse the UCM config from UCS. --- src/tools/info/ucx_info.c | 1 - src/ucm/Makefile.am | 2 - src/ucm/api/ucm.h | 42 +++---- src/ucm/cuda/install.c | 3 +- src/ucm/event/event.c | 3 +- src/ucm/malloc/malloc_hook.c | 17 +-- src/ucm/mmap/install.c | 3 +- src/ucm/util/log.c | 1 + src/ucm/util/log.h | 5 +- src/ucm/util/reloc.c | 1 - src/ucm/util/sys.c | 10 ++ src/ucm/util/ucm_config.c | 222 ---------------------------------- src/ucm/util/ucm_config.h | 31 ----- src/ucs/Makefile.am | 1 + src/ucs/config/ucm_opts.c | 67 ++++++++++ test/gtest/common/main.cc | 4 +- test/gtest/ucm/malloc_hook.cc | 13 +- test/mpi/test_memhooks.c | 18 +-- 18 files changed, 128 insertions(+), 316 deletions(-) delete mode 100644 src/ucm/util/ucm_config.c delete mode 100644 src/ucm/util/ucm_config.h create mode 100644 src/ucs/config/ucm_opts.c diff --git a/src/tools/info/ucx_info.c b/src/tools/info/ucx_info.c index ce157578d9f..18506d0546b 100644 --- a/src/tools/info/ucx_info.c +++ b/src/tools/info/ucx_info.c @@ -166,7 +166,6 @@ int main(int argc, char **argv) if (print_flags & UCS_CONFIG_PRINT_CONFIG) { ucs_config_parser_print_all_opts(stdout, print_flags); - ucm_config_print(stdout, print_flags); } if (print_opts & PRINT_DEVICES) { diff --git a/src/ucm/Makefile.am b/src/ucm/Makefile.am index 20a26c86813..bb51db5cae8 100644 --- a/src/ucm/Makefile.am +++ b/src/ucm/Makefile.am @@ -32,7 +32,6 @@ noinst_HEADERS = \ malloc/malloc_hook.h \ malloc/allocator.h \ mmap/mmap.h \ - util/ucm_config.h \ util/replace.h \ util/log.h \ util/reloc.h \ @@ -43,7 +42,6 @@ libucm_la_SOURCES = \ malloc/malloc_hook.c \ mmap/install.c \ util/replace.c \ - util/ucm_config.c \ util/log.c \ util/reloc.c \ util/sys.c diff --git a/src/ucm/api/ucm.h b/src/ucm/api/ucm.h index d7d529c11ef..c25a7c71efd 100644 --- a/src/ucm/api/ucm.h +++ b/src/ucm/api/ucm.h @@ -152,6 +152,27 @@ typedef union ucm_event { } ucm_event_t; +/** + * @brief Global UCM configuration. + * + * Can be safely modified before using UCM functions. + */ +typedef struct ucm_global_config { + ucs_log_level_t log_level; /* Logging level */ + int enable_events; /* Enable memory events */ + int enable_mmap_reloc; /* Enable installing mmap relocations */ + int enable_malloc_hooks; /* Enable installing malloc hooks */ + int enable_malloc_reloc; /* Enable installing malloc relocations */ + int enable_cuda_reloc; /* Enable installing CUDA relocations */ + int enable_dynamic_mmap_thresh; /* Enable adaptive mmap threshold */ + size_t alloc_alignment; /* Alignment for memory allocations */ +} ucm_global_config_t; + + +/* Global UCM configuration */ +extern ucm_global_config_t ucm_global_opts; + + /** * @brief Memory event callback. * @@ -190,27 +211,6 @@ typedef void (*ucm_event_callback_t)(ucm_event_type_t event_type, ucm_event_t *event, void *arg); - -/** - * @brief Print UCM global configuration to a stream. - * - * @param [in] stream Output stream to print to. - * @param [in] print_flags Controls how the configuration is printed. - */ -void ucm_config_print(FILE *stream, ucs_config_print_flags_t print_flags); - - -/** - * @brief Modify UCM global configuration. - * - * @param [in] name Configuration variable name. - * @param [in] value Value to set. - * - * @return Error code. - */ -ucs_status_t ucm_config_modify(const char *name, const char *value); - - /** * @brief Install a handler for memory events. * diff --git a/src/ucm/cuda/install.c b/src/ucm/cuda/install.c index 698fdf59dd4..b7b7757a1ee 100644 --- a/src/ucm/cuda/install.c +++ b/src/ucm/cuda/install.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -46,7 +45,7 @@ ucs_status_t ucm_cudamem_install() ucm_reloc_patch_t *patch; ucs_status_t status; - if (!ucm_global_config.enable_cuda_hooks) { + if (!ucm_global_opts.enable_cuda_reloc) { ucm_debug("installing cudamem relocations is disabled by configuration"); return UCS_ERR_UNSUPPORTED; } diff --git a/src/ucm/event/event.c b/src/ucm/event/event.c index eebb76acad4..cfd20d54612 100644 --- a/src/ucm/event/event.c +++ b/src/ucm/event/event.c @@ -16,7 +16,6 @@ #if HAVE_CUDA #include #endif -#include #include #include #include @@ -673,7 +672,7 @@ ucs_status_t ucm_set_event_handler(int events, int priority, ucm_event_handler_t *handler; ucs_status_t status; - if (!ucm_global_config.enable_events) { + if (!ucm_global_opts.enable_events) { return UCS_ERR_UNSUPPORTED; } diff --git a/src/ucm/malloc/malloc_hook.c b/src/ucm/malloc/malloc_hook.c index d558b6ae61f..9987bff2915 100644 --- a/src/ucm/malloc/malloc_hook.c +++ b/src/ucm/malloc/malloc_hook.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -226,8 +225,8 @@ static void *ucm_malloc_impl(size_t size, const char *debug_name) void *ptr; ucm_malloc_hook_state.hook_called = 1; - if (ucm_global_config.alloc_alignment > 1) { - ptr = ucm_dlmemalign(ucm_global_config.alloc_alignment, size); + if (ucm_global_opts.alloc_alignment > 1) { + ptr = ucm_dlmemalign(ucm_global_opts.alloc_alignment, size); } else { ptr = ucm_dlmalloc(size); } @@ -240,7 +239,11 @@ static void ucm_malloc_adjust_thresholds(size_t size) int mmap_thresh; if (size > ucm_malloc_hook_state.max_freed_size) { - if (ucm_global_config.enable_dynamic_mmap_thresh && + /* Valgrind limits the size of brk() segments to 8mb, so must use mmap + * for large allocations. + */ + if (!RUNNING_ON_VALGRIND && + ucm_global_opts.enable_dynamic_mmap_thresh && !ucm_malloc_hook_state.trim_thresh_set && !ucm_malloc_hook_state.mmap_thresh_set) { /* new mmap threshold is increased to the size of released block, @@ -287,7 +290,7 @@ static void *ucm_memalign_impl(size_t alignment, size_t size, const char *debug_ void *ptr; ucm_malloc_hook_state.hook_called = 1; - ptr = ucm_dlmemalign(ucs_max(alignment, ucm_global_config.alloc_alignment), size); + ptr = ucm_dlmemalign(ucs_max(alignment, ucm_global_opts.alloc_alignment), size); ucm_malloc_allocated(ptr, size, debug_name); return ptr; } @@ -702,7 +705,7 @@ ucs_status_t ucm_malloc_install(int events) * valgrind anyway. */ #if HAVE_MALLOC_HOOK - if (ucm_global_config.enable_malloc_hooks) { + if (ucm_global_opts.enable_malloc_hooks) { /* Install using malloc hooks. * TODO detect glibc support in configure-time. */ @@ -727,7 +730,7 @@ ucs_status_t ucm_malloc_install(int events) } /* Install using malloc symbols */ - if (ucm_global_config.enable_malloc_reloc) { + if (ucm_global_opts.enable_malloc_reloc) { if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_MALL_SYMS)) { ucm_debug("installing malloc relocations"); ucm_malloc_populate_glibc_cache(); diff --git a/src/ucm/mmap/install.c b/src/ucm/mmap/install.c index 9e903bb4af7..e3cff956d97 100644 --- a/src/ucm/mmap/install.c +++ b/src/ucm/mmap/install.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -107,7 +106,7 @@ static ucs_status_t ucs_mmap_install_reloc(int events) ucm_mmap_func_t *entry; ucs_status_t status; - if (!ucm_global_config.enable_mmap_reloc) { + if (!ucm_global_opts.enable_mmap_reloc) { ucm_debug("installing mmap relocations is disabled by configuration"); return UCS_ERR_UNSUPPORTED; } diff --git a/src/ucm/util/log.c b/src/ucm/util/log.c index 9a9a487a466..ea6748fbc3e 100644 --- a/src/ucm/util/log.c +++ b/src/ucm/util/log.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/ucm/util/log.h b/src/ucm/util/log.h index 3c186dc7d54..c69782341cc 100644 --- a/src/ucm/util/log.h +++ b/src/ucm/util/log.h @@ -11,13 +11,12 @@ # include "config.h" #endif +#include #include -#include "ucm_config.h" - #define ucm_log(_level, _message, ...) \ - if (((_level) <= UCS_MAX_LOG_LEVEL) && ((_level) <= ucm_global_config.log_level)) { \ + if (((_level) <= UCS_MAX_LOG_LEVEL) && ((_level) <= ucm_global_opts.log_level)) { \ __ucm_log(__FILE__, __LINE__, __FUNCTION__, (_level), _message, \ ## __VA_ARGS__); \ } diff --git a/src/ucm/util/reloc.c b/src/ucm/util/reloc.c index cce6493fabd..fb84f13f881 100644 --- a/src/ucm/util/reloc.c +++ b/src/ucm/util/reloc.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/src/ucm/util/sys.c b/src/ucm/util/sys.c index bee9b48f288..56c079103ee 100644 --- a/src/ucm/util/sys.c +++ b/src/ucm/util/sys.c @@ -20,6 +20,16 @@ #define UCM_PROC_SELF_MAPS "/proc/self/maps" +ucm_global_config_t ucm_global_opts = { + .log_level = UCS_LOG_LEVEL_WARN, + .enable_events = 1, + .enable_mmap_reloc = 1, + .enable_malloc_hooks = 1, + .enable_malloc_reloc = 0, + .enable_cuda_reloc = 1, + .enable_dynamic_mmap_thresh = 1, + .alloc_alignment = 16 +}; static size_t ucm_get_page_size() { diff --git a/src/ucm/util/ucm_config.c b/src/ucm/util/ucm_config.c deleted file mode 100644 index 29657e9d2fd..00000000000 --- a/src/ucm/util/ucm_config.c +++ /dev/null @@ -1,222 +0,0 @@ -/** - * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. - * - * See file LICENSE for terms. - */ - -#include "ucm_config.h" - -#include -#include -#include -#include -#include -#include - -#define UCM_ENV_PREFIX UCS_CONFIG_PREFIX "MEM_" - -#define UCM_LOG_LEVEL_VAR "LOG_LEVEL" -#define UCM_ALLOC_ALIGN_VAR "ALLOC_ALIGN" -#define UCM_EN_EVENTS_VAR "EVENTS" -#define UCM_EN_MMAP_RELOC_VAR "MMAP_RELOC" -#define UCM_EN_MALLOC_HOOKS_VAR "MALLOC_HOOKS" -#define UCM_EN_MALLOC_RELOC_VAR "MALLOC_RELOC" -#define UCM_EN_DYNAMIC_MMAP_VAR "DYNAMIC_MMAP_THRESH" -#define UCM_EN_CUDA_HOOKS_VAR "CUDA_HOOKS" - - -ucm_config_t ucm_global_config = { - .log_level = UCS_LOG_LEVEL_WARN, - .alloc_alignment = 16, - .enable_events = 1, - .enable_mmap_reloc = 1, - .enable_malloc_hooks = 1, - .enable_malloc_reloc = 0, - .enable_dynamic_mmap_thresh = 1, -#if HAVE_CUDA - .enable_cuda_hooks = 1 -#endif -}; - -static const char *ucm_config_bool_to_string(int value) -{ - return value ? "yes" : "no"; -} - -static void ucm_config_print_doc(FILE *stream, const char *doc, const char *syntax, - ucs_config_print_flags_t print_flags) -{ - if (!(print_flags & UCS_CONFIG_PRINT_DOC)) { - return; - } - - fprintf(stream, "\n"); - fprintf(stream, "#\n"); - fprintf(stream, "# %s\n", doc); - fprintf(stream, "#\n"); - fprintf(stream, "# Syntax: %s\n", syntax); - fprintf(stream, "#\n"); -} - -static void ucm_config_print_bool_doc(FILE *stream, const char *doc, - ucs_config_print_flags_t print_flags) -{ - ucm_config_print_doc(stream, doc, "", print_flags); -} - -void ucm_config_print(FILE *stream, ucs_config_print_flags_t print_flags) -{ - if (print_flags & UCS_CONFIG_PRINT_HEADER) { - fprintf(stream, "#\n"); - fprintf(stream, "# UCM configuration\n"); - fprintf(stream, "#\n"); - } - - if (!(print_flags & UCS_CONFIG_PRINT_CONFIG)) { - return; - } - - ucm_config_print_doc(stream, - "Logging level", "", - print_flags); - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_LOG_LEVEL_VAR, - ucm_log_level_names[ucm_global_config.log_level]); - - ucm_config_print_doc(stream, - "Minimal alignment of allocated blocks", - "long integer", print_flags); - fprintf(stream, "%s%s=%zu\n", UCM_ENV_PREFIX, UCM_ALLOC_ALIGN_VAR, - ucm_global_config.alloc_alignment); - - ucm_config_print_bool_doc(stream, - "Enable memory events", - print_flags); - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_EN_EVENTS_VAR, - ucm_config_bool_to_string(ucm_global_config.enable_events)); - - ucm_config_print_bool_doc(stream, - "Enable installing mmap symbols in the relocation table", - print_flags); - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_EN_MMAP_RELOC_VAR, - ucm_config_bool_to_string(ucm_global_config.enable_mmap_reloc)); - - ucm_config_print_bool_doc(stream, - "Enable using glibc malloc hooks", - print_flags); - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_EN_MALLOC_HOOKS_VAR, - ucm_config_bool_to_string(ucm_global_config.enable_malloc_hooks)); - - ucm_config_print_bool_doc(stream, - "Enable installing malloc symbols in the relocation table.\n" - "This is unsafe and off by default, because sometimes glibc\n" - "calls malloc/free without going through the relocation table,\n" - "which would use the original implementation and not ours.", - print_flags); - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_EN_MALLOC_RELOC_VAR, - ucm_config_bool_to_string(ucm_global_config.enable_malloc_reloc)); - - - ucm_config_print_bool_doc(stream, - "Enable dynamic mmap threshold: for every released block, the\n" - "mmap threshold is adjusted upward to the size of the size of\n" - "the block, and trim threshold is adjust to twice the size of\n" - "the dynamic mmap threshold.", - print_flags); - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_EN_DYNAMIC_MMAP_VAR, - ucm_config_bool_to_string(ucm_global_config.enable_dynamic_mmap_thresh)); - - -#if HAVE_CUDA - fprintf(stream, "%s%s=%s\n", UCM_ENV_PREFIX, UCM_EN_CUDA_HOOKS_VAR, - ucm_config_bool_to_string(ucm_global_config.enable_cuda_hooks)); -#endif -} - -static void ucm_config_set_value_table(const char *str_value, const char **table, - int *value) -{ - int i; - - for (i = 0; table[i] != NULL; ++i) { - if (!strcasecmp(table[i], str_value)) { - ucm_global_config.log_level = i; - return; - } - } -} - -static void ucm_config_set_value_bool(const char *str_value, int *value) -{ - if (!strcasecmp(str_value, "1") || !strcasecmp(str_value, "y") || !strcasecmp(str_value, "yes")) { - *value = 1; - } else if (!strcasecmp(str_value, "0") || !strcasecmp(str_value, "n") || !strcasecmp(str_value, "no")) { - *value = 0; - } -} - -static void ucm_config_set_value_size(const char *str_value, size_t *value) -{ - char *endptr; - size_t n; - - n = strtoul(str_value, &endptr, 10); - if (*endptr == '\0') { - *value = n; - } -} - -ucs_status_t ucm_config_modify(const char *name, const char *value) -{ - if (!strcmp(name, UCM_LOG_LEVEL_VAR)) { - ucm_config_set_value_table(value, ucm_log_level_names, - (int*)&ucm_global_config.log_level); - } else if (!strcmp(name, UCM_ALLOC_ALIGN_VAR)) { - ucm_config_set_value_size(value, &ucm_global_config.alloc_alignment); - } else if (!strcmp(name, UCM_EN_EVENTS_VAR)) { - ucm_config_set_value_bool(value, &ucm_global_config.enable_events); - } else if (!strcmp(name, UCM_EN_MMAP_RELOC_VAR)) { - ucm_config_set_value_bool(value, &ucm_global_config.enable_mmap_reloc); - } else if (!strcmp(name, UCM_EN_MALLOC_HOOKS_VAR)) { - ucm_config_set_value_bool(value, &ucm_global_config.enable_malloc_hooks); - } else if (!strcmp(name, UCM_EN_MALLOC_RELOC_VAR)) { - ucm_config_set_value_bool(value, &ucm_global_config.enable_malloc_reloc); - } else if (!strcmp(name, UCM_EN_DYNAMIC_MMAP_VAR)) { - ucm_config_set_value_bool(value, &ucm_global_config.enable_dynamic_mmap_thresh); -#if HAVE_CUDA - } else if (!strcmp(name, UCM_EN_CUDA_HOOKS_VAR)) { - ucm_config_set_value_bool(value, &ucm_global_config.enable_cuda_hooks); -#endif - } else { - return UCS_ERR_INVALID_PARAM; - } - - return UCS_OK; -} - -static void ucm_config_set(const char *name) -{ - char var_name[64]; - char *str_value; - - snprintf(var_name, sizeof(var_name), "%s%s", UCM_ENV_PREFIX, name); - str_value = getenv(var_name); - if (str_value != NULL) { - ucm_config_modify(name, str_value); - } -} - -UCS_STATIC_INIT { - if (RUNNING_ON_VALGRIND) { - /* Valgrind limits the size of brk() segments to 8mb, so must use mmap - * for large allocations. - */ - ucm_global_config.enable_dynamic_mmap_thresh = 0; - } - ucm_config_set(UCM_LOG_LEVEL_VAR); - ucm_config_set(UCM_ALLOC_ALIGN_VAR); - ucm_config_set(UCM_EN_EVENTS_VAR); - ucm_config_set(UCM_EN_MMAP_RELOC_VAR); - ucm_config_set(UCM_EN_MALLOC_HOOKS_VAR); - ucm_config_set(UCM_EN_MALLOC_RELOC_VAR); - ucm_config_set(UCM_EN_DYNAMIC_MMAP_VAR); -} diff --git a/src/ucm/util/ucm_config.h b/src/ucm/util/ucm_config.h deleted file mode 100644 index c99fcd50378..00000000000 --- a/src/ucm/util/ucm_config.h +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. - * - * See file LICENSE for terms. - */ - -#ifndef UCM_UTIL_CONFIG_H_ -#define UCM_UTIL_CONFIG_H_ - -#include -#include -#include - - -typedef struct ucm_config { - ucs_log_level_t log_level; - int enable_events; - int enable_mmap_reloc; - int enable_malloc_hooks; - int enable_malloc_reloc; - int enable_dynamic_mmap_thresh; -#if HAVE_CUDA - int enable_cuda_hooks; -#endif - size_t alloc_alignment; -} ucm_config_t; - - -extern ucm_config_t ucm_global_config; - -#endif diff --git a/src/ucs/Makefile.am b/src/ucs/Makefile.am index 7d6df44280c..2be2b501e53 100644 --- a/src/ucs/Makefile.am +++ b/src/ucs/Makefile.am @@ -98,6 +98,7 @@ libucs_la_SOURCES = \ async/pipe.c \ async/thread.c \ config/global_opts.c \ + config/ucm_opts.c \ config/parser.c \ datastruct/arbiter.c \ datastruct/callbackq.c \ diff --git a/src/ucs/config/ucm_opts.c b/src/ucs/config/ucm_opts.c new file mode 100644 index 00000000000..ae331d26abe --- /dev/null +++ b/src/ucs/config/ucm_opts.c @@ -0,0 +1,67 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "parser.h" + +#include +#include + + +#define UCM_CONFIG_PREFIX "MEM_" + +static ucs_config_field_t ucm_global_config_table[] = { + {"LOG_LEVEL", "warn", + "Logging level for memory events", ucs_offsetof(ucm_global_config_t, log_level), + UCS_CONFIG_TYPE_ENUM(ucm_log_level_names)}, + + {"ALLOC_ALIGN", "16", + "Minimal alignment of allocated blocks", + ucs_offsetof(ucm_global_config_t, alloc_alignment), UCS_CONFIG_TYPE_MEMUNITS}, + + {"EVENTS", "yes", + "Enable memory events", + ucs_offsetof(ucm_global_config_t, enable_events), UCS_CONFIG_TYPE_BOOL}, + + {"MMAP_RELOC", "yes", + "Enable installing mmap symbols in the relocation table", + ucs_offsetof(ucm_global_config_t, enable_mmap_reloc), UCS_CONFIG_TYPE_BOOL}, + + {"MALLOC_HOOKS", "yes", + "Enable using glibc malloc hooks", + ucs_offsetof(ucm_global_config_t, enable_malloc_hooks), + UCS_CONFIG_TYPE_BOOL}, + + {"MALLOC_RELOC", "yes", + "Enable installing malloc symbols in the relocation table.\n" + "This is unsafe and off by default, because sometimes glibc\n" + "calls malloc/free without going through the relocation table,\n" + "which would use the original implementation and not ours.", + ucs_offsetof(ucm_global_config_t, enable_malloc_reloc), UCS_CONFIG_TYPE_BOOL}, + + {"CUDA_RELOC", "yes", + "Enable installing CUDA symbols in the relocation table", + ucs_offsetof(ucm_global_config_t, enable_dynamic_mmap_thresh), + UCS_CONFIG_TYPE_BOOL}, + + {"DYNAMIC_MMAP_THRESH", "yes", + "Enable dynamic mmap threshold: for every released block, the\n" + "mmap threshold is adjusted upward to the size of the size of\n" + "the block, and trim threshold is adjust to twice the size of\n" + "the dynamic mmap threshold.\n" + "Note: dynamic mmap threshold is disabled when running on valgrind.", + ucs_offsetof(ucm_global_config_t, enable_dynamic_mmap_thresh), + UCS_CONFIG_TYPE_BOOL}, + + {NULL} +}; + +UCS_CONFIG_REGISTER_TABLE(ucm_global_config_table, "UCM", UCM_CONFIG_PREFIX, + ucm_global_config_t) + +UCS_STATIC_INIT { + (void)ucs_config_parser_fill_opts(&ucm_global_opts, ucm_global_config_table, + NULL, UCM_CONFIG_PREFIX, 0); +} diff --git a/test/gtest/common/main.cc b/test/gtest/common/main.cc index 0cde3173af8..8ec43d759d3 100644 --- a/test/gtest/common/main.cc +++ b/test/gtest/common/main.cc @@ -79,8 +79,8 @@ int main(int argc, char **argv) { modify_config_for_valgrind("IB_TX_BUFS_GROW", "64"); modify_config_for_valgrind("RC_TX_CQ_LEN", "256"); modify_config_for_valgrind("CM_TIMEOUT", "600ms"); - ucm_config_modify("MALLOC_RELOC", "y"); /* Test reloc hooks with valgrind, - though it's generally unsafe. */ + ucm_global_opts.enable_malloc_reloc = 1; /* Test reloc hooks with valgrind, + though it's generally unsafe. */ } return RUN_ALL_TESTS(); } diff --git a/test/gtest/ucm/malloc_hook.cc b/test/gtest/ucm/malloc_hook.cc index dbb767a07d3..dfe718a7ba0 100644 --- a/test/gtest/ucm/malloc_hook.cc +++ b/test/gtest/ucm/malloc_hook.cc @@ -19,7 +19,6 @@ extern "C" { #include #include -#include #include #include } @@ -384,11 +383,11 @@ class malloc_hook_cplusplus : public malloc_hook { malloc_hook_cplusplus() : m_mapped_size(0), m_unmapped_size(0), - m_dynamic_mmap_config(ucm_global_config.enable_dynamic_mmap_thresh) { + m_dynamic_mmap_config(ucm_global_opts.enable_dynamic_mmap_thresh) { } ~malloc_hook_cplusplus() { - ucm_global_config.enable_dynamic_mmap_thresh = m_dynamic_mmap_config; + ucm_global_opts.enable_dynamic_mmap_thresh = m_dynamic_mmap_config; } void set() { @@ -459,7 +458,7 @@ class malloc_hook_cplusplus : public malloc_hook { m_unmapped_size = 0; strs.clear(); - if (ucm_global_config.enable_dynamic_mmap_thresh) { + if (ucm_global_opts.enable_dynamic_mmap_thresh) { EXPECT_EQ(0ul, m_unmapped_size); } else { EXPECT_GE(m_unmapped_size, size); @@ -503,12 +502,12 @@ UCS_TEST_F(malloc_hook_cplusplus, dynamic_mmap_enable) { if (RUNNING_ON_VALGRIND) { UCS_TEST_SKIP_R("skipping on valgrind"); } - EXPECT_TRUE(ucm_global_config.enable_dynamic_mmap_thresh); + EXPECT_TRUE(ucm_global_opts.enable_dynamic_mmap_thresh); test_dynamic_mmap_thresh(); } UCS_TEST_F(malloc_hook_cplusplus, dynamic_mmap_disable) { - ucm_global_config.enable_dynamic_mmap_thresh = 0; + ucm_global_opts.enable_dynamic_mmap_thresh = 0; test_dynamic_mmap_thresh(); } @@ -585,7 +584,7 @@ UCS_TEST_F(malloc_hook_cplusplus, mmap_ptrs) { UCS_TEST_SKIP_R("skipping on valgrind"); } - ucm_global_config.enable_dynamic_mmap_thresh = 0; + ucm_global_opts.enable_dynamic_mmap_thresh = 0; set(); const size_t size = ucm_dlmallopt_get(M_MMAP_THRESHOLD) * 2; diff --git a/test/mpi/test_memhooks.c b/test/mpi/test_memhooks.c index c029c9b64e9..e198c09465f 100644 --- a/test/mpi/test_memhooks.c +++ b/test/mpi/test_memhooks.c @@ -13,6 +13,7 @@ #include #include #include +#include #define CHKERR_JUMP(cond, msg, label) \ do { \ @@ -78,7 +79,7 @@ static void event_callback(ucm_event_type_t event_type, ucm_event_t *event, } } -static ucs_status_t set_event_handler (void *dl, int events) +static ucs_status_t set_event_handler(void *dl, int events) { ucs_status_t (*set_handler)(int events, int priority, ucm_event_callback_t cb, void *arg); @@ -91,18 +92,9 @@ static ucs_status_t set_event_handler (void *dl, int events) static ucs_status_t disable_memory_hooks(void *dl) { - ucs_status_t (*modify_cfg)(const char *name, const char *val); - ucs_status_t status; - - DL_FIND_FUNC(dl, "ucm_config_modify", modify_cfg, - return UCS_ERR_UNSUPPORTED); - - status = modify_cfg("MALLOC_HOOKS", "no"); - if (status == UCS_OK) { - status = modify_cfg("MMAP_RELOC", "no"); - } - - return status; + setenv("UCX_MEM_MALLOC_HOOKS", "n", 1); + setenv("UCX_MEM_MMAP_RELOC", "n", 1); + return UCS_OK; } void* open_dyn_lib(const char *lib_path) From c4aa77f4c2b4b0998690a6a057b8fc2ebf3562b9 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Thu, 1 Mar 2018 21:48:41 +0200 Subject: [PATCH 2/3] UCT/MLX5: added DM initialization - added basic initialization of Device Memory - DM is disabled by default --- config/m4/ib.m4 | 13 +- src/uct/ib/mlx5/ib_mlx5.h | 1 + src/uct/ib/rc/accel/rc_mlx5_common.c | 182 ++++++++++++++++++++++++++- src/uct/ib/rc/accel/rc_mlx5_common.h | 26 ++++ 4 files changed, 216 insertions(+), 6 deletions(-) diff --git a/config/m4/ib.m4 b/config/m4/ib.m4 index 3dd93c8c5f4..968015a7363 100644 --- a/config/m4/ib.m4 +++ b/config/m4/ib.m4 @@ -290,10 +290,17 @@ AS_IF([test "x$with_ib" == xyes], # Device Memory support AS_IF([test "x$with_dm" != xno], - [AC_CHECK_DECLS([ibv_exp_alloc_dm, ibv_exp_reg_mr, ibv_dereg_mr, ibv_exp_free_dm], + [AC_TRY_COMPILE([#include ], + [ + struct ibv_exp_dm ibv_dm; + struct ibv_exp_alloc_dm_attr dm_attr; + void* a1 = ibv_exp_alloc_dm; + void* a2 = ibv_exp_reg_mr; + void* a3 = ibv_dereg_mr; + void* a4 = ibv_exp_free_dm; + ], [AC_DEFINE([HAVE_IBV_EXP_DM], 1, [Device Memory support])], - [], - [infiniband/verbs_exp.h]) + []) ]) diff --git a/src/uct/ib/mlx5/ib_mlx5.h b/src/uct/ib/mlx5/ib_mlx5.h index 372fc68fc03..bf8525b63cd 100644 --- a/src/uct/ib/mlx5/ib_mlx5.h +++ b/src/uct/ib/mlx5/ib_mlx5.h @@ -29,6 +29,7 @@ #define UCT_IB_MLX5_CQE128_SIZE_LOG 7 #define UCT_IB_MLX5_MAX_BB 4 #define UCT_IB_MLX5_WORKER_BF_KEY 0x00c1b7e8u +#define UCT_IB_MLX5_WORKER_DM_KEY 0xacdf1245u #define UCT_IB_MLX5_EXTENDED_UD_AV 0x80 /* htonl(0x80000000) */ #define UCT_IB_MLX5_BF_REG_SIZE 256 #define UCT_IB_MLX5_CQE_VENDOR_SYND_ODP 0x93 diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.c b/src/uct/ib/rc/accel/rc_mlx5_common.c index f6abe912588..64711db6de1 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_common.c +++ b/src/uct/ib/rc/accel/rc_mlx5_common.c @@ -25,16 +25,28 @@ ucs_stats_class_t uct_rc_mlx5_iface_stats_class = { ucs_config_field_t uct_mlx5_common_config_table[] = { #if HAVE_IBV_EXP_DM - {"DM_SIZE", "1k", + /* TODO: set 1k limit */ + {"DM_SIZE", "0", "Device Memory segment size (0 - disabled)", ucs_offsetof(uct_common_mlx5_iface_config_t, dm.seg_len), UCS_CONFIG_TYPE_MEMUNITS}, - {"DM_COUNT", "1", + /* TODO: set 1 buffer limit */ + {"DM_COUNT", "0", "Device Memory segments count (0 - disabled)", ucs_offsetof(uct_common_mlx5_iface_config_t, dm.count), UCS_CONFIG_TYPE_UINT}, #endif {NULL} }; +#if HAVE_IBV_EXP_DM +/* uct_mlx5_dm_va is used to get pointer to DM mapped into process address space */ +typedef struct uct_mlx5_dm_va { + struct ibv_exp_dm ibv_dm; + size_t length; + uint64_t *start_va; +} uct_mlx5_dm_va_t; +#endif + + unsigned uct_rc_mlx5_iface_srq_post_recv(uct_rc_iface_t *iface, uct_ib_mlx5_srq_t *srq) { uct_ib_mlx5_srq_seg_t *seg; @@ -220,6 +232,159 @@ void uct_rc_mlx5_iface_common_tag_cleanup(uct_rc_mlx5_iface_common_t *iface, #endif } + +#if HAVE_IBV_EXP_DM +static ucs_status_t +ucs_iface_dm_mpool_chunk_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +{ + ucs_status_t status; + + status = ucs_mpool_chunk_malloc(mp, size_p, chunk_p); + if (status == UCS_OK) { + memset(*chunk_p, 0, *size_p); + } + + return status; +} + +static void uct_iface_dm_mp_obj_init(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_mlx5_dm_data_t *dm = ucs_container_of(mp, uct_mlx5_dm_data_t, mp); + uct_rc_iface_send_desc_t* desc = (uct_rc_iface_send_desc_t*)obj; + + ucs_assert(desc->super.buffer == NULL); + ucs_assert(dm->seg_attached < dm->seg_count); + + desc->lkey = dm->mr->lkey; + desc->super.buffer = UCS_PTR_BYTE_OFFSET(dm->start_va, dm->seg_attached * dm->seg_len); + desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; + dm->seg_attached++; +} + +static ucs_mpool_ops_t uct_dm_iface_mpool_ops = { + .chunk_alloc = ucs_iface_dm_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = uct_iface_dm_mp_obj_init, + .obj_cleanup = NULL +}; + + +static int uct_mlx5_iface_dm_device_cmp(uct_mlx5_dm_data_t *dm_data, + uct_rc_iface_t *iface, + const uct_common_mlx5_iface_config_t *config) +{ + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super); + + return dm_data->device->ibv_context == dev->ibv_context; +} + +static ucs_status_t uct_mlx5_iface_init_dm_tl(uct_mlx5_dm_data_t *data, + uct_rc_iface_t *iface, + const uct_common_mlx5_iface_config_t *config) +{ + ucs_status_t status; + struct ibv_exp_alloc_dm_attr dm_attr; + struct ibv_exp_reg_mr_in mr_in; + + data->seg_len = ucs_align_up(config->dm.seg_len, 8); + data->seg_count = config->dm.count; + data->seg_attached = 0; + data->device = uct_ib_iface_device(&iface->super); + + dm_attr.length = data->seg_len * data->seg_count; + dm_attr.comp_mask = 0; + data->dm = ibv_exp_alloc_dm(data->device->ibv_context, &dm_attr); + + if (data->dm == NULL) { + /* TODO: prompt warning? */ + ucs_debug("ibv_exp_alloc_dm(dev=%s length=%zu) failed: %m", + uct_ib_device_name(data->device), dm_attr.length); + return UCS_OK; + } + + memset(&mr_in, 0, sizeof(mr_in)); + mr_in.pd = uct_ib_iface_md(&iface->super)->pd; + mr_in.comp_mask = IBV_EXP_REG_MR_DM; + mr_in.dm = data->dm; + mr_in.length = dm_attr.length; + data->mr = ibv_exp_reg_mr(&mr_in); + if (data->mr == NULL) { + ucs_warn("ibv_exp_reg_mr() error - On Device Memory registration failed, %d %m", errno); + goto failed_mr; + } + + data->start_va = ((uct_mlx5_dm_va_t*)data->dm)->start_va; + + status = ucs_mpool_init(&data->mp, 0, + sizeof(uct_rc_iface_send_desc_t), 0, UCS_SYS_CACHE_LINE_SIZE, + data->seg_count, data->seg_count, + &uct_dm_iface_mpool_ops, "mlx5_dm_desc"); + if (status != UCS_OK) { + goto failed_mpool; + } + + /* DM initialization may fail due to any reason, just + * free resources & continue without DM */ + return UCS_OK; + +failed_mpool: + ibv_dereg_mr(data->mr); +failed_mr: + ibv_exp_free_dm(data->dm); + data->dm = NULL; + return UCS_OK; +} + +static void uct_mlx5_iface_cleanup_dm_tl(uct_mlx5_dm_data_t *data) +{ + ucs_assert(data->dm != NULL); + ucs_assert(data->mr != NULL); + + ucs_mpool_cleanup(&data->mp, 1); + ibv_dereg_mr(data->mr); + ibv_exp_free_dm(data->dm); +} +#endif + +static ucs_status_t uct_mlx5_iface_init_dm(uct_rc_mlx5_iface_common_t *iface, + uct_rc_iface_t *rc_iface, + const uct_common_mlx5_iface_config_t *config) +{ +#if HAVE_IBV_EXP_DM + if ((config->dm.seg_len * config->dm.count) == 0) { + iface->dm.dm = NULL; + return UCS_OK; + } + + iface->dm.dm = uct_worker_tl_data_get(rc_iface->super.super.worker, + UCT_IB_MLX5_WORKER_DM_KEY, + uct_mlx5_dm_data_t, + uct_mlx5_iface_dm_device_cmp, + uct_mlx5_iface_init_dm_tl, rc_iface, config); + if (iface->dm.dm == NULL) { + return UCS_OK; + } + + if (iface->dm.dm->dm == NULL) { + /* failed to initialize DM. put back object */ + uct_worker_tl_data_put(iface->dm.dm, uct_mlx5_iface_cleanup_dm_tl); + iface->dm.dm = NULL; + } else { + iface->dm.seg_len = iface->dm.dm->seg_len; + } +#endif + return UCS_OK; +} + +static void uct_mlx_iface_dm_cleanup(uct_rc_mlx5_iface_common_t *iface) +{ +#if HAVE_IBV_EXP_DM + if (iface->dm.dm) { + uct_worker_tl_data_put(iface->dm.dm, uct_mlx5_iface_cleanup_dm_tl); + } +#endif +} + ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface, uct_rc_iface_t *rc_iface, uct_rc_iface_config_t *config, @@ -243,6 +408,11 @@ ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface, return status; } + status = uct_mlx5_iface_init_dm(iface, rc_iface, common_config); + if (status != UCS_OK) { + return status; + } + rc_iface->rx.srq.quota = iface->rx.srq.mask + 1; /* By default set to something that is always in cache */ @@ -251,7 +421,7 @@ ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface, status = UCS_STATS_NODE_ALLOC(&iface->stats, &uct_rc_mlx5_iface_stats_class, rc_iface->stats); if (status != UCS_OK) { - return status; + goto cleanup_dm; } status = uct_iface_mpool_init(&rc_iface->super.super, @@ -265,6 +435,7 @@ ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface, "rc_mlx5_atomic_desc"); if (status != UCS_OK) { UCS_STATS_NODE_FREE(iface->stats); + goto cleanup_dm; } /* For little-endian atomic reply, override the default functions, to still @@ -280,6 +451,10 @@ ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface, rc_iface->config.atomic64_ext_handler = uct_rc_mlx5_common_atomic64_le_handler; } + return UCS_OK; + +cleanup_dm: + uct_mlx_iface_dm_cleanup(iface); return status; } @@ -287,6 +462,7 @@ void uct_rc_mlx5_iface_common_cleanup(uct_rc_mlx5_iface_common_t *iface) { UCS_STATS_NODE_FREE(iface->stats); ucs_mpool_cleanup(&iface->tx.atomic_desc_mp, 1); + uct_mlx_iface_dm_cleanup(iface); } void uct_rc_mlx5_iface_common_query(uct_iface_attr_t *iface_attr) diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.h b/src/uct/ib/rc/accel/rc_mlx5_common.h index dd069c2f87b..52282b2d4dd 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_common.h +++ b/src/uct/ib/rc/accel/rc_mlx5_common.h @@ -116,6 +116,20 @@ typedef struct uct_rc_mlx5_cmd_wq { #endif /* IBV_EXP_HW_TM */ +#if HAVE_IBV_EXP_DM +typedef struct uct_mlx5_dm_data { + uct_worker_tl_data_t super; + ucs_mpool_t mp; + struct ibv_mr *mr; + struct ibv_exp_dm *dm; + void *start_va; + size_t seg_len; + unsigned seg_count; + unsigned seg_attached; + uct_ib_device_t *device; +} uct_mlx5_dm_data_t; +#endif + typedef struct uct_common_mlx5_iface_config { #if HAVE_IBV_EXP_DM struct { @@ -142,6 +156,18 @@ typedef struct uct_rc_mlx5_iface_common { uct_rc_mlx5_tag_entry_t *tail; uct_rc_mlx5_tag_entry_t *list; } tm; +#endif +#if HAVE_IBV_EXP_DM + struct { + uct_mlx5_dm_data_t *dm; + size_t seg_len; /* cached value to avoid double-pointer access */ + ucs_status_t (*am_short)(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *payload, unsigned length); +#if IBV_EXP_HW_TM + ucs_status_t (*tag_short)(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length); +#endif + } dm; #endif UCS_STATS_NODE_DECLARE(stats); } uct_rc_mlx5_iface_common_t; From 3942a671c35454935cfb332348aeffebd29bc61f Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Tue, 6 Mar 2018 22:07:06 +0200 Subject: [PATCH 3/3] UCT/DC/MLX5: Register correct progress cb in worker --- src/uct/ib/dc/accel/dc_mlx5.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/uct/ib/dc/accel/dc_mlx5.c b/src/uct/ib/dc/accel/dc_mlx5.c index 6c236d12bbe..7bec3a57bfb 100644 --- a/src/uct/ib/dc/accel/dc_mlx5.c +++ b/src/uct/ib/dc/accel/dc_mlx5.c @@ -771,6 +771,13 @@ static unsigned uct_dc_mlx5_iface_progress_tm(void *arg) } #endif +static void uct_dc_mlx5_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); + + uct_base_iface_progress_enable_cb(&iface->super.super, iface->progress, flags); +} + static void uct_dc_mlx5_iface_handle_failure(uct_ib_iface_t *ib_iface, void *arg, ucs_status_t status) { @@ -944,7 +951,7 @@ static uct_dc_iface_ops_t uct_dc_mlx5_iface_ops = { #endif .iface_flush = uct_dc_iface_flush, .iface_fence = uct_base_iface_fence, - .iface_progress_enable = uct_base_iface_progress_enable, + .iface_progress_enable = uct_dc_mlx5_iface_progress_enable, .iface_progress_disable = uct_base_iface_progress_disable, .iface_progress = uct_rc_iface_do_progress, .iface_event_fd_get = uct_ib_iface_event_fd_get,