diff --git a/config/m4/ucm.m4 b/config/m4/ucm.m4 index 1e229edc51f..8d7a9e40ec0 100644 --- a/config/m4/ucm.m4 +++ b/config/m4/ucm.m4 @@ -46,6 +46,13 @@ AC_CHECK_DECLS([MADV_FREE, [#include ]) +# +# getauxval() +# +AC_CHECK_DECLS([getauxval], [], [], + [#include ]) + + # BISTRO hooks infrastructure # # SYS_xxx macro diff --git a/src/ucm/cuda/cudamem.c b/src/ucm/cuda/cudamem.c index 23a9c5d1806..fcae85b50b0 100644 --- a/src/ucm/cuda/cudamem.c +++ b/src/ucm/cuda/cudamem.c @@ -194,75 +194,66 @@ static ucm_cuda_func_t ucm_cuda_runtime_funcs[] = { {{NULL}, NULL} }; -static int ucm_cuda_allow_hook_mode(ucm_mmap_hook_mode_t mode) -{ - return (ucm_global_opts.cuda_hook_modes & UCS_BIT(mode)) && - (ucm_get_hook_mode(mode) == mode); -} - static ucs_status_t -ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, int *used_reloc, - const char *name) +ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, const char *name, + ucm_mmap_hook_mode_t mode, int *installed_hooks_p) { - const char UCS_V_UNUSED *hook_mode; - unsigned num_bistro, num_reloc; ucm_cuda_func_t *func; ucs_status_t status; void *func_ptr; + int count; + + if (*installed_hooks_p & UCS_BIT(mode)) { + return UCS_OK; + } - num_bistro = 0; - num_reloc = 0; + if (!(ucm_global_opts.cuda_hook_modes & UCS_BIT(mode))) { + /* Disabled by configuration */ + ucm_debug("cuda memory hooks mode %s is disabled for %s API", + ucm_mmap_hook_modes[mode], name); + return UCS_OK; + } + + count = 0; for (func = funcs; func->patch.symbol != NULL; ++func) { func_ptr = ucm_reloc_get_orig(func->patch.symbol, func->patch.value); if (func_ptr == NULL) { continue; } - status = UCS_ERR_UNSUPPORTED; - - if (ucm_cuda_allow_hook_mode(UCM_MMAP_HOOK_BISTRO)) { + if (mode == UCM_MMAP_HOOK_BISTRO) { status = ucm_bistro_patch(func_ptr, func->patch.value, func->patch.symbol, func->orig_func_ptr, NULL); - if (status == UCS_OK) { - ucm_debug("installed bistro hook for '%s': %d", - func->patch.symbol, status); - ++num_bistro; - continue; - } - - ucm_debug("failed to install bistro hook for '%s'", - func->patch.symbol); + } else if (mode == UCM_MMAP_HOOK_RELOC) { + status = ucm_reloc_modify(&func->patch); + } else { + break; } - if (ucm_cuda_allow_hook_mode(UCM_MMAP_HOOK_RELOC)) { - status = ucm_reloc_modify(&func->patch); - if (status == UCS_OK) { - ++num_reloc; - ucm_debug("installed reloc hook on '%s'", func->patch.symbol); - continue; - } - - ucm_debug("failed to install relocation table hook for '%s'", - func->patch.symbol); + if (status != UCS_OK) { + ucm_diag("failed to install %s hook for '%s'", + ucm_mmap_hook_modes[mode], func->patch.symbol); + return status; } - ucm_diag("failed to install hook for '%s'", func->patch.symbol); - return status; + ucm_debug("installed %s hook for '%s'", ucm_mmap_hook_modes[mode], + func->patch.symbol); + ++count; } - *used_reloc = num_reloc > 0; - ucm_info("cuda memory hooks on %s API: installed %u bistro and %u reloc", - name, num_bistro, num_reloc); + *installed_hooks_p |= UCS_BIT(mode); + ucm_info("cuda memory hooks mode %s: installed %d on %s API", + ucm_mmap_hook_modes[mode], count, name); return UCS_OK; } static ucs_status_t ucm_cudamem_install(int events) { - static int ucm_cudamem_installed = 0; static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER; + static int driver_api_hooks = 0; + static int runtime_api_hooks = 0; ucs_status_t status = UCS_OK; - int used_reloc; if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) { goto out; @@ -276,26 +267,22 @@ static ucs_status_t ucm_cudamem_install(int events) pthread_mutex_lock(&install_mutex); - if (ucm_cudamem_installed) { + status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, "driver", + UCM_MMAP_HOOK_BISTRO, &driver_api_hooks); + if (status != UCS_OK) { goto out_unlock; } - status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, &used_reloc, - "driver"); + status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, "driver", + UCM_MMAP_HOOK_RELOC, &driver_api_hooks); if (status != UCS_OK) { - ucm_warn("failed to install cuda memory hooks on driver API"); - } else if (!used_reloc) { - ucm_cudamem_installed = 1; - } else if (status == UCS_OK) { - /* Failed to install bistro hooks on all driver APIs, so need to install - hooks on runtime APIs. */ - status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, &used_reloc, - "runtime"); - if (status == UCS_OK) { - ucm_cudamem_installed = 1; - } else { - ucm_warn("failed to install cuda memory hooks on runtime API") - } + goto out_unlock; + } + + status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, "runtime", + UCM_MMAP_HOOK_RELOC, &runtime_api_hooks); + if (status != UCS_OK) { + goto out_unlock; } out_unlock: diff --git a/src/ucm/mmap/install.c b/src/ucm/mmap/install.c index e9573ac7bce..762bee2ae21 100644 --- a/src/ucm/mmap/install.c +++ b/src/ucm/mmap/install.c @@ -58,7 +58,6 @@ ((UCM_MMAP_MAX_EVENT_NAME_LEN + 2) * \ ucs_static_array_size(ucm_mmap_event_name)) -extern const char *ucm_mmap_hook_modes[]; typedef struct ucm_mmap_func { ucm_reloc_patch_t patch; @@ -89,6 +88,15 @@ static ucm_mmap_func_t ucm_mmap_funcs[] = { static pthread_mutex_t ucm_mmap_install_mutex = PTHREAD_MUTEX_INITIALIZER; static int ucm_mmap_installed_events = 0; /* events that were reported as installed */ +const char *ucm_mmap_hook_modes[] = { + [UCM_MMAP_HOOK_NONE] = "none", + [UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR, +#if UCM_BISTRO_HOOKS + [UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR, +#endif + [UCM_MMAP_HOOK_LAST] = NULL +}; + static const char *ucm_mmap_event_name[] = { /* Native events */ UCM_MMAP_EVENT_NAME_ENTRY(MMAP), diff --git a/src/ucm/mmap/mmap.h b/src/ucm/mmap/mmap.h index c7b4e4e37b2..0d3bd15b115 100644 --- a/src/ucm/mmap/mmap.h +++ b/src/ucm/mmap/mmap.h @@ -39,6 +39,13 @@ ucs_status_t ucm_mmap_test_installed_events(int events); ucs_status_t ucm_mmap_test_events(int events, const char *event_type); void ucm_mmap_init(); + +/** + * Memory hooks mode names. + */ +extern const char *ucm_mmap_hook_modes[]; + + static UCS_F_ALWAYS_INLINE ucm_mmap_hook_mode_t ucm_mmap_hook_mode(void) { return ucm_get_hook_mode(ucm_global_opts.mmap_hook_mode); diff --git a/src/ucm/util/reloc.c b/src/ucm/util/reloc.c index bee825b95b7..de890bf19e5 100644 --- a/src/ucm/util/reloc.c +++ b/src/ucm/util/reloc.c @@ -29,6 +29,11 @@ #include #include +#ifdef HAVE_DECL_GETAUXVAL +#include +#endif + + /* Ensure this macro is defined (from ) - otherwise, cppcheck might fail with an "unknown macro" warning */ #ifndef ElfW @@ -116,6 +121,14 @@ static ucs_status_t ucm_reloc_get_aux_phsize(int *phsize_p) return UCS_OK; } +#ifdef HAVE_DECL_GETAUXVAL + phsize = getauxval(AT_PHENT); + if (phsize > 0) { + *phsize_p = phsize; + return UCS_OK; + } +#endif + fd = open(proc_auxv_filename, O_RDONLY); if (fd < 0) { ucm_error("failed to open '%s' for reading: %m", proc_auxv_filename); diff --git a/src/ucs/config/global_opts.c b/src/ucs/config/global_opts.c index cdc4083a240..f0bfba5f840 100644 --- a/src/ucs/config/global_opts.c +++ b/src/ucs/config/global_opts.c @@ -37,7 +37,7 @@ ucs_global_opts_t ucs_global_opts = { .debug_signo = SIGHUP, .log_level_trigger = UCS_LOG_LEVEL_FATAL, .warn_unused_env_vars = 1, - .enable_memtype_cache = 1, + .enable_memtype_cache = UCS_TRY, .async_max_events = 64, .async_signo = SIGALRM, .stats_dest = "", @@ -148,9 +148,9 @@ static ucs_config_field_t ucs_global_opts_table[] = { "configuration parser.", ucs_offsetof(ucs_global_opts_t, warn_unused_env_vars), UCS_CONFIG_TYPE_BOOL}, - {"MEMTYPE_CACHE", "y", + {"MEMTYPE_CACHE", "try", "Enable memory type (cuda/rocm) cache", - ucs_offsetof(ucs_global_opts_t, enable_memtype_cache), UCS_CONFIG_TYPE_BOOL}, + ucs_offsetof(ucs_global_opts_t, enable_memtype_cache), UCS_CONFIG_TYPE_TERNARY}, {"ASYNC_MAX_EVENTS", "1024", /* TODO remove this; resize mpmc */ "Maximal number of events which can be handled from one context", diff --git a/src/ucs/config/global_opts.h b/src/ucs/config/global_opts.h index 82f095cedb8..a11e39989b4 100644 --- a/src/ucs/config/global_opts.h +++ b/src/ucs/config/global_opts.h @@ -80,7 +80,7 @@ typedef struct { unsigned async_max_events; /** Memtype cache */ - int enable_memtype_cache; + ucs_ternary_auto_value_t enable_memtype_cache; /* Destination for statistics: udp:host:port / file:path / stdout */ diff --git a/src/ucs/config/ucm_opts.c b/src/ucs/config/ucm_opts.c index 36661cad15a..c5170e18b65 100644 --- a/src/ucs/config/ucm_opts.c +++ b/src/ucs/config/ucm_opts.c @@ -12,21 +12,13 @@ #include #include +#include #include #include #define UCM_CONFIG_PREFIX "MEM_" -static const char *ucm_mmap_hook_modes[] = { - [UCM_MMAP_HOOK_NONE] = "none", - [UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR, -#if UCM_BISTRO_HOOKS - [UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR, -#endif - [UCM_MMAP_HOOK_LAST] = NULL -}; - static const char *ucm_module_unload_prevent_modes[] = { [UCM_UNLOAD_PREVENT_MODE_LAZY] = "lazy", [UCM_UNLOAD_PREVENT_MODE_NOW] = "now", @@ -71,9 +63,10 @@ static ucs_config_field_t ucm_global_config_table[] = { {"CUDA_HOOK_MODE", #if UCM_BISTRO_HOOKS - UCM_MMAP_HOOK_BISTRO_STR "," -#endif + UCM_MMAP_HOOK_BISTRO_STR, +#else UCM_MMAP_HOOK_RELOC_STR, +#endif "Cuda memory hook modes. A combination of:\n" " none - Don't set Cuda hooks.\n" " reloc - Use ELF relocation table to set hooks. In this mode, if any\n" diff --git a/src/ucs/memory/memtype_cache.c b/src/ucs/memory/memtype_cache.c index 4a88cff7693..ea01d47fc19 100644 --- a/src/ucs/memory/memtype_cache.c +++ b/src/ucs/memory/memtype_cache.c @@ -24,6 +24,7 @@ static ucs_spinlock_t ucs_memtype_cache_global_instance_lock; +static int ucs_memtype_cache_failed = 0; ucs_memtype_cache_t *ucs_memtype_cache_global_instance = NULL; @@ -56,16 +57,23 @@ static UCS_F_ALWAYS_INLINE ucs_memtype_cache_t *ucs_memtype_cache_get_global() ucs_memtype_cache_t *memtype_cache = NULL; ucs_status_t status; - if (!ucs_global_opts.enable_memtype_cache) { + if (ucs_global_opts.enable_memtype_cache == UCS_NO) { return NULL; } /* Double-check lock scheme */ - if (ucs_unlikely(ucs_memtype_cache_global_instance == NULL)) { + if (ucs_unlikely(ucs_memtype_cache_global_instance == NULL) && + !ucs_memtype_cache_failed) { /* Create the memtype cache outside the lock, to avoid a Coverity error of lock inversion with UCS_INIT_ONCE from ucm_set_event_handler() */ status = UCS_CLASS_NEW(ucs_memtype_cache_t, &memtype_cache); if (status != UCS_OK) { + /* If we failed to create the memtype cache once, do not try again */ + ucs_memtype_cache_failed = 1; + if (ucs_global_opts.enable_memtype_cache == UCS_YES) { + ucs_warn("failed to create memtype cache: %s", + ucs_status_string(status)); + } return NULL; } @@ -386,9 +394,9 @@ static UCS_CLASS_INIT_FUNC(ucs_memtype_cache_t) UCM_EVENT_FLAG_EXISTING_ALLOC, 1000, ucs_memtype_cache_event_callback, self); - if ((status != UCS_OK) && (status != UCS_ERR_UNSUPPORTED)) { - ucs_error("failed to set UCM memtype event handler: %s", - ucs_status_string(status)); + if (status != UCS_OK) { + ucs_diag("failed to set UCM memtype event handler: %s", + ucs_status_string(status)); goto err_cleanup_pgtable; } diff --git a/src/ucs/memory/rcache.c b/src/ucs/memory/rcache.c index 69bf08d196a..2a0df2e212e 100644 --- a/src/ucs/memory/rcache.c +++ b/src/ucs/memory/rcache.c @@ -1294,6 +1294,8 @@ static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params, status = ucm_set_event_handler(params->ucm_events, params->ucm_event_priority, ucs_rcache_unmapped_callback, self); if (status != UCS_OK) { + ucs_diag("rcache failed to install UCM event handler: %s", + ucs_status_string(status)); goto err_remove_vfs; } diff --git a/test/gtest/ucs/test_memtype_cache.cc b/test/gtest/ucs/test_memtype_cache.cc index bfce9faa88b..ebd415363ca 100644 --- a/test/gtest/ucs/test_memtype_cache.cc +++ b/test/gtest/ucs/test_memtype_cache.cc @@ -45,13 +45,13 @@ class test_memtype_cache : public ucs::test_with_param { if (!expect_found || (expected_type == UCS_MEMORY_TYPE_HOST)) { /* memory type should be not found or unknown */ if (status != UCS_ERR_NO_ELEM) { - ASSERT_UCS_OK(status, << "ptr=" << ptr << " size=" << size); + ASSERT_UCS_OK(status, << " ptr=" << ptr << " size=" << size); EXPECT_EQ(UCS_MEMORY_TYPE_UNKNOWN, mem_info.type) << "ptr=" << ptr << " size=" << size << mem_buffer::mem_type_name(mem_info.type); } } else { - ASSERT_UCS_OK(status, << "ptr=" << ptr << " size=" << size); + ASSERT_UCS_OK(status, << " ptr=" << ptr << " size=" << size); EXPECT_TRUE((UCS_MEMORY_TYPE_UNKNOWN == mem_info.type) || (expected_type == mem_info.type)) << "ptr=" << ptr << " size=" << size diff --git a/test/gtest/uct/test_p2p_err.cc b/test/gtest/uct/test_p2p_err.cc index 1a4471ac3bc..5825ec0b84c 100644 --- a/test/gtest/uct/test_p2p_err.cc +++ b/test/gtest/uct/test_p2p_err.cc @@ -126,7 +126,7 @@ class uct_p2p_err_test : public uct_p2p_test { { void *address = NULL; ucs_status_t status = ucs_mmap_alloc(&length, &address, 0, "test_dummy"); - ASSERT_UCS_OK(status, << "length = " << length); + ASSERT_UCS_OK(status, << " length = " << length); status = ucs_mmap_free(address, length); ASSERT_UCS_OK(status); /* coverity[use_after_free] */