Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memtype cache fixes - v1.12.x #7909

Merged
merged 3 commits into from
Feb 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Fixed memory hooks for Cuda 11.5
* Fixed memory type cache merge
* Fixed continuously triggering wakeup fd when keepalive is used
* Fixed memtype cache fallback when memory hooks are not installed

## 1.12.0 (January 12, 2022)
### Features:
Expand Down
7 changes: 7 additions & 0 deletions config/m4/ucm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ AC_CHECK_DECLS([MADV_FREE,
[#include <sys/mman.h>])


#
# getauxval()
#
AC_CHECK_DECLS([getauxval], [], [],
[#include <sys/auxv.h>])


# BISTRO hooks infrastructure
#
# SYS_xxx macro
Expand Down
101 changes: 44 additions & 57 deletions src/ucm/cuda/cudamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,75 +194,66 @@ static ucm_cuda_func_t ucm_cuda_runtime_funcs[] = {
{{NULL}, NULL}
};

static int ucm_cuda_allow_hook_mode(ucm_mmap_hook_mode_t mode)
{
return (ucm_global_opts.cuda_hook_modes & UCS_BIT(mode)) &&
(ucm_get_hook_mode(mode) == mode);
}

static ucs_status_t
ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, int *used_reloc,
const char *name)
ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, const char *name,
ucm_mmap_hook_mode_t mode, int *installed_hooks_p)
{
const char UCS_V_UNUSED *hook_mode;
unsigned num_bistro, num_reloc;
ucm_cuda_func_t *func;
ucs_status_t status;
void *func_ptr;
int count;

if (*installed_hooks_p & UCS_BIT(mode)) {
return UCS_OK;
}

num_bistro = 0;
num_reloc = 0;
if (!(ucm_global_opts.cuda_hook_modes & UCS_BIT(mode))) {
/* Disabled by configuration */
ucm_debug("cuda memory hooks mode %s is disabled for %s API",
ucm_mmap_hook_modes[mode], name);
return UCS_OK;
}

count = 0;
for (func = funcs; func->patch.symbol != NULL; ++func) {
func_ptr = ucm_reloc_get_orig(func->patch.symbol, func->patch.value);
if (func_ptr == NULL) {
continue;
}

status = UCS_ERR_UNSUPPORTED;

if (ucm_cuda_allow_hook_mode(UCM_MMAP_HOOK_BISTRO)) {
if (mode == UCM_MMAP_HOOK_BISTRO) {
status = ucm_bistro_patch(func_ptr, func->patch.value,
func->patch.symbol, func->orig_func_ptr,
NULL);
if (status == UCS_OK) {
ucm_trace("installed bistro hook for '%s': %s",
func->patch.symbol, ucs_status_string(status));
++num_bistro;
continue;
}

ucm_debug("failed to install bistro hook for '%s'",
func->patch.symbol);
} else if (mode == UCM_MMAP_HOOK_RELOC) {
status = ucm_reloc_modify(&func->patch);
} else {
break;
}

if (ucm_cuda_allow_hook_mode(UCM_MMAP_HOOK_RELOC)) {
status = ucm_reloc_modify(&func->patch);
if (status == UCS_OK) {
++num_reloc;
ucm_trace("installed reloc hook on '%s'", func->patch.symbol);
continue;
}

ucm_debug("failed to install relocation table hook for '%s'",
func->patch.symbol);
if (status != UCS_OK) {
ucm_diag("failed to install %s hook for '%s'",
ucm_mmap_hook_modes[mode], func->patch.symbol);
return status;
}

ucm_diag("failed to install hook for '%s'", func->patch.symbol);
return status;
ucm_debug("installed %s hook for '%s'", ucm_mmap_hook_modes[mode],
func->patch.symbol);
++count;
}

*used_reloc = num_reloc > 0;
ucm_info("cuda memory hooks on %s API: installed %u bistro and %u reloc",
name, num_bistro, num_reloc);
*installed_hooks_p |= UCS_BIT(mode);
ucm_info("cuda memory hooks mode %s: installed %d on %s API",
ucm_mmap_hook_modes[mode], count, name);
return UCS_OK;
}

static ucs_status_t ucm_cudamem_install(int events)
{
static int ucm_cudamem_installed = 0;
static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
static int driver_api_hooks = 0;
static int runtime_api_hooks = 0;
ucs_status_t status = UCS_OK;
int used_reloc;

if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) {
goto out;
Expand All @@ -276,26 +267,22 @@ static ucs_status_t ucm_cudamem_install(int events)

pthread_mutex_lock(&install_mutex);

if (ucm_cudamem_installed) {
status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, "driver",
UCM_MMAP_HOOK_BISTRO, &driver_api_hooks);
if (status != UCS_OK) {
goto out_unlock;
}

status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, &used_reloc,
"driver");
status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, "driver",
UCM_MMAP_HOOK_RELOC, &driver_api_hooks);
if (status != UCS_OK) {
ucm_warn("failed to install cuda memory hooks on driver API");
} else if (!used_reloc) {
ucm_cudamem_installed = 1;
} else if (status == UCS_OK) {
/* Failed to install bistro hooks on all driver APIs, so need to install
hooks on runtime APIs. */
status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, &used_reloc,
"runtime");
if (status == UCS_OK) {
ucm_cudamem_installed = 1;
} else {
ucm_warn("failed to install cuda memory hooks on runtime API")
}
goto out_unlock;
}

status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, "runtime",
UCM_MMAP_HOOK_RELOC, &runtime_api_hooks);
if (status != UCS_OK) {
goto out_unlock;
}

out_unlock:
Expand Down
10 changes: 9 additions & 1 deletion src/ucm/mmap/install.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
((UCM_MMAP_MAX_EVENT_NAME_LEN + 2) * \
ucs_static_array_size(ucm_mmap_event_name))

extern const char *ucm_mmap_hook_modes[];

typedef struct ucm_mmap_func {
ucm_reloc_patch_t patch;
Expand Down Expand Up @@ -89,6 +88,15 @@ static ucm_mmap_func_t ucm_mmap_funcs[] = {
static pthread_mutex_t ucm_mmap_install_mutex = PTHREAD_MUTEX_INITIALIZER;
static int ucm_mmap_installed_events = 0; /* events that were reported as installed */

const char *ucm_mmap_hook_modes[] = {
[UCM_MMAP_HOOK_NONE] = "none",
[UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR,
#if UCM_BISTRO_HOOKS
[UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR,
#endif
[UCM_MMAP_HOOK_LAST] = NULL
};

static const char *ucm_mmap_event_name[] = {
/* Native events */
UCM_MMAP_EVENT_NAME_ENTRY(MMAP),
Expand Down
7 changes: 7 additions & 0 deletions src/ucm/mmap/mmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ ucs_status_t ucm_mmap_test_installed_events(int events);
ucs_status_t ucm_mmap_test_events(int events, const char *event_type);
void ucm_mmap_init();


/**
* Memory hooks mode names.
*/
extern const char *ucm_mmap_hook_modes[];


static UCS_F_ALWAYS_INLINE ucm_mmap_hook_mode_t ucm_mmap_hook_mode(void)
{
return ucm_get_hook_mode(ucm_global_opts.mmap_hook_mode);
Expand Down
13 changes: 13 additions & 0 deletions src/ucm/util/reloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
#include <link.h>
#include <limits.h>

#ifdef HAVE_DECL_GETAUXVAL
#include <sys/auxv.h>
#endif


/* Ensure this macro is defined (from <link.h>) - otherwise, cppcheck might
fail with an "unknown macro" warning */
#ifndef ElfW
Expand Down Expand Up @@ -116,6 +121,14 @@ static ucs_status_t ucm_reloc_get_aux_phsize(int *phsize_p)
return UCS_OK;
}

#ifdef HAVE_DECL_GETAUXVAL
phsize = getauxval(AT_PHENT);
if (phsize > 0) {
*phsize_p = phsize;
return UCS_OK;
}
#endif

fd = open(proc_auxv_filename, O_RDONLY);
if (fd < 0) {
ucm_error("failed to open '%s' for reading: %m", proc_auxv_filename);
Expand Down
28 changes: 11 additions & 17 deletions src/ucp/core/ucp_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,25 +505,19 @@ ucp_memory_detect_internal(ucp_context_h context, const void *address,
}

status = ucs_memtype_cache_lookup(address, length, mem_info);
if (status != UCS_ERR_NO_ELEM) {
if (ucs_likely(status != UCS_OK)) {
ucs_assert(status == UCS_ERR_NO_ELEM);
goto out_host_mem;
}

if ((mem_info->type != UCS_MEMORY_TYPE_UNKNOWN) &&
((mem_info->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN))) {
return;
}

/* Fall thru to slow-path memory type and system device detection by UCT
* memory domains. In any case, the memory type cache is not expected to
* return HOST memory type.
*/
ucs_assert(mem_info->type != UCS_MEMORY_TYPE_HOST);
if (ucs_likely(status == UCS_ERR_NO_ELEM)) {
goto out_host_mem;
} else if ((status == UCS_ERR_UNSUPPORTED) ||
((status == UCS_OK) &&
((mem_info->type == UCS_MEMORY_TYPE_UNKNOWN) ||
(mem_info->sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN)))) {
ucp_memory_detect_slowpath(context, address, length, mem_info);
} else {
ucs_assertv(status == UCS_OK, "%s (%d)", ucs_status_string(status),
status);
}

ucp_memory_detect_slowpath(context, address, length, mem_info);
/* Memory type and system device was detected successfully */
return;

out_host_mem:
Expand Down
6 changes: 3 additions & 3 deletions src/ucs/config/global_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ucs_global_opts_t ucs_global_opts = {
.debug_signo = SIGHUP,
.log_level_trigger = UCS_LOG_LEVEL_FATAL,
.warn_unused_env_vars = 1,
.enable_memtype_cache = 1,
.enable_memtype_cache = UCS_TRY,
.async_max_events = 64,
.async_signo = SIGALRM,
.stats_dest = "",
Expand Down Expand Up @@ -145,9 +145,9 @@ static ucs_config_field_t ucs_global_opts_table[] = {
"configuration parser.",
ucs_offsetof(ucs_global_opts_t, warn_unused_env_vars), UCS_CONFIG_TYPE_BOOL},

{"MEMTYPE_CACHE", "y",
{"MEMTYPE_CACHE", "try",
"Enable memory type (cuda/rocm) cache",
ucs_offsetof(ucs_global_opts_t, enable_memtype_cache), UCS_CONFIG_TYPE_BOOL},
ucs_offsetof(ucs_global_opts_t, enable_memtype_cache), UCS_CONFIG_TYPE_TERNARY},

{"ASYNC_MAX_EVENTS", "1024", /* TODO remove this; resize mpmc */
"Maximal number of events which can be handled from one context",
Expand Down
2 changes: 1 addition & 1 deletion src/ucs/config/global_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ typedef struct {
unsigned async_max_events;

/** Memtype cache */
int enable_memtype_cache;
ucs_ternary_auto_value_t enable_memtype_cache;

/* Destination for statistics: udp:host:port / file:path / stdout
*/
Expand Down
15 changes: 4 additions & 11 deletions src/ucs/config/ucm_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,13 @@

#include <ucm/api/ucm.h>
#include <ucm/util/log.h>
#include <ucm/util/sys.h>
#include <ucm/mmap/mmap.h>
#include <ucs/sys/compiler.h>


#define UCM_CONFIG_PREFIX "MEM_"

static const char *ucm_mmap_hook_modes[] = {
[UCM_MMAP_HOOK_NONE] = "none",
[UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR,
#if UCM_BISTRO_HOOKS
[UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR,
#endif
[UCM_MMAP_HOOK_LAST] = NULL
};

static const char *ucm_module_unload_prevent_modes[] = {
[UCM_UNLOAD_PREVENT_MODE_LAZY] = "lazy",
[UCM_UNLOAD_PREVENT_MODE_NOW] = "now",
Expand Down Expand Up @@ -71,9 +63,10 @@ static ucs_config_field_t ucm_global_config_table[] = {

{"CUDA_HOOK_MODE",
#if UCM_BISTRO_HOOKS
UCM_MMAP_HOOK_BISTRO_STR ","
#endif
UCM_MMAP_HOOK_BISTRO_STR,
#else
UCM_MMAP_HOOK_RELOC_STR,
#endif
"Cuda memory hook modes. A combination of:\n"
" none - Don't set Cuda hooks.\n"
" reloc - Use ELF relocation table to set hooks. In this mode, if any\n"
Expand Down
Loading