Skip to content

Commit

Permalink
Merge pull request #7909 from yosefe/topic/memtype-cache-fixes-v1.12.x
Browse files Browse the repository at this point in the history
Memtype cache fixes - v1.12.x
  • Loading branch information
yosefe authored Feb 7, 2022
2 parents 11bf7e9 + 87c7e1a commit 0796bf5
Show file tree
Hide file tree
Showing 16 changed files with 133 additions and 103 deletions.
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Fixed memory hooks for Cuda 11.5
* Fixed memory type cache merge
* Fixed continuously triggering wakeup fd when keepalive is used
* Fixed memtype cache fallback when memory hooks are not installed

## 1.12.0 (January 12, 2022)
### Features:
Expand Down
7 changes: 7 additions & 0 deletions config/m4/ucm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ AC_CHECK_DECLS([MADV_FREE,
[#include <sys/mman.h>])


#
# getauxval()
#
AC_CHECK_DECLS([getauxval], [], [],
[#include <sys/auxv.h>])


# BISTRO hooks infrastructure
#
# SYS_xxx macro
Expand Down
101 changes: 44 additions & 57 deletions src/ucm/cuda/cudamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,75 +194,66 @@ static ucm_cuda_func_t ucm_cuda_runtime_funcs[] = {
{{NULL}, NULL}
};

static int ucm_cuda_allow_hook_mode(ucm_mmap_hook_mode_t mode)
{
return (ucm_global_opts.cuda_hook_modes & UCS_BIT(mode)) &&
(ucm_get_hook_mode(mode) == mode);
}

static ucs_status_t
ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, int *used_reloc,
const char *name)
ucm_cuda_install_hooks(ucm_cuda_func_t *funcs, const char *name,
ucm_mmap_hook_mode_t mode, int *installed_hooks_p)
{
const char UCS_V_UNUSED *hook_mode;
unsigned num_bistro, num_reloc;
ucm_cuda_func_t *func;
ucs_status_t status;
void *func_ptr;
int count;

if (*installed_hooks_p & UCS_BIT(mode)) {
return UCS_OK;
}

num_bistro = 0;
num_reloc = 0;
if (!(ucm_global_opts.cuda_hook_modes & UCS_BIT(mode))) {
/* Disabled by configuration */
ucm_debug("cuda memory hooks mode %s is disabled for %s API",
ucm_mmap_hook_modes[mode], name);
return UCS_OK;
}

count = 0;
for (func = funcs; func->patch.symbol != NULL; ++func) {
func_ptr = ucm_reloc_get_orig(func->patch.symbol, func->patch.value);
if (func_ptr == NULL) {
continue;
}

status = UCS_ERR_UNSUPPORTED;

if (ucm_cuda_allow_hook_mode(UCM_MMAP_HOOK_BISTRO)) {
if (mode == UCM_MMAP_HOOK_BISTRO) {
status = ucm_bistro_patch(func_ptr, func->patch.value,
func->patch.symbol, func->orig_func_ptr,
NULL);
if (status == UCS_OK) {
ucm_trace("installed bistro hook for '%s': %s",
func->patch.symbol, ucs_status_string(status));
++num_bistro;
continue;
}

ucm_debug("failed to install bistro hook for '%s'",
func->patch.symbol);
} else if (mode == UCM_MMAP_HOOK_RELOC) {
status = ucm_reloc_modify(&func->patch);
} else {
break;
}

if (ucm_cuda_allow_hook_mode(UCM_MMAP_HOOK_RELOC)) {
status = ucm_reloc_modify(&func->patch);
if (status == UCS_OK) {
++num_reloc;
ucm_trace("installed reloc hook on '%s'", func->patch.symbol);
continue;
}

ucm_debug("failed to install relocation table hook for '%s'",
func->patch.symbol);
if (status != UCS_OK) {
ucm_diag("failed to install %s hook for '%s'",
ucm_mmap_hook_modes[mode], func->patch.symbol);
return status;
}

ucm_diag("failed to install hook for '%s'", func->patch.symbol);
return status;
ucm_debug("installed %s hook for '%s'", ucm_mmap_hook_modes[mode],
func->patch.symbol);
++count;
}

*used_reloc = num_reloc > 0;
ucm_info("cuda memory hooks on %s API: installed %u bistro and %u reloc",
name, num_bistro, num_reloc);
*installed_hooks_p |= UCS_BIT(mode);
ucm_info("cuda memory hooks mode %s: installed %d on %s API",
ucm_mmap_hook_modes[mode], count, name);
return UCS_OK;
}

static ucs_status_t ucm_cudamem_install(int events)
{
static int ucm_cudamem_installed = 0;
static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
static int driver_api_hooks = 0;
static int runtime_api_hooks = 0;
ucs_status_t status = UCS_OK;
int used_reloc;

if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) {
goto out;
Expand All @@ -276,26 +267,22 @@ static ucs_status_t ucm_cudamem_install(int events)

pthread_mutex_lock(&install_mutex);

if (ucm_cudamem_installed) {
status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, "driver",
UCM_MMAP_HOOK_BISTRO, &driver_api_hooks);
if (status != UCS_OK) {
goto out_unlock;
}

status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, &used_reloc,
"driver");
status = ucm_cuda_install_hooks(ucm_cuda_driver_funcs, "driver",
UCM_MMAP_HOOK_RELOC, &driver_api_hooks);
if (status != UCS_OK) {
ucm_warn("failed to install cuda memory hooks on driver API");
} else if (!used_reloc) {
ucm_cudamem_installed = 1;
} else if (status == UCS_OK) {
/* Failed to install bistro hooks on all driver APIs, so need to install
hooks on runtime APIs. */
status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, &used_reloc,
"runtime");
if (status == UCS_OK) {
ucm_cudamem_installed = 1;
} else {
ucm_warn("failed to install cuda memory hooks on runtime API")
}
goto out_unlock;
}

status = ucm_cuda_install_hooks(ucm_cuda_runtime_funcs, "runtime",
UCM_MMAP_HOOK_RELOC, &runtime_api_hooks);
if (status != UCS_OK) {
goto out_unlock;
}

out_unlock:
Expand Down
10 changes: 9 additions & 1 deletion src/ucm/mmap/install.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
((UCM_MMAP_MAX_EVENT_NAME_LEN + 2) * \
ucs_static_array_size(ucm_mmap_event_name))

extern const char *ucm_mmap_hook_modes[];

typedef struct ucm_mmap_func {
ucm_reloc_patch_t patch;
Expand Down Expand Up @@ -89,6 +88,15 @@ static ucm_mmap_func_t ucm_mmap_funcs[] = {
static pthread_mutex_t ucm_mmap_install_mutex = PTHREAD_MUTEX_INITIALIZER;
static int ucm_mmap_installed_events = 0; /* events that were reported as installed */

const char *ucm_mmap_hook_modes[] = {
[UCM_MMAP_HOOK_NONE] = "none",
[UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR,
#if UCM_BISTRO_HOOKS
[UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR,
#endif
[UCM_MMAP_HOOK_LAST] = NULL
};

static const char *ucm_mmap_event_name[] = {
/* Native events */
UCM_MMAP_EVENT_NAME_ENTRY(MMAP),
Expand Down
7 changes: 7 additions & 0 deletions src/ucm/mmap/mmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ ucs_status_t ucm_mmap_test_installed_events(int events);
ucs_status_t ucm_mmap_test_events(int events, const char *event_type);
void ucm_mmap_init();


/**
* Memory hooks mode names.
*/
extern const char *ucm_mmap_hook_modes[];


static UCS_F_ALWAYS_INLINE ucm_mmap_hook_mode_t ucm_mmap_hook_mode(void)
{
return ucm_get_hook_mode(ucm_global_opts.mmap_hook_mode);
Expand Down
13 changes: 13 additions & 0 deletions src/ucm/util/reloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
#include <link.h>
#include <limits.h>

#ifdef HAVE_DECL_GETAUXVAL
#include <sys/auxv.h>
#endif


/* Ensure this macro is defined (from <link.h>) - otherwise, cppcheck might
fail with an "unknown macro" warning */
#ifndef ElfW
Expand Down Expand Up @@ -116,6 +121,14 @@ static ucs_status_t ucm_reloc_get_aux_phsize(int *phsize_p)
return UCS_OK;
}

#ifdef HAVE_DECL_GETAUXVAL
phsize = getauxval(AT_PHENT);
if (phsize > 0) {
*phsize_p = phsize;
return UCS_OK;
}
#endif

fd = open(proc_auxv_filename, O_RDONLY);
if (fd < 0) {
ucm_error("failed to open '%s' for reading: %m", proc_auxv_filename);
Expand Down
28 changes: 11 additions & 17 deletions src/ucp/core/ucp_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,25 +505,19 @@ ucp_memory_detect_internal(ucp_context_h context, const void *address,
}

status = ucs_memtype_cache_lookup(address, length, mem_info);
if (status != UCS_ERR_NO_ELEM) {
if (ucs_likely(status != UCS_OK)) {
ucs_assert(status == UCS_ERR_NO_ELEM);
goto out_host_mem;
}

if ((mem_info->type != UCS_MEMORY_TYPE_UNKNOWN) &&
((mem_info->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN))) {
return;
}

/* Fall thru to slow-path memory type and system device detection by UCT
* memory domains. In any case, the memory type cache is not expected to
* return HOST memory type.
*/
ucs_assert(mem_info->type != UCS_MEMORY_TYPE_HOST);
if (ucs_likely(status == UCS_ERR_NO_ELEM)) {
goto out_host_mem;
} else if ((status == UCS_ERR_UNSUPPORTED) ||
((status == UCS_OK) &&
((mem_info->type == UCS_MEMORY_TYPE_UNKNOWN) ||
(mem_info->sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN)))) {
ucp_memory_detect_slowpath(context, address, length, mem_info);
} else {
ucs_assertv(status == UCS_OK, "%s (%d)", ucs_status_string(status),
status);
}

ucp_memory_detect_slowpath(context, address, length, mem_info);
/* Memory type and system device was detected successfully */
return;

out_host_mem:
Expand Down
6 changes: 3 additions & 3 deletions src/ucs/config/global_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ucs_global_opts_t ucs_global_opts = {
.debug_signo = SIGHUP,
.log_level_trigger = UCS_LOG_LEVEL_FATAL,
.warn_unused_env_vars = 1,
.enable_memtype_cache = 1,
.enable_memtype_cache = UCS_TRY,
.async_max_events = 64,
.async_signo = SIGALRM,
.stats_dest = "",
Expand Down Expand Up @@ -145,9 +145,9 @@ static ucs_config_field_t ucs_global_opts_table[] = {
"configuration parser.",
ucs_offsetof(ucs_global_opts_t, warn_unused_env_vars), UCS_CONFIG_TYPE_BOOL},

{"MEMTYPE_CACHE", "y",
{"MEMTYPE_CACHE", "try",
"Enable memory type (cuda/rocm) cache",
ucs_offsetof(ucs_global_opts_t, enable_memtype_cache), UCS_CONFIG_TYPE_BOOL},
ucs_offsetof(ucs_global_opts_t, enable_memtype_cache), UCS_CONFIG_TYPE_TERNARY},

{"ASYNC_MAX_EVENTS", "1024", /* TODO remove this; resize mpmc */
"Maximal number of events which can be handled from one context",
Expand Down
2 changes: 1 addition & 1 deletion src/ucs/config/global_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ typedef struct {
unsigned async_max_events;

/** Memtype cache */
int enable_memtype_cache;
ucs_ternary_auto_value_t enable_memtype_cache;

/* Destination for statistics: udp:host:port / file:path / stdout
*/
Expand Down
15 changes: 4 additions & 11 deletions src/ucs/config/ucm_opts.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,13 @@

#include <ucm/api/ucm.h>
#include <ucm/util/log.h>
#include <ucm/util/sys.h>
#include <ucm/mmap/mmap.h>
#include <ucs/sys/compiler.h>


#define UCM_CONFIG_PREFIX "MEM_"

static const char *ucm_mmap_hook_modes[] = {
[UCM_MMAP_HOOK_NONE] = "none",
[UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR,
#if UCM_BISTRO_HOOKS
[UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR,
#endif
[UCM_MMAP_HOOK_LAST] = NULL
};

static const char *ucm_module_unload_prevent_modes[] = {
[UCM_UNLOAD_PREVENT_MODE_LAZY] = "lazy",
[UCM_UNLOAD_PREVENT_MODE_NOW] = "now",
Expand Down Expand Up @@ -71,9 +63,10 @@ static ucs_config_field_t ucm_global_config_table[] = {

{"CUDA_HOOK_MODE",
#if UCM_BISTRO_HOOKS
UCM_MMAP_HOOK_BISTRO_STR ","
#endif
UCM_MMAP_HOOK_BISTRO_STR,
#else
UCM_MMAP_HOOK_RELOC_STR,
#endif
"Cuda memory hook modes. A combination of:\n"
" none - Don't set Cuda hooks.\n"
" reloc - Use ELF relocation table to set hooks. In this mode, if any\n"
Expand Down
Loading

0 comments on commit 0796bf5

Please sign in to comment.