Skip to content

Commit

Permalink
Merge pull request openucx#12 from bureddy/cudafree_hooks
Browse files Browse the repository at this point in the history
cudaFree memory hook
  • Loading branch information
bureddy authored Sep 18, 2017
2 parents 3b08096 + 0e815cd commit 4450729
Show file tree
Hide file tree
Showing 8 changed files with 362 additions and 3 deletions.
7 changes: 7 additions & 0 deletions src/ucm/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ libucm_la_SOURCES = \
util/reloc.c \
util/sys.c

if HAVE_CUDA
libucm_la_SOURCES += \
cuda/install.c \
cuda/replace.c

endif

if HAVE_UCM_PTMALLOC283
libucm_la_CPPFLAGS += \
-I$(srcdir)/ptmalloc283/sysdeps/pthread \
Expand Down
21 changes: 20 additions & 1 deletion src/ucm/api/ucm.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
#define UCM_H_

#include <ucs/sys/compiler_def.h>

#if HAVE_CUDA
#include <cuda_runtime.h>
#include <cuda.h>
#endif
BEGIN_C_DECLS

#include <ucs/config/types.h>
Expand All @@ -32,6 +35,7 @@ typedef enum ucm_event_type {
UCM_EVENT_SHMAT = UCS_BIT(3),
UCM_EVENT_SHMDT = UCS_BIT(4),
UCM_EVENT_SBRK = UCS_BIT(5),
UCM_EVENT_CUDAFREE = UCS_BIT(6),

/* Aggregate events */
UCM_EVENT_VM_MAPPED = UCS_BIT(16),
Expand Down Expand Up @@ -113,6 +117,13 @@ typedef union ucm_event {
intptr_t increment;
} sbrk;

#if HAVE_CUDA
struct {
int result;
void *address;
} cudaFree;
#endif

/*
* UCM_EVENT_VM_MAPPED, UCM_EVENT_VM_UNMAPPED
*
Expand Down Expand Up @@ -296,6 +307,14 @@ int ucm_orig_shmdt(const void *shmaddr);
*/
void *ucm_orig_sbrk(intptr_t increment);

#if HAVE_CUDA

cudaError_t ucm_orig_cudaFree(void *address);

cudaError_t ucm_cudaFree(void *address);

#endif


/**
* @brief Call the original implementation of @ref mmap and all handlers
Expand Down
22 changes: 22 additions & 0 deletions src/ucm/cuda/cudamem.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#ifndef UCM_CUDAMEM_H_
#define UCM_CUDAMEM_H_

#include <ucm/api/ucm.h>
#include <cuda.h>
#include <cuda_runtime.h>

ucs_status_t ucm_cudamem_install(int events);

void ucm_cudamem_event_test_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *arg);


cudaError_t ucm_override_cudaFree(void *addr);

#endif
154 changes: 154 additions & 0 deletions src/ucm/cuda/install.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include "cudamem.h"

#include <ucm/api/ucm.h>
#include <ucm/event/event.h>
#include <ucm/util/log.h>
#include <ucm/util/reloc.h>
#include <ucm/util/ucm_config.h>
#include <ucs/sys/math.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <unistd.h>
#include <pthread.h>


typedef struct ucm_cudamem_func {
ucm_reloc_patch_t patch;
ucm_event_type_t event_type;
} ucm_cudamem_func_t;

static ucm_cudamem_func_t ucm_cudamem_funcs[] = {
{ {"cudaFree", ucm_override_cudaFree}, UCM_EVENT_CUDAFREE},
{ {NULL, NULL}, 0}
};

void ucm_cudamem_event_test_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *arg)
{
int *out_events = arg;
*out_events |= event_type;
}

/* Called with lock held */
static ucs_status_t ucm_cudamem_test(int events)
{
static int installed_events = 0;
ucm_event_handler_t handler;
int out_events = 0;
void *p;

if (ucs_test_all_flags(installed_events, events)) {
/* All requested events are already installed */
return UCS_OK;
}

/* Install a temporary event handler which will add the supported event
* type to out_events bitmap.
*/
handler.events = events;
handler.priority = -1;
handler.cb = ucm_cudamem_event_test_callback;
handler.arg = &out_events;
out_events = 0;

ucm_event_handler_add(&handler);

if (events & (UCM_EVENT_CUDAFREE)) {
if (cudaSuccess != cudaMalloc(&p, 64)) {
ucm_error("cudaMalloc failed");
return UCS_ERR_UNSUPPORTED;
}
cudaFree(p);
}


ucm_event_handler_remove(&handler);

/* TODO check address / stop all threads */
installed_events |= out_events;
ucm_debug("cudamem test: got 0x%x out of 0x%x, total: 0x%x", out_events, events,
installed_events);

/* Return success iff we caught all wanted events */
if (!ucs_test_all_flags(out_events, events)) {
return UCS_ERR_UNSUPPORTED;
}

return UCS_OK;
}

/* Called with lock held */
static ucs_status_t ucs_cudamem_install_reloc(int events)
{
static int installed_events = 0;
ucm_cudamem_func_t *entry;
ucs_status_t status;

if (!ucm_global_config.enable_cuda_hooks) {
ucm_debug("installing cudamem relocations is disabled by configuration");
return UCS_ERR_UNSUPPORTED;
}

for (entry = ucm_cudamem_funcs; entry->patch.symbol != NULL; ++entry) {
if (!(entry->event_type & events)) {
/* Not required */
continue;
}

if (entry->event_type & installed_events) {
/* Already installed */
continue;
}

ucm_debug("cudamem: installing relocation table entry for %s = %p for event 0x%x",
entry->patch.symbol, entry->patch.value, entry->event_type);

status = ucm_reloc_modify(&entry->patch);
if (status != UCS_OK) {
ucm_warn("failed to install relocation table entry for '%s'",
entry->patch.symbol);
return status;
}

installed_events |= entry->event_type;
}

return UCS_OK;
}

ucs_status_t ucm_cudamem_install(int events)
{
static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
ucs_status_t status;

pthread_mutex_lock(&install_mutex);

status = ucm_cudamem_test(events);
if (status == UCS_OK) {
goto out_unlock;
}

status = ucs_cudamem_install_reloc(events);
if (status != UCS_OK) {
ucm_debug("failed to install relocations for cudamem");
goto out_unlock;
}

status = ucm_cudamem_test(events);

out_unlock:
pthread_mutex_unlock(&install_mutex);
return status;
}
102 changes: 102 additions & 0 deletions src/ucm/cuda/replace.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include "cudamem.h"

#include <ucm/event/event.h>
#include <ucm/util/log.h>
#include <ucm/util/reloc.h>
#include <ucs/sys/compiler.h>
#include <ucs/sys/preprocessor.h>
#include <ucs/type/component.h>
#include <pthread.h>


#define MAP_FAILED ((void*)-1)

static pthread_mutex_t ucm_cudamem_get_orig_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
static pthread_t volatile ucm_cudamem_get_orig_thread = -1;


/**
* Define a replacement function to a memory-mapping function call, which calls
* the event handler, and if event handler returns error code - calls the original
* function.
*/
#define UCM_DEFINE_CUDA_FUNC(_name, _rettype, _fail_val, ...) \
\
_rettype ucm_override_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)); \
\
/* Call the original function using dlsym(RTLD_NEXT) */ \
_rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
{ \
typedef _rettype (*func_ptr_t) (__VA_ARGS__); \
static func_ptr_t orig_func_ptr = NULL; \
\
ucm_trace("%s()", __FUNCTION__); \
\
if (ucs_unlikely(orig_func_ptr == NULL)) { \
pthread_mutex_lock(&ucm_cudamem_get_orig_lock); \
ucm_cudamem_get_orig_thread = pthread_self(); \
orig_func_ptr = ucm_reloc_get_orig(UCS_PP_QUOTE(_name), \
ucm_override_##_name); \
ucm_cudamem_get_orig_thread = -1; \
pthread_mutex_unlock(&ucm_cudamem_get_orig_lock); \
} \
return orig_func_ptr(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
} \
\
/* Define a symbol which goes to the replacement - in case we are loaded first */ \
_rettype ucm_override_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
{ \
ucm_trace("%s()", __FUNCTION__); \
\
if (ucs_unlikely(ucm_cudamem_get_orig_thread == pthread_self())) { \
return _fail_val; \
} \
return ucm_##_name(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
}

#define UCM_OVERRIDE_CUDA_FUNC(_name) \
cudaError_t _name() __attribute__ ((alias ("ucm_override_" UCS_PP_QUOTE(_name)))); \


/*
* Define argument list with given types.
*/
#define UCM_FUNC_DEFINE_ARGS(...) \
UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_DEFINE, _, \
UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \
(__VA_ARGS__)))

/*
* Pass auto-generated arguments to a function call.
*/
#define UCM_FUNC_PASS_ARGS(...) \
UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_PASS, _, UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__)))


/*
* Helpers
*/
#define _UCM_FUNC_ARG_DEFINE(_, _bundle) \
__UCM_FUNC_ARG_DEFINE(_, UCS_PP_TUPLE_0 _bundle, UCS_PP_TUPLE_1 _bundle)
#define __UCM_FUNC_ARG_DEFINE(_, _index, _type) \
_type UCS_PP_TOKENPASTE(arg, _index)
#define _UCM_FUNC_ARG_PASS(_, _index) \
UCS_PP_TOKENPASTE(arg, _index)


UCM_DEFINE_CUDA_FUNC(cudaFree, cudaError_t, -1, void*)

#if ENABLE_SYMBOL_OVERRIDE
UCM_OVERRIDE_CUDA_FUNC(cudaFree)
#endif

Loading

0 comments on commit 4450729

Please sign in to comment.