forked from openucx/ucx
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request openucx#12 from bureddy/cudafree_hooks
cudaFree memory hook
- Loading branch information
Showing
8 changed files
with
362 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
/** | ||
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#ifndef UCM_CUDAMEM_H_ | ||
#define UCM_CUDAMEM_H_ | ||
|
||
#include <ucm/api/ucm.h> | ||
#include <cuda.h> | ||
#include <cuda_runtime.h> | ||
|
||
ucs_status_t ucm_cudamem_install(int events); | ||
|
||
void ucm_cudamem_event_test_callback(ucm_event_type_t event_type, | ||
ucm_event_t *event, void *arg); | ||
|
||
|
||
cudaError_t ucm_override_cudaFree(void *addr); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/** | ||
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. | ||
* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#ifdef HAVE_CONFIG_H | ||
# include "config.h" | ||
#endif | ||
|
||
#include "cudamem.h" | ||
|
||
#include <ucm/api/ucm.h> | ||
#include <ucm/event/event.h> | ||
#include <ucm/util/log.h> | ||
#include <ucm/util/reloc.h> | ||
#include <ucm/util/ucm_config.h> | ||
#include <ucs/sys/math.h> | ||
|
||
#include <cuda.h> | ||
#include <cuda_runtime.h> | ||
#include <unistd.h> | ||
#include <pthread.h> | ||
|
||
|
||
typedef struct ucm_cudamem_func { | ||
ucm_reloc_patch_t patch; | ||
ucm_event_type_t event_type; | ||
} ucm_cudamem_func_t; | ||
|
||
static ucm_cudamem_func_t ucm_cudamem_funcs[] = { | ||
{ {"cudaFree", ucm_override_cudaFree}, UCM_EVENT_CUDAFREE}, | ||
{ {NULL, NULL}, 0} | ||
}; | ||
|
||
void ucm_cudamem_event_test_callback(ucm_event_type_t event_type, | ||
ucm_event_t *event, void *arg) | ||
{ | ||
int *out_events = arg; | ||
*out_events |= event_type; | ||
} | ||
|
||
/* Called with lock held */ | ||
static ucs_status_t ucm_cudamem_test(int events) | ||
{ | ||
static int installed_events = 0; | ||
ucm_event_handler_t handler; | ||
int out_events = 0; | ||
void *p; | ||
|
||
if (ucs_test_all_flags(installed_events, events)) { | ||
/* All requested events are already installed */ | ||
return UCS_OK; | ||
} | ||
|
||
/* Install a temporary event handler which will add the supported event | ||
* type to out_events bitmap. | ||
*/ | ||
handler.events = events; | ||
handler.priority = -1; | ||
handler.cb = ucm_cudamem_event_test_callback; | ||
handler.arg = &out_events; | ||
out_events = 0; | ||
|
||
ucm_event_handler_add(&handler); | ||
|
||
if (events & (UCM_EVENT_CUDAFREE)) { | ||
if (cudaSuccess != cudaMalloc(&p, 64)) { | ||
ucm_error("cudaMalloc failed"); | ||
return UCS_ERR_UNSUPPORTED; | ||
} | ||
cudaFree(p); | ||
} | ||
|
||
|
||
ucm_event_handler_remove(&handler); | ||
|
||
/* TODO check address / stop all threads */ | ||
installed_events |= out_events; | ||
ucm_debug("cudamem test: got 0x%x out of 0x%x, total: 0x%x", out_events, events, | ||
installed_events); | ||
|
||
/* Return success iff we caught all wanted events */ | ||
if (!ucs_test_all_flags(out_events, events)) { | ||
return UCS_ERR_UNSUPPORTED; | ||
} | ||
|
||
return UCS_OK; | ||
} | ||
|
||
/* Called with lock held */ | ||
static ucs_status_t ucs_cudamem_install_reloc(int events) | ||
{ | ||
static int installed_events = 0; | ||
ucm_cudamem_func_t *entry; | ||
ucs_status_t status; | ||
|
||
if (!ucm_global_config.enable_cuda_hooks) { | ||
ucm_debug("installing cudamem relocations is disabled by configuration"); | ||
return UCS_ERR_UNSUPPORTED; | ||
} | ||
|
||
for (entry = ucm_cudamem_funcs; entry->patch.symbol != NULL; ++entry) { | ||
if (!(entry->event_type & events)) { | ||
/* Not required */ | ||
continue; | ||
} | ||
|
||
if (entry->event_type & installed_events) { | ||
/* Already installed */ | ||
continue; | ||
} | ||
|
||
ucm_debug("cudamem: installing relocation table entry for %s = %p for event 0x%x", | ||
entry->patch.symbol, entry->patch.value, entry->event_type); | ||
|
||
status = ucm_reloc_modify(&entry->patch); | ||
if (status != UCS_OK) { | ||
ucm_warn("failed to install relocation table entry for '%s'", | ||
entry->patch.symbol); | ||
return status; | ||
} | ||
|
||
installed_events |= entry->event_type; | ||
} | ||
|
||
return UCS_OK; | ||
} | ||
|
||
ucs_status_t ucm_cudamem_install(int events) | ||
{ | ||
static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER; | ||
ucs_status_t status; | ||
|
||
pthread_mutex_lock(&install_mutex); | ||
|
||
status = ucm_cudamem_test(events); | ||
if (status == UCS_OK) { | ||
goto out_unlock; | ||
} | ||
|
||
status = ucs_cudamem_install_reloc(events); | ||
if (status != UCS_OK) { | ||
ucm_debug("failed to install relocations for cudamem"); | ||
goto out_unlock; | ||
} | ||
|
||
status = ucm_cudamem_test(events); | ||
|
||
out_unlock: | ||
pthread_mutex_unlock(&install_mutex); | ||
return status; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/** | ||
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#ifdef HAVE_CONFIG_H | ||
# include "config.h" | ||
#endif | ||
|
||
#include "cudamem.h" | ||
|
||
#include <ucm/event/event.h> | ||
#include <ucm/util/log.h> | ||
#include <ucm/util/reloc.h> | ||
#include <ucs/sys/compiler.h> | ||
#include <ucs/sys/preprocessor.h> | ||
#include <ucs/type/component.h> | ||
#include <pthread.h> | ||
|
||
|
||
#define MAP_FAILED ((void*)-1) | ||
|
||
static pthread_mutex_t ucm_cudamem_get_orig_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; | ||
static pthread_t volatile ucm_cudamem_get_orig_thread = -1; | ||
|
||
|
||
/** | ||
* Define a replacement function to a memory-mapping function call, which calls | ||
* the event handler, and if event handler returns error code - calls the original | ||
* function. | ||
*/ | ||
#define UCM_DEFINE_CUDA_FUNC(_name, _rettype, _fail_val, ...) \ | ||
\ | ||
_rettype ucm_override_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)); \ | ||
\ | ||
/* Call the original function using dlsym(RTLD_NEXT) */ \ | ||
_rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \ | ||
{ \ | ||
typedef _rettype (*func_ptr_t) (__VA_ARGS__); \ | ||
static func_ptr_t orig_func_ptr = NULL; \ | ||
\ | ||
ucm_trace("%s()", __FUNCTION__); \ | ||
\ | ||
if (ucs_unlikely(orig_func_ptr == NULL)) { \ | ||
pthread_mutex_lock(&ucm_cudamem_get_orig_lock); \ | ||
ucm_cudamem_get_orig_thread = pthread_self(); \ | ||
orig_func_ptr = ucm_reloc_get_orig(UCS_PP_QUOTE(_name), \ | ||
ucm_override_##_name); \ | ||
ucm_cudamem_get_orig_thread = -1; \ | ||
pthread_mutex_unlock(&ucm_cudamem_get_orig_lock); \ | ||
} \ | ||
return orig_func_ptr(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \ | ||
} \ | ||
\ | ||
/* Define a symbol which goes to the replacement - in case we are loaded first */ \ | ||
_rettype ucm_override_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \ | ||
{ \ | ||
ucm_trace("%s()", __FUNCTION__); \ | ||
\ | ||
if (ucs_unlikely(ucm_cudamem_get_orig_thread == pthread_self())) { \ | ||
return _fail_val; \ | ||
} \ | ||
return ucm_##_name(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \ | ||
} | ||
|
||
#define UCM_OVERRIDE_CUDA_FUNC(_name) \ | ||
cudaError_t _name() __attribute__ ((alias ("ucm_override_" UCS_PP_QUOTE(_name)))); \ | ||
|
||
|
||
/* | ||
* Define argument list with given types. | ||
*/ | ||
#define UCM_FUNC_DEFINE_ARGS(...) \ | ||
UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_DEFINE, _, \ | ||
UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \ | ||
(__VA_ARGS__))) | ||
|
||
/* | ||
* Pass auto-generated arguments to a function call. | ||
*/ | ||
#define UCM_FUNC_PASS_ARGS(...) \ | ||
UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_PASS, _, UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))) | ||
|
||
|
||
/* | ||
* Helpers | ||
*/ | ||
#define _UCM_FUNC_ARG_DEFINE(_, _bundle) \ | ||
__UCM_FUNC_ARG_DEFINE(_, UCS_PP_TUPLE_0 _bundle, UCS_PP_TUPLE_1 _bundle) | ||
#define __UCM_FUNC_ARG_DEFINE(_, _index, _type) \ | ||
_type UCS_PP_TOKENPASTE(arg, _index) | ||
#define _UCM_FUNC_ARG_PASS(_, _index) \ | ||
UCS_PP_TOKENPASTE(arg, _index) | ||
|
||
|
||
UCM_DEFINE_CUDA_FUNC(cudaFree, cudaError_t, -1, void*) | ||
|
||
#if ENABLE_SYMBOL_OVERRIDE | ||
UCM_OVERRIDE_CUDA_FUNC(cudaFree) | ||
#endif | ||
|
Oops, something went wrong.