Skip to content

Commit

Permalink
Merge pull request #115 from yosefe/topic/ucm-fix-init-race-int3
Browse files Browse the repository at this point in the history
UCM: Fix heap corruption caused by ucp_set_event_handler()
  • Loading branch information
yosefe authored Dec 15, 2020
2 parents 39c8adf + 8e41b36 commit 07709df
Show file tree
Hide file tree
Showing 22 changed files with 458 additions and 197 deletions.
31 changes: 23 additions & 8 deletions contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1091,16 +1091,21 @@ run_ucx_perftest() {
# Test malloc hooks with mpi
#
test_malloc_hooks_mpi() {
for tname in malloc_hooks malloc_hooks_unmapped external_events flag_no_install
for mode in reloc bistro
do
echo "==== Running memory hook (${tname}) on MPI ===="
$MPIRUN -np 1 $AFFINITY ./test/mpi/test_memhooks -t $tname
done
for tname in malloc_hooks malloc_hooks_unmapped external_events flag_no_install
do
echo "==== Running memory hook (${tname} mode ${mode}) on MPI ===="
$MPIRUN -np 1 $AFFINITY \
./test/mpi/test_memhooks -t $tname -m ${mode}
done

echo "==== Running memory hook (malloc_hooks) on MPI with LD_PRELOAD ===="
ucm_lib=$PWD/src/ucm/.libs/libucm.so
ls -l $ucm_lib
$MPIRUN -np 1 -x LD_PRELOAD=$ucm_lib $AFFINITY ./test/mpi/test_memhooks -t malloc_hooks
echo "==== Running memory hook (malloc_hooks mode ${mode}) on MPI with LD_PRELOAD ===="
ucm_lib=$PWD/src/ucm/.libs/libucm.so
ls -l $ucm_lib
$MPIRUN -np 1 -x LD_PRELOAD=$ucm_lib $AFFINITY \
./test/mpi/test_memhooks -t malloc_hooks -m ${mode}
done
}

#
Expand Down Expand Up @@ -1207,6 +1212,15 @@ test_ucp_dlopen() {
fi
}

test_init_mt() {
echo "==== Running multi-thread init ===="
$MAKEP
for ((i=0;i<50;++i))
do
$AFFINITY timeout 1m ./test/apps/test_init_mt
done
}

test_memtrack() {
../contrib/configure-devel --prefix=$ucx_inst
$MAKEP clean
Expand Down Expand Up @@ -1627,6 +1641,7 @@ run_tests() {
do_distributed_task 2 4 test_env_var_aliases
do_distributed_task 1 3 test_malloc_hook
do_distributed_task 0 4 test_ucp_dlopen
do_distributed_task 1 4 test_init_mt

# all are running gtest
run_gtest_default
Expand Down
26 changes: 25 additions & 1 deletion src/ucm/api/ucm.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ typedef enum ucm_event_type {
UCM_EVENT_SHMDT = UCS_BIT(4),
UCM_EVENT_SBRK = UCS_BIT(5),
UCM_EVENT_MADVISE = UCS_BIT(6),
UCM_EVENT_BRK = UCS_BIT(7),

/* Aggregate events */
UCM_EVENT_VM_MAPPED = UCS_BIT(16),
Expand Down Expand Up @@ -149,6 +150,15 @@ typedef union ucm_event {
int advice;
} madvise;

/*
* UCM_EVENT_BRK
* brk() is called.
*/
struct {
int result;
void *addr;
} brk;

/*
* UCM_EVENT_VM_MAPPED, UCM_EVENT_VM_UNMAPPED
*
Expand Down Expand Up @@ -198,7 +208,10 @@ typedef struct ucm_global_config {
} ucm_global_config_t;


/* Global UCM configuration */
/*
* Global UCM configuration to be set externally.
* @deprecated replaced by @ref ucm_library_init.
*/
extern ucm_global_config_t ucm_global_opts;


Expand Down Expand Up @@ -240,6 +253,17 @@ typedef void (*ucm_event_callback_t)(ucm_event_type_t event_type,
ucm_event_t *event, void *arg);


/**
* Initialize UCM library and set its configuration.
*
* @param [in] ucm_opts UCM library global configuration. If NULL, default
* configuration is applied.
*
* @note Calling this function more than once in the same process has no effect.
*/
void ucm_library_init(const ucm_global_config_t *ucm_opts);


/**
* @brief Install a handler for memory events.
*
Expand Down
4 changes: 2 additions & 2 deletions src/ucm/bistro/bistro.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

ucs_status_t ucm_bistro_remove_restore_point(ucm_bistro_restore_point_t *rp)
{
ucs_assert(rp != NULL);
ucm_assert(rp != NULL);
free(rp);
return UCS_OK;
}
Expand Down Expand Up @@ -103,7 +103,7 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp)

void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp)
{
ucs_assert(rp != NULL);
ucm_assert(rp != NULL);
return rp->addr;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ucm/bistro/bistro_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static inline void *ucm_bistro_lookup(const char *symbol)
{
void *addr;

ucs_assert(symbol != NULL);
ucm_assert(symbol != NULL);

addr = dlsym(RTLD_NEXT, symbol);
if (!addr) {
Expand Down
8 changes: 4 additions & 4 deletions src/ucm/bistro/bistro_ppc64.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ struct ucm_bistro_restore_point {
static void ucm_bistro_fill_base_patch(ucm_bistro_base_patch_t *patch,
uint32_t reg, uintptr_t value)
{
ucs_assert(patch != NULL);
ucm_assert(patch != NULL);

patch->addis = ADDIS ( reg, 0, (value >> 48));
patch->ori1 = ORI ( reg, reg, (value >> 32));
Expand All @@ -88,7 +88,7 @@ static void ucm_bistro_fill_base_patch(ucm_bistro_base_patch_t *patch,
static void ucm_bistro_fill_patch(ucm_bistro_patch_t *patch,
uint32_t reg, uintptr_t value)
{
ucs_assert(patch != NULL);
ucm_assert(patch != NULL);

ucm_bistro_fill_base_patch(&patch->super, reg, value);

Expand Down Expand Up @@ -185,7 +185,7 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp)
{
ucs_status_t status;

ucs_assert(rp != NULL);
ucm_assert(rp != NULL);

status = ucm_bistro_apply_patch(rp->func, &rp->func_patch, sizeof(rp->func_patch));
if (UCS_STATUS_IS_ERR(status)) {
Expand All @@ -202,7 +202,7 @@ ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp)

void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp)
{
ucs_assert(rp != NULL);
ucm_assert(rp != NULL);
return rp->entry;
}

Expand Down
74 changes: 45 additions & 29 deletions src/ucm/event/event.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ static void ucm_event_call_orig(ucm_event_type_t event_type, ucm_event_t *event,
event->shmdt.result = ucm_orig_shmdt(event->shmdt.shmaddr);
}
break;
case UCM_EVENT_BRK:
if (event->brk.result == -1) {
event->brk.result = ucm_orig_brk(event->brk.addr);
}
break;
case UCM_EVENT_SBRK:
if (event->sbrk.result == MAP_FAILED) {
event->sbrk.result = ucm_orig_sbrk(event->sbrk.increment);
Expand All @@ -119,8 +124,8 @@ static ucm_event_handler_t ucm_event_orig_handler = {
.list = UCS_LIST_INITIALIZER(&ucm_event_handlers, &ucm_event_handlers),
.events = UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_MREMAP |
UCM_EVENT_SHMAT | UCM_EVENT_SHMDT | UCM_EVENT_SBRK |
UCM_EVENT_MADVISE, /* All events */
.priority = 0, /* Between negative and positive handlers */
UCM_EVENT_MADVISE | UCM_EVENT_BRK, /* All events */
.priority = 0, /* Between negative and positive handlers */
.cb = ucm_event_call_orig
};
static ucs_list_link_t ucm_event_handlers =
Expand Down Expand Up @@ -351,7 +356,8 @@ void *ucm_sbrk(intptr_t increment)
ucm_trace("ucm_sbrk(increment=%+ld)", increment);

if (increment < 0) {
ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), increment),
ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_get_current_brk(),
increment),
-increment);
}

Expand All @@ -360,7 +366,8 @@ void *ucm_sbrk(intptr_t increment)
ucm_event_dispatch(UCM_EVENT_SBRK, &event);

if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), -increment),
ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_get_current_brk(),
-increment),
increment);
}

Expand All @@ -371,38 +378,36 @@ void *ucm_sbrk(intptr_t increment)

int ucm_brk(void *addr)
{
#if UCM_BISTRO_HOOKS
void *old_addr;
intptr_t increment;
ptrdiff_t increment;
void *current_brk;
ucm_event_t event;

old_addr = ucm_brk_syscall(0);
/* in case if addr == NULL - it just returns current pointer */
increment = addr ? ((intptr_t)addr - (intptr_t)old_addr) : 0;

ucm_event_enter();

ucm_trace("ucm_brk(addr=%p)", addr);

if (addr == NULL) {
increment = 0;
} else {
current_brk = ucm_get_current_brk();
increment = UCS_PTR_BYTE_DIFF(current_brk, addr);
}

if (increment < 0) {
ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(old_addr, increment),
-increment);
ucm_dispatch_vm_munmap(addr, -increment);
}

event.sbrk.result = (void*)-1;
event.sbrk.increment = increment;
ucm_event_dispatch(UCM_EVENT_SBRK, &event);
event.brk.result = -1;
event.brk.addr = addr;
ucm_event_dispatch(UCM_EVENT_BRK, &event);

if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
ucm_dispatch_vm_mmap(old_addr, increment);
if ((increment > 0) && (event.brk.result != -1)) {
ucm_dispatch_vm_mmap(current_brk, increment);
}

ucm_event_leave();

return event.sbrk.result == MAP_FAILED ? -1 : 0;
#else
return -1;
#endif
return event.brk.result;
}

int ucm_madvise(void *addr, size_t length, int advice)
Expand Down Expand Up @@ -439,6 +444,18 @@ int ucm_madvise(void *addr, size_t length, int advice)
return event.madvise.result;
}

void ucm_library_init(const ucm_global_config_t *ucm_opts)
{
static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;

UCS_INIT_ONCE(&init_once) {
if (ucm_opts != NULL) {
ucm_global_opts = *ucm_opts;
}
ucm_mmap_init();
}
}

void ucm_event_handler_add(ucm_event_handler_t *handler)
{
ucm_event_handler_t *elem;
Expand Down Expand Up @@ -481,21 +498,18 @@ static int ucm_events_to_native_events(int events)

static ucs_status_t ucm_event_install(int events)
{
static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
UCS_MODULE_FRAMEWORK_DECLARE(ucm);
ucm_event_installer_t *event_installer;
int native_events, malloc_events;
ucs_status_t status;

UCS_INIT_ONCE(&init_once) {
ucm_prevent_dl_unload();
}
ucm_prevent_dl_unload();

/* Replace aggregate events with the native events which make them */
native_events = ucm_events_to_native_events(events);

/* TODO lock */
status = ucm_mmap_install(native_events);
status = ucm_mmap_install(native_events, 0);
if (status != UCS_OK) {
ucm_debug("failed to install mmap events");
goto out_unlock;
Expand Down Expand Up @@ -526,7 +540,6 @@ static ucs_status_t ucm_event_install(int events)

out_unlock:
return status;

}

ucs_status_t ucm_set_event_handler(int events, int priority,
Expand All @@ -539,7 +552,7 @@ ucs_status_t ucm_set_event_handler(int events, int priority,

if (events & ~(UCM_EVENT_MMAP|UCM_EVENT_MUNMAP|UCM_EVENT_MREMAP|
UCM_EVENT_SHMAT|UCM_EVENT_SHMDT|
UCM_EVENT_SBRK|
UCM_EVENT_BRK|UCM_EVENT_SBRK|
UCM_EVENT_MADVISE|
UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED|
UCM_EVENT_MEM_TYPE_ALLOC|UCM_EVENT_MEM_TYPE_FREE|
Expand All @@ -552,6 +565,8 @@ ucs_status_t ucm_set_event_handler(int events, int priority,
return UCS_ERR_UNSUPPORTED;
}

ucm_library_init(NULL);

/* separate event flags from real events */
flags = events & (UCM_EVENT_FLAG_NO_INSTALL |
UCM_EVENT_FLAG_EXISTING_ALLOC);
Expand Down Expand Up @@ -626,6 +641,7 @@ void ucm_unset_event_handler(int events, ucm_event_callback_t cb, void *arg)

ucs_status_t ucm_test_events(int events)
{
ucm_library_init(NULL);
return ucm_mmap_test_installed_events(ucm_events_to_native_events(events));
}

Expand Down
5 changes: 3 additions & 2 deletions src/ucm/event/event.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
#include <ucs/type/status.h>

#define UCM_NATIVE_EVENT_VM_MAPPED (UCM_EVENT_MMAP | UCM_EVENT_MREMAP | \
UCM_EVENT_SHMAT | UCM_EVENT_SBRK)
UCM_EVENT_SHMAT | UCM_EVENT_SBRK | \
UCM_EVENT_BRK)

#define UCM_NATIVE_EVENT_VM_UNMAPPED (UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | \
UCM_EVENT_MREMAP | UCM_EVENT_SHMDT | \
UCM_EVENT_SHMAT | UCM_EVENT_SBRK | \
UCM_EVENT_MADVISE)
UCM_EVENT_MADVISE | UCM_EVENT_BRK)


typedef struct ucm_event_handler {
Expand Down
6 changes: 3 additions & 3 deletions src/ucm/malloc/malloc_hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ static void ucm_malloc_mmaped_ptr_add(void *ptr)

hash_it = kh_put(mmap_ptrs, &ucm_malloc_hook_state.ptrs, ptr,
&hash_extra_status);
ucs_assert_always(hash_extra_status >= 0);
ucs_assert_always(hash_it != kh_end(&ucm_malloc_hook_state.ptrs));
ucm_assert_always(hash_extra_status >= 0);
ucm_assert_always(hash_it != kh_end(&ucm_malloc_hook_state.ptrs));

ucs_recursive_spin_unlock(&ucm_malloc_hook_state.lock);
}
Expand Down Expand Up @@ -557,7 +557,7 @@ static void ucm_malloc_sbrk(ucm_event_type_t event_type,
if (ucm_malloc_hook_state.heap_start == (void*)-1) {
ucm_malloc_hook_state.heap_start = event->sbrk.result; /* sbrk() returns the previous break */
}
ucm_malloc_hook_state.heap_end = ucm_orig_sbrk(0);
ucm_malloc_hook_state.heap_end = ucm_get_current_brk();

ucm_trace("sbrk(%+ld)=%p - adjusting heap to [%p..%p]",
event->sbrk.increment, event->sbrk.result,
Expand Down
Loading

0 comments on commit 07709df

Please sign in to comment.