Skip to content

Commit

Permalink
GTEST/UCM/ROCM: test hip runtime malloc/free events
Browse files Browse the repository at this point in the history
  • Loading branch information
Sourav Chakraborty committed Dec 4, 2019
1 parent 7923499 commit dd1d421
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 2 deletions.
10 changes: 8 additions & 2 deletions config/m4/rocm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ AS_IF([test "x$with_rocm" != "xno"],
[AC_MSG_NOTICE([ROCm path given as $with_rocm ...])
ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include"
ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib"
ROCM_LIBS="-lhsa-runtime64"],
ROCM_LIBS="-lhsa-runtime64"
HIP_CPPFLAGS="-D__HIP_PLATFORM_HCC__ -I$with_rocm/include/hip -I$with_rocm/include"
HIP_LDFLAGS="-L$with_rocm/hip/lib -L$with_rocm/lib"
HIP_LIBS="-lhip_hcc"],
[AC_MSG_NOTICE([ROCm flags given ...])
ROCM_PARSE_FLAGS([with_rocm],
[ROCM_LIBS], [ROCM_LDFLAGS], [ROCM_CPPFLAGS])])
Expand Down Expand Up @@ -80,7 +83,10 @@ AS_IF([test "x$with_rocm" != "xno"],
AS_IF([test "x$rocm_happy" = "xyes"],
[AC_SUBST([ROCM_CPPFLAGS])
AC_SUBST([ROCM_LDFLAGS])
AC_SUBST([ROCM_LIBS])],
AC_SUBST([ROCM_LIBS])
AC_SUBST([HIP_CPPFLAGS])
AC_SUBST([HIP_LDFLAGS])
AC_SUBST([HIP_LIBS])],
[AC_MSG_WARN([ROCm not found])])
],
[AC_MSG_WARN([ROCm was explicitly disabled])]
Expand Down
14 changes: 14 additions & 0 deletions test/gtest/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
# Copyright (C) Los Alamos National Security, LLC. 2018 ALL RIGHTS RESERVED.
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#
Expand Down Expand Up @@ -214,6 +215,19 @@ gtest_LDADD += \
$(top_builddir)/src/uct/cuda/libuct_cuda.la
endif

if HAVE_ROCM
gtest_SOURCES += \
ucm/rocm_hooks.cc
gtest_CPPFLAGS += \
$(HIP_CPPFLAGS)
gtest_CXXFLAGS += \
-std=gnu++11
gtest_LDADD += \
$(HIP_LDFLAGS) \
$(HIP_LIBS) \
$(top_builddir)/src/uct/rocm/libuct_rocm.la
endif

noinst_HEADERS = \
common/gtest.h \
common/mem_buffer.h \
Expand Down
24 changes: 24 additions & 0 deletions test/gtest/common/mem_buffer.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -27,6 +28,19 @@

#endif

#if HAVE_ROCM
# include <hip_runtime.h>

#define ROCM_CALL(_code) \
do { \
hipError_t cerr = _code; \
if (cerr != hipSuccess) { \
UCS_TEST_ABORT(# _code << " failed"); \
} \
} while (0)

#endif


std::vector<ucs_memory_type_t> mem_buffer::supported_mem_types()
{
Expand All @@ -35,6 +49,10 @@ std::vector<ucs_memory_type_t> mem_buffer::supported_mem_types()
#if HAVE_CUDA
vec.push_back(UCS_MEMORY_TYPE_CUDA);
vec.push_back(UCS_MEMORY_TYPE_CUDA_MANAGED);
#endif
#if HAVE_ROCM
vec.push_back(UCS_MEMORY_TYPE_ROCM);
vec.push_back(UCS_MEMORY_TYPE_ROCM_MANAGED);
#endif
return vec;
}
Expand Down Expand Up @@ -75,6 +93,12 @@ void mem_buffer::release(void *ptr, ucs_memory_type_t mem_type)
case UCS_MEMORY_TYPE_CUDA_MANAGED:
CUDA_CALL(cudaFree(ptr));
break;
#endif
#if HAVE_ROCM
case UCS_MEMORY_TYPE_ROCM:
case UCS_MEMORY_TYPE_ROCM_MANAGED:
ROCM_CALL(hipFree(ptr));
break;
#endif
default:
break;
Expand Down
191 changes: 191 additions & 0 deletions test/gtest/ucm/rocm_hooks.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
* See file LICENSE for terms.
*/
#include <ucm/api/ucm.h>
#include <common/test.h>
#include <hip_runtime.h>

static ucm_event_t alloc_event, free_event;

static void rocm_mem_alloc_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *arg)
{
alloc_event.mem_type.address = event->mem_type.address;
alloc_event.mem_type.size = event->mem_type.size;
alloc_event.mem_type.mem_type = event->mem_type.mem_type;
}

static void rocm_mem_free_callback(ucm_event_type_t event_type,
ucm_event_t *event, void *arg)
{
free_event.mem_type.address = event->mem_type.address;
free_event.mem_type.size = event->mem_type.size;
free_event.mem_type.mem_type = event->mem_type.mem_type;
}


class rocm_hooks : public ucs::test {
protected:

virtual void init() {
int dev_count;
ucs_status_t result;
hipError_t ret;
ucs::test::init();

ret = hipGetDeviceCount(&dev_count);
if (ret != hipSuccess || dev_count < 1) {
UCS_TEST_SKIP_R("no ROCm device detected");
}

if (hipSetDevice(0) != hipSuccess) {
UCS_TEST_SKIP_R("can't set ROCm device");
}

/* install memory hooks */
result = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_ALLOC, 0, rocm_mem_alloc_callback,
reinterpret_cast<void*>(this));
ASSERT_UCS_OK(result);

result = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_FREE, 0, rocm_mem_free_callback,
reinterpret_cast<void*>(this));
ASSERT_UCS_OK(result);
}

virtual void cleanup() {
ucm_unset_event_handler(UCM_EVENT_MEM_TYPE_ALLOC, rocm_mem_alloc_callback,
reinterpret_cast<void*>(this));
ucm_unset_event_handler(UCM_EVENT_MEM_TYPE_FREE, rocm_mem_free_callback,
reinterpret_cast<void*>(this));
ucs::test::cleanup();
}


void check_mem_alloc_events(void *ptr, size_t size,
int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
ASSERT_EQ(ptr, alloc_event.mem_type.address);
ASSERT_EQ(size, alloc_event.mem_type.size);
ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type);
}

void check_mem_free_events(void *ptr, size_t size,
int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
ASSERT_EQ(ptr, free_event.mem_type.address);
ASSERT_EQ(expect_mem_type, free_event.mem_type.mem_type);
}

};

UCS_TEST_F(rocm_hooks, test_hipMem_Alloc_Free) {
hipError_t ret;
void *dptr, *dptr1;

/* small allocation */
ret = hipMalloc(&dptr, 64);
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events((void *)dptr, 64);

ret = hipFree(dptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, 64);

/* large allocation */
ret = hipMalloc(&dptr, (256 * 1024 *1024));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events((void *)dptr, (256 * 1024 *1024));

ret = hipFree(dptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, (256 * 1024 *1024));

/* multiple allocations, hipfree in reverse order */
ret = hipMalloc(&dptr, (1 * 1024 *1024));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events((void *)dptr, (1 * 1024 *1024));

ret = hipMalloc(&dptr1, (1 * 1024 *1024));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events((void *)dptr1, (1 * 1024 *1024));

ret = hipFree(dptr1);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr1, (1 * 1024 *1024));

ret = hipFree(dptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, (1 * 1024 *1024));
}

UCS_TEST_F(rocm_hooks, test_hipMallocManaged) {
hipError_t ret;
void * dptr;

ret = hipMallocManaged(&dptr, 64);
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events((void *)dptr, 64, UCS_MEMORY_TYPE_ROCM_MANAGED);

ret = hipFree(dptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, 0, UCS_MEMORY_TYPE_ROCM_MANAGED);
}

UCS_TEST_F(rocm_hooks, test_hipMallocPitch) {
hipError_t ret;
void * dptr;
size_t pitch;

ret = hipMallocPitch(&dptr, &pitch, 4, 8);
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events((void *)dptr, (128 * 8));

ret = hipFree(dptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, 0);
}

UCS_TEST_F(rocm_hooks, test_hip_Malloc_Free) {
hipError_t ret;
void *ptr, *ptr1;

/* small allocation */
ret = hipMalloc(&ptr, 64);
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, 64);

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, 64);

/* large allocation */
ret = hipMalloc(&ptr, (256 * 1024 *1024));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, (256 * 1024 *1024));

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, (256 * 1024 *1024));

/* multiple allocations, rocmfree in reverse order */
ret = hipMalloc(&ptr, (1 * 1024 *1024));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, (1 * 1024 *1024));

ret = hipMalloc(&ptr1, (1 * 1024 *1024));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr1, (1 * 1024 *1024));

ret = hipFree(ptr1);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr1, (1 * 1024 *1024));

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, (1 * 1024 *1024));

/* hipFree with NULL */
ret = hipFree(NULL);
ASSERT_EQ(ret, hipSuccess);
}

2 comments on commit dd1d421

@souravzzz
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@paklui I'd like to push this patch but it will conflict with the changes in config/m4/rocm.m4 in PR openucx#4434

@paklui
Copy link

@paklui paklui commented on dd1d421 Dec 5, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@souravzzz you can go ahead

Please sign in to comment.