Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TOOLS/PERF: Enable ROCM memory type support for UCP perf tests #4587

Merged
merged 2 commits into from
Dec 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/m4/rocm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ AC_ARG_WITH([rocm],
[with_rocm=guess])

rocm_happy=no
hip_happy=no
AS_IF([test "x$with_rocm" != "xno"],
[AS_CASE(["x$with_rocm"],
[x|xguess|xyes],
Expand Down
2 changes: 1 addition & 1 deletion src/tools/perf/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# See file LICENSE for terms.
#

SUBDIRS = cuda lib
SUBDIRS = cuda rocm lib
CC = $(UCX_PERFTEST_CC)

noinst_HEADERS = api/libperf.h
Expand Down
1 change: 1 addition & 0 deletions src/tools/perf/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ucx_perftest_modules=""
m4_include([src/tools/perf/lib/configure.m4])
m4_include([src/tools/perf/cuda/configure.m4])
m4_include([src/tools/perf/rocm/configure.m4])
AC_DEFINE_UNQUOTED([ucx_perftest_MODULES], ["${ucx_perftest_modules}"],
[Perftest loadable modules])

Expand Down
14 changes: 14 additions & 0 deletions src/tools/perf/perftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,12 @@ static void usage(const struct perftest_context *ctx, const char *program)
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL) {
printf(" cuda-managed - NVIDIA GPU managed/unified memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL) {
printf(" rocm - AMD/ROCm GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL) {
printf(" rocm-managed - AMD/ROCm GPU managed memory\n");
}
printf(" -n <iters> number of iterations to run (%ld)\n", ctx->params.max_iter);
printf(" -w <iters> number of warm-up iterations (%zu)\n",
ctx->params.warmup_iter);
Expand Down Expand Up @@ -673,6 +679,14 @@ static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA_MANAGED;
return UCS_OK;
} else if (!strcmp(optarg, "rocm") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM;
return UCS_OK;
} else if (!strcmp(optarg, "rocm-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
return UCS_OK;
}

ucs_error("Unsupported memory type: \"%s\"", optarg);
Expand Down
17 changes: 17 additions & 0 deletions src/tools/perf/rocm/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

if HAVE_HIP

module_LTLIBRARIES = libucx_perftest_rocm.la
libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(HIP_CPPFLAGS)
libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(HIP_CFLAGS)
libucx_perftest_rocm_la_LDFLAGS = $(HIP_LDFLAGS) $(HIP_LIBS) -version-info $(SOVERSION)
libucx_perftest_rocm_la_SOURCES = rocm_alloc.c

include $(top_srcdir)/config/module.am

endif
11 changes: 11 additions & 0 deletions src/tools/perf/rocm/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

UCX_CHECK_ROCM

AS_IF([test "x$rocm_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:rocm"])

AC_CONFIG_FILES([src/tools/perf/rocm/Makefile])
189 changes: 189 additions & 0 deletions src/tools/perf/rocm/rocm_alloc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/**
* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include <tools/perf/lib/libperf_int.h>

#include <hip/hip_runtime.h>
#include <ucs/sys/compiler.h>


static ucs_status_t ucx_perf_rocm_init(ucx_perf_context_t *perf)
{
hipError_t ret;
unsigned group_index;
int num_gpus;
int gpu_index;

group_index = rte_call(perf, group_index);

ret = hipGetDeviceCount(&num_gpus);
if (ret != hipSuccess) {
return UCS_ERR_NO_DEVICE;
}

gpu_index = group_index % num_gpus;

ret = hipSetDevice(gpu_index);
if (ret != hipSuccess) {
return UCS_ERR_NO_DEVICE;
}

return UCS_OK;
}

static inline ucs_status_t ucx_perf_rocm_alloc(size_t length,
ucs_memory_type_t mem_type,
void **address_p)
{
hipError_t ret;

ucs_assert((mem_type == UCS_MEMORY_TYPE_ROCM) ||
(mem_type == UCS_MEMORY_TYPE_ROCM_MANAGED));

ret = ((mem_type == UCS_MEMORY_TYPE_ROCM) ?
hipMalloc(address_p, length) :
hipMallocManaged(address_p, length, hipMemAttachGlobal));
if (ret != hipSuccess) {
ucs_error("failed to allocate memory");
return UCS_ERR_NO_MEMORY;
}

return UCS_OK;
}

static ucs_status_t ucp_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length,
void **address_p, ucp_mem_h *memh_p,
int non_blk_flag)
{
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM, address_p);
}

static ucs_status_t ucp_perf_rocm_alloc_managed(const ucx_perf_context_t *perf,
size_t length, void **address_p,
ucp_mem_h *memh_p, int non_blk_flag)
{
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM_MANAGED, address_p);
}

static void ucp_perf_rocm_free(const ucx_perf_context_t *perf,
void *address, ucp_mem_h memh)
{
hipFree(address);
}

static inline ucs_status_t
uct_perf_rocm_alloc_reg_mem(const ucx_perf_context_t *perf,
size_t length,
ucs_memory_type_t mem_type,
unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
ucs_status_t status;

status = ucx_perf_rocm_alloc(length, mem_type, &alloc_mem->address);
if (status != UCS_OK) {
return status;
}

status = uct_md_mem_reg(perf->uct.md, alloc_mem->address,
length, flags, &alloc_mem->memh);
if (status != UCS_OK) {
hipFree(alloc_mem->address);
ucs_error("failed to register memory");
return status;
}

alloc_mem->mem_type = mem_type;
alloc_mem->md = perf->uct.md;

return UCS_OK;
}

static ucs_status_t uct_perf_rocm_alloc(const ucx_perf_context_t *perf,
size_t length, unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM,
flags, alloc_mem);
}

static ucs_status_t uct_perf_rocm_managed_alloc(const ucx_perf_context_t *perf,
size_t length, unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM_MANAGED,
flags, alloc_mem);
}

static void uct_perf_rocm_free(const ucx_perf_context_t *perf,
uct_allocated_memory_t *alloc_mem)
{
ucs_status_t status;

ucs_assert(alloc_mem->md == perf->uct.md);

status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh);
if (status != UCS_OK) {
ucs_error("failed to deregister memory");
}

hipFree(alloc_mem->address);
}

static void ucx_perf_rocm_memcpy(void *dst, ucs_memory_type_t dst_mem_type,
const void *src, ucs_memory_type_t src_mem_type,
size_t count)
{
hipError_t ret;

ret = hipMemcpy(dst, src, count, hipMemcpyDefault);
if (ret != hipSuccess) {
ucs_error("failed to copy memory: %s", hipGetErrorString(ret));
}
}

static void* ucx_perf_rocm_memset(void *dst, int value, size_t count)
{
hipError_t ret;

ret = hipMemset(dst, value, count);
if (ret != hipSuccess) {
ucs_error("failed to set memory: %s", hipGetErrorString(ret));
}

return dst;
}

UCS_STATIC_INIT {
static ucx_perf_allocator_t rocm_allocator = {
.mem_type = UCS_MEMORY_TYPE_ROCM,
.init = ucx_perf_rocm_init,
.ucp_alloc = ucp_perf_rocm_alloc,
.ucp_free = ucp_perf_rocm_free,
.uct_alloc = uct_perf_rocm_alloc,
.uct_free = uct_perf_rocm_free,
.memcpy = ucx_perf_rocm_memcpy,
.memset = ucx_perf_rocm_memset
};
static ucx_perf_allocator_t rocm_managed_allocator = {
.mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED,
.init = ucx_perf_rocm_init,
.ucp_alloc = ucp_perf_rocm_alloc_managed,
.ucp_free = ucp_perf_rocm_free,
.uct_alloc = uct_perf_rocm_managed_alloc,
.uct_free = uct_perf_rocm_free,
.memcpy = ucx_perf_rocm_memcpy,
.memset = ucx_perf_rocm_memset
};

ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = &rocm_allocator;
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = &rocm_managed_allocator;
}
UCS_STATIC_CLEANUP {
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = NULL;
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = NULL;

}