Skip to content

Commit

Permalink
Merge pull request #4587 from ROCmSoftwarePlatform/topic/sourav/add-r…
Browse files Browse the repository at this point in the history
…ocm-perftest

TOOLS/PERF: Enable ROCM memory type support for UCP perf tests
  • Loading branch information
yosefe authored Dec 20, 2019
2 parents 4ae9c2b + b11e3e2 commit 79c4dde
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 1 deletion.
1 change: 1 addition & 0 deletions config/m4/rocm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ AC_ARG_WITH([rocm],
[with_rocm=guess])
rocm_happy=no
hip_happy=no
AS_IF([test "x$with_rocm" != "xno"],
[AS_CASE(["x$with_rocm"],
[x|xguess|xyes],
Expand Down
2 changes: 1 addition & 1 deletion src/tools/perf/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# See file LICENSE for terms.
#

SUBDIRS = cuda lib
SUBDIRS = cuda rocm lib
CC = $(UCX_PERFTEST_CC)

noinst_HEADERS = api/libperf.h
Expand Down
1 change: 1 addition & 0 deletions src/tools/perf/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ucx_perftest_modules=""
m4_include([src/tools/perf/lib/configure.m4])
m4_include([src/tools/perf/cuda/configure.m4])
m4_include([src/tools/perf/rocm/configure.m4])
AC_DEFINE_UNQUOTED([ucx_perftest_MODULES], ["${ucx_perftest_modules}"],
[Perftest loadable modules])

Expand Down
14 changes: 14 additions & 0 deletions src/tools/perf/perftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,12 @@ static void usage(const struct perftest_context *ctx, const char *program)
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL) {
printf(" cuda-managed - NVIDIA GPU managed/unified memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL) {
printf(" rocm - AMD/ROCm GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL) {
printf(" rocm-managed - AMD/ROCm GPU managed memory\n");
}
printf(" -n <iters> number of iterations to run (%ld)\n", ctx->params.max_iter);
printf(" -w <iters> number of warm-up iterations (%zu)\n",
ctx->params.warmup_iter);
Expand Down Expand Up @@ -673,6 +679,14 @@ static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA_MANAGED;
return UCS_OK;
} else if (!strcmp(optarg, "rocm") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM;
return UCS_OK;
} else if (!strcmp(optarg, "rocm-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
return UCS_OK;
}

ucs_error("Unsupported memory type: \"%s\"", optarg);
Expand Down
17 changes: 17 additions & 0 deletions src/tools/perf/rocm/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

if HAVE_HIP

module_LTLIBRARIES = libucx_perftest_rocm.la
libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(HIP_CPPFLAGS)
libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(HIP_CFLAGS)
libucx_perftest_rocm_la_LDFLAGS = $(HIP_LDFLAGS) $(HIP_LIBS) -version-info $(SOVERSION)
libucx_perftest_rocm_la_SOURCES = rocm_alloc.c

include $(top_srcdir)/config/module.am

endif
11 changes: 11 additions & 0 deletions src/tools/perf/rocm/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

UCX_CHECK_ROCM

AS_IF([test "x$rocm_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:rocm"])

AC_CONFIG_FILES([src/tools/perf/rocm/Makefile])
189 changes: 189 additions & 0 deletions src/tools/perf/rocm/rocm_alloc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/**
* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include <tools/perf/lib/libperf_int.h>

#include <hip/hip_runtime.h>
#include <ucs/sys/compiler.h>


static ucs_status_t ucx_perf_rocm_init(ucx_perf_context_t *perf)
{
hipError_t ret;
unsigned group_index;
int num_gpus;
int gpu_index;

group_index = rte_call(perf, group_index);

ret = hipGetDeviceCount(&num_gpus);
if (ret != hipSuccess) {
return UCS_ERR_NO_DEVICE;
}

gpu_index = group_index % num_gpus;

ret = hipSetDevice(gpu_index);
if (ret != hipSuccess) {
return UCS_ERR_NO_DEVICE;
}

return UCS_OK;
}

static inline ucs_status_t ucx_perf_rocm_alloc(size_t length,
ucs_memory_type_t mem_type,
void **address_p)
{
hipError_t ret;

ucs_assert((mem_type == UCS_MEMORY_TYPE_ROCM) ||
(mem_type == UCS_MEMORY_TYPE_ROCM_MANAGED));

ret = ((mem_type == UCS_MEMORY_TYPE_ROCM) ?
hipMalloc(address_p, length) :
hipMallocManaged(address_p, length, hipMemAttachGlobal));
if (ret != hipSuccess) {
ucs_error("failed to allocate memory");
return UCS_ERR_NO_MEMORY;
}

return UCS_OK;
}

static ucs_status_t ucp_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length,
void **address_p, ucp_mem_h *memh_p,
int non_blk_flag)
{
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM, address_p);
}

static ucs_status_t ucp_perf_rocm_alloc_managed(const ucx_perf_context_t *perf,
size_t length, void **address_p,
ucp_mem_h *memh_p, int non_blk_flag)
{
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM_MANAGED, address_p);
}

static void ucp_perf_rocm_free(const ucx_perf_context_t *perf,
void *address, ucp_mem_h memh)
{
hipFree(address);
}

static inline ucs_status_t
uct_perf_rocm_alloc_reg_mem(const ucx_perf_context_t *perf,
size_t length,
ucs_memory_type_t mem_type,
unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
ucs_status_t status;

status = ucx_perf_rocm_alloc(length, mem_type, &alloc_mem->address);
if (status != UCS_OK) {
return status;
}

status = uct_md_mem_reg(perf->uct.md, alloc_mem->address,
length, flags, &alloc_mem->memh);
if (status != UCS_OK) {
hipFree(alloc_mem->address);
ucs_error("failed to register memory");
return status;
}

alloc_mem->mem_type = mem_type;
alloc_mem->md = perf->uct.md;

return UCS_OK;
}

static ucs_status_t uct_perf_rocm_alloc(const ucx_perf_context_t *perf,
size_t length, unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM,
flags, alloc_mem);
}

static ucs_status_t uct_perf_rocm_managed_alloc(const ucx_perf_context_t *perf,
size_t length, unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM_MANAGED,
flags, alloc_mem);
}

static void uct_perf_rocm_free(const ucx_perf_context_t *perf,
uct_allocated_memory_t *alloc_mem)
{
ucs_status_t status;

ucs_assert(alloc_mem->md == perf->uct.md);

status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh);
if (status != UCS_OK) {
ucs_error("failed to deregister memory");
}

hipFree(alloc_mem->address);
}

static void ucx_perf_rocm_memcpy(void *dst, ucs_memory_type_t dst_mem_type,
const void *src, ucs_memory_type_t src_mem_type,
size_t count)
{
hipError_t ret;

ret = hipMemcpy(dst, src, count, hipMemcpyDefault);
if (ret != hipSuccess) {
ucs_error("failed to copy memory: %s", hipGetErrorString(ret));
}
}

static void* ucx_perf_rocm_memset(void *dst, int value, size_t count)
{
hipError_t ret;

ret = hipMemset(dst, value, count);
if (ret != hipSuccess) {
ucs_error("failed to set memory: %s", hipGetErrorString(ret));
}

return dst;
}

UCS_STATIC_INIT {
static ucx_perf_allocator_t rocm_allocator = {
.mem_type = UCS_MEMORY_TYPE_ROCM,
.init = ucx_perf_rocm_init,
.ucp_alloc = ucp_perf_rocm_alloc,
.ucp_free = ucp_perf_rocm_free,
.uct_alloc = uct_perf_rocm_alloc,
.uct_free = uct_perf_rocm_free,
.memcpy = ucx_perf_rocm_memcpy,
.memset = ucx_perf_rocm_memset
};
static ucx_perf_allocator_t rocm_managed_allocator = {
.mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED,
.init = ucx_perf_rocm_init,
.ucp_alloc = ucp_perf_rocm_alloc_managed,
.ucp_free = ucp_perf_rocm_free,
.uct_alloc = uct_perf_rocm_managed_alloc,
.uct_free = uct_perf_rocm_free,
.memcpy = ucx_perf_rocm_memcpy,
.memset = ucx_perf_rocm_memset
};

ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = &rocm_allocator;
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = &rocm_managed_allocator;
}
UCS_STATIC_CLEANUP {
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = NULL;
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = NULL;

}

0 comments on commit 79c4dde

Please sign in to comment.