Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TOOLS/PERF: changes to enable the rocm perf modules #4434

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions config/m4/rocm.m4
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ AS_IF([test "x$with_rocm" != "xno"],
[x|xguess|xyes],
[AC_MSG_NOTICE([ROCm path was not specified. Guessing ...])
with_rocm=/opt/rocm
ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include"
ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib"
ROCM_LIBS="-lhsa-runtime64"],
ROCM_CPPFLAGS="-I$with_rocm/include/hsa -D__HIP_PLATFORM_HCC__= -I$with_rocm/hip/include -I$with_rocm/hcc/include -I$with_rocm/hsa/include"
ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib -lhip_hcc -lhsa-runtime64"
ROCM_LIBS="-lhip_hcc -lhsa-runtime64"],
[x/*],
[AC_MSG_NOTICE([ROCm path given as $with_rocm ...])
ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include"
ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib"
ROCM_LIBS="-lhsa-runtime64"],
ROCM_CPPFLAGS="-I$with_rocm/include/hsa -D__HIP_PLATFORM_HCC__= -I$with_rocm/hip/include -I$with_rocm/hcc/include -I$with_rocm/hsa/include"
ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib -lhip_hcc -lhsa-runtime64"
ROCM_LIBS="-lhip_hcc -lhsa-runtime64"],
[AC_MSG_NOTICE([ROCm flags given ...])
ROCM_PARSE_FLAGS([with_rocm],
[ROCM_LIBS], [ROCM_LDFLAGS], [ROCM_CPPFLAGS])])
Expand Down
2 changes: 1 addition & 1 deletion src/tools/perf/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# See file LICENSE for terms.
#

SUBDIRS = cuda lib
SUBDIRS = cuda rocm lib
CC = $(UCX_PERFTEST_CC)

noinst_HEADERS = api/libperf.h
Expand Down
1 change: 1 addition & 0 deletions src/tools/perf/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ucx_perftest_modules=""
m4_include([src/tools/perf/lib/configure.m4])
m4_include([src/tools/perf/cuda/configure.m4])
m4_include([src/tools/perf/rocm/configure.m4])
AC_DEFINE_UNQUOTED([ucx_perftest_MODULES], ["${ucx_perftest_modules}"],
[Perftest loadable modules])

Expand Down
14 changes: 14 additions & 0 deletions src/tools/perf/perftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,15 @@ static void usage(const struct perftest_context *ctx, const char *program)
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL) {
printf(" cuda - NVIDIA GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL) {
printf(" rocm - ROCm GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL) {
printf(" cuda-managed - NVIDIA cuda managed/unified memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL) {
printf(" rocm-managed - AMD ROCm managed/unified memory\n");
}
printf(" -n <iters> number of iterations to run (%ld)\n", ctx->params.max_iter);
printf(" -w <iters> number of warm-up iterations (%zu)\n",
ctx->params.warmup_iter);
Expand Down Expand Up @@ -669,10 +675,18 @@ static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA;
return UCS_OK;
} else if (!strcmp(optarg, "rocm") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM;
return UCS_OK;
} else if (!strcmp(optarg, "cuda-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA_MANAGED;
return UCS_OK;
} else if (!strcmp(optarg, "rocm-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
return UCS_OK;
}

ucs_error("Unsupported memory type: \"%s\"", optarg);
Expand Down
17 changes: 17 additions & 0 deletions src/tools/perf/rocm/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

if HAVE_ROCM

module_LTLIBRARIES = libucx_perftest_rocm.la
libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) -D__HIP_PLATFORM_HCC__= -I/opt/rocm/hip/include -I/opt/rocm/hcc/include -I/opt/rocm/hsa/include
libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(ROCM_CFLAGS) -D__HIP_PLATFORM_HCC__= -I/opt/rocm/hip/include -I/opt/rocm/hcc/include -I/opt/rocm/hsa/include
libucx_perftest_rocm_la_LDFLAGS = $(ROCM_LDFLAGS) -version-info $(SOVERSION)
libucx_perftest_rocm_la_SOURCES = rocm_alloc.c

include $(top_srcdir)/config/module.am

endif
11 changes: 11 additions & 0 deletions src/tools/perf/rocm/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

UCX_CHECK_ROCM

AS_IF([test "x$rocm_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:rocm"])

AC_CONFIG_FILES([src/tools/perf/rocm/Makefile])
189 changes: 189 additions & 0 deletions src/tools/perf/rocm/rocm_alloc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
paklui marked this conversation as resolved.
Show resolved Hide resolved
*
* See file LICENSE for terms.
*/

#include <tools/perf/lib/libperf_int.h>

#include "hip/hip_runtime.h"
#include <ucs/sys/compiler.h>


static ucs_status_t ucx_perf_rocm_init(ucx_perf_context_t *perf)
{
hipError_t hiperr;
unsigned group_index;
int num_gpus;
int gpu_index;

group_index = rte_call(perf, group_index);

hiperr = hipGetDeviceCount(&num_gpus);
if (hiperr != hipSuccess) {
return UCS_ERR_NO_DEVICE;
}

gpu_index = group_index % num_gpus;

hiperr = hipSetDevice(gpu_index);
if (hiperr != hipSuccess) {
return UCS_ERR_NO_DEVICE;
}

return UCS_OK;
}

static inline ucs_status_t ucx_perf_rocm_alloc(size_t length,
ucs_memory_type_t mem_type,
void **address_p)
{
hipError_t hiperr;

ucs_assert((mem_type == UCS_MEMORY_TYPE_ROCM) ||
(mem_type == UCS_MEMORY_TYPE_ROCM_MANAGED));

hiperr = ((mem_type == UCS_MEMORY_TYPE_ROCM) ?
hipMalloc(address_p, length) :
hipMallocManaged(address_p, length, hipMemAttachGlobal));
if (hiperr != hipSuccess) {
ucs_error("failed to allocate memory");
return UCS_ERR_NO_MEMORY;
}

return UCS_OK;
}

static ucs_status_t ucp_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length,
void **address_p, ucp_mem_h *memh_p,
int non_blk_flag)
{
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM, address_p);
}

static ucs_status_t ucp_perf_rocm_alloc_managed(const ucx_perf_context_t *perf,
size_t length, void **address_p,
ucp_mem_h *memh_p, int non_blk_flag)
{
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM_MANAGED, address_p);
}

static void ucp_perf_rocm_free(const ucx_perf_context_t *perf,
void *address, ucp_mem_h memh)
{
hipFree(address);
}

static inline ucs_status_t
uct_perf_rocm_alloc_reg_mem(const ucx_perf_context_t *perf,
size_t length,
ucs_memory_type_t mem_type,
unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
ucs_status_t status;

status = ucx_perf_rocm_alloc(length, mem_type, &alloc_mem->address);
if (status != UCS_OK) {
return status;
}

status = uct_md_mem_reg(perf->uct.md, alloc_mem->address,
length, flags, &alloc_mem->memh);
if (status != UCS_OK) {
hipFree(alloc_mem->address);
ucs_error("failed to register memory");
return status;
}

alloc_mem->mem_type = mem_type;
alloc_mem->md = perf->uct.md;

return UCS_OK;
}

static ucs_status_t uct_perf_rocm_alloc(const ucx_perf_context_t *perf,
size_t length, unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM,
flags, alloc_mem);
}

static ucs_status_t uct_perf_rocm_managed_alloc(const ucx_perf_context_t *perf,
size_t length, unsigned flags,
uct_allocated_memory_t *alloc_mem)
{
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM_MANAGED,
flags, alloc_mem);
}

static void uct_perf_rocm_free(const ucx_perf_context_t *perf,
uct_allocated_memory_t *alloc_mem)
{
ucs_status_t status;

ucs_assert(alloc_mem->md == perf->uct.md);

status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh);
if (status != UCS_OK) {
ucs_error("failed to deregister memory");
}

hipFree(alloc_mem->address);
}

static void ucx_perf_rocm_memcpy(void *dst, ucs_memory_type_t dst_mem_type,
const void *src, ucs_memory_type_t src_mem_type,
size_t count)
{
hipError_t hiperr;

hiperr = hipMemcpy(dst, src, count, hipMemcpyDefault);
if (hiperr != hipSuccess) {
ucs_error("failed to copy memory: %s", hipGetErrorString(hiperr));
}
}

static void* ucx_perf_rocm_memset(void *dst, int value, size_t count)
{
hipError_t hiperr;

hiperr = hipMemset(dst, value, count);
if (hiperr != hipSuccess) {
ucs_error("failed to set memory: %s", hipGetErrorString(hiperr));
}

return dst;
}

UCS_STATIC_INIT {
static ucx_perf_allocator_t hip_allocator = {
.mem_type = UCS_MEMORY_TYPE_ROCM,
.init = ucx_perf_rocm_init,
.ucp_alloc = ucp_perf_rocm_alloc,
.ucp_free = ucp_perf_rocm_free,
.uct_alloc = uct_perf_rocm_alloc,
.uct_free = uct_perf_rocm_free,
.memcpy = ucx_perf_rocm_memcpy,
.memset = ucx_perf_rocm_memset
};
static ucx_perf_allocator_t hip_managed_allocator = {
.mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED,
.init = ucx_perf_rocm_init,
.ucp_alloc = ucp_perf_rocm_alloc_managed,
.ucp_free = ucp_perf_rocm_free,
.uct_alloc = uct_perf_rocm_managed_alloc,
.uct_free = uct_perf_rocm_free,
.memcpy = ucx_perf_rocm_memcpy,
.memset = ucx_perf_rocm_memset
};

ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = &hip_allocator;
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = &hip_managed_allocator;
}
UCS_STATIC_CLEANUP {
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = NULL;
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = NULL;

paklui marked this conversation as resolved.
Show resolved Hide resolved
}