-
Notifications
You must be signed in to change notification settings - Fork 423
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4587 from ROCmSoftwarePlatform/topic/sourav/add-r…
…ocm-perftest TOOLS/PERF: Enable ROCM memory type support for UCP perf tests
- Loading branch information
Showing
7 changed files
with
234 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# | ||
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. | ||
# | ||
# See file LICENSE for terms. | ||
# | ||
|
||
if HAVE_HIP | ||
|
||
module_LTLIBRARIES = libucx_perftest_rocm.la | ||
libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(HIP_CPPFLAGS) | ||
libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(HIP_CFLAGS) | ||
libucx_perftest_rocm_la_LDFLAGS = $(HIP_LDFLAGS) $(HIP_LIBS) -version-info $(SOVERSION) | ||
libucx_perftest_rocm_la_SOURCES = rocm_alloc.c | ||
|
||
include $(top_srcdir)/config/module.am | ||
|
||
endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# | ||
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. | ||
# | ||
# See file LICENSE for terms. | ||
# | ||
|
||
UCX_CHECK_ROCM | ||
|
||
AS_IF([test "x$rocm_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:rocm"]) | ||
|
||
AC_CONFIG_FILES([src/tools/perf/rocm/Makefile]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
/** | ||
* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. | ||
* | ||
* See file LICENSE for terms. | ||
*/ | ||
|
||
#include <tools/perf/lib/libperf_int.h> | ||
|
||
#include <hip/hip_runtime.h> | ||
#include <ucs/sys/compiler.h> | ||
|
||
|
||
static ucs_status_t ucx_perf_rocm_init(ucx_perf_context_t *perf) | ||
{ | ||
hipError_t ret; | ||
unsigned group_index; | ||
int num_gpus; | ||
int gpu_index; | ||
|
||
group_index = rte_call(perf, group_index); | ||
|
||
ret = hipGetDeviceCount(&num_gpus); | ||
if (ret != hipSuccess) { | ||
return UCS_ERR_NO_DEVICE; | ||
} | ||
|
||
gpu_index = group_index % num_gpus; | ||
|
||
ret = hipSetDevice(gpu_index); | ||
if (ret != hipSuccess) { | ||
return UCS_ERR_NO_DEVICE; | ||
} | ||
|
||
return UCS_OK; | ||
} | ||
|
||
static inline ucs_status_t ucx_perf_rocm_alloc(size_t length, | ||
ucs_memory_type_t mem_type, | ||
void **address_p) | ||
{ | ||
hipError_t ret; | ||
|
||
ucs_assert((mem_type == UCS_MEMORY_TYPE_ROCM) || | ||
(mem_type == UCS_MEMORY_TYPE_ROCM_MANAGED)); | ||
|
||
ret = ((mem_type == UCS_MEMORY_TYPE_ROCM) ? | ||
hipMalloc(address_p, length) : | ||
hipMallocManaged(address_p, length, hipMemAttachGlobal)); | ||
if (ret != hipSuccess) { | ||
ucs_error("failed to allocate memory"); | ||
return UCS_ERR_NO_MEMORY; | ||
} | ||
|
||
return UCS_OK; | ||
} | ||
|
||
static ucs_status_t ucp_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length, | ||
void **address_p, ucp_mem_h *memh_p, | ||
int non_blk_flag) | ||
{ | ||
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM, address_p); | ||
} | ||
|
||
static ucs_status_t ucp_perf_rocm_alloc_managed(const ucx_perf_context_t *perf, | ||
size_t length, void **address_p, | ||
ucp_mem_h *memh_p, int non_blk_flag) | ||
{ | ||
return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM_MANAGED, address_p); | ||
} | ||
|
||
static void ucp_perf_rocm_free(const ucx_perf_context_t *perf, | ||
void *address, ucp_mem_h memh) | ||
{ | ||
hipFree(address); | ||
} | ||
|
||
static inline ucs_status_t | ||
uct_perf_rocm_alloc_reg_mem(const ucx_perf_context_t *perf, | ||
size_t length, | ||
ucs_memory_type_t mem_type, | ||
unsigned flags, | ||
uct_allocated_memory_t *alloc_mem) | ||
{ | ||
ucs_status_t status; | ||
|
||
status = ucx_perf_rocm_alloc(length, mem_type, &alloc_mem->address); | ||
if (status != UCS_OK) { | ||
return status; | ||
} | ||
|
||
status = uct_md_mem_reg(perf->uct.md, alloc_mem->address, | ||
length, flags, &alloc_mem->memh); | ||
if (status != UCS_OK) { | ||
hipFree(alloc_mem->address); | ||
ucs_error("failed to register memory"); | ||
return status; | ||
} | ||
|
||
alloc_mem->mem_type = mem_type; | ||
alloc_mem->md = perf->uct.md; | ||
|
||
return UCS_OK; | ||
} | ||
|
||
static ucs_status_t uct_perf_rocm_alloc(const ucx_perf_context_t *perf, | ||
size_t length, unsigned flags, | ||
uct_allocated_memory_t *alloc_mem) | ||
{ | ||
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM, | ||
flags, alloc_mem); | ||
} | ||
|
||
static ucs_status_t uct_perf_rocm_managed_alloc(const ucx_perf_context_t *perf, | ||
size_t length, unsigned flags, | ||
uct_allocated_memory_t *alloc_mem) | ||
{ | ||
return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM_MANAGED, | ||
flags, alloc_mem); | ||
} | ||
|
||
static void uct_perf_rocm_free(const ucx_perf_context_t *perf, | ||
uct_allocated_memory_t *alloc_mem) | ||
{ | ||
ucs_status_t status; | ||
|
||
ucs_assert(alloc_mem->md == perf->uct.md); | ||
|
||
status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh); | ||
if (status != UCS_OK) { | ||
ucs_error("failed to deregister memory"); | ||
} | ||
|
||
hipFree(alloc_mem->address); | ||
} | ||
|
||
static void ucx_perf_rocm_memcpy(void *dst, ucs_memory_type_t dst_mem_type, | ||
const void *src, ucs_memory_type_t src_mem_type, | ||
size_t count) | ||
{ | ||
hipError_t ret; | ||
|
||
ret = hipMemcpy(dst, src, count, hipMemcpyDefault); | ||
if (ret != hipSuccess) { | ||
ucs_error("failed to copy memory: %s", hipGetErrorString(ret)); | ||
} | ||
} | ||
|
||
static void* ucx_perf_rocm_memset(void *dst, int value, size_t count) | ||
{ | ||
hipError_t ret; | ||
|
||
ret = hipMemset(dst, value, count); | ||
if (ret != hipSuccess) { | ||
ucs_error("failed to set memory: %s", hipGetErrorString(ret)); | ||
} | ||
|
||
return dst; | ||
} | ||
|
||
UCS_STATIC_INIT { | ||
static ucx_perf_allocator_t rocm_allocator = { | ||
.mem_type = UCS_MEMORY_TYPE_ROCM, | ||
.init = ucx_perf_rocm_init, | ||
.ucp_alloc = ucp_perf_rocm_alloc, | ||
.ucp_free = ucp_perf_rocm_free, | ||
.uct_alloc = uct_perf_rocm_alloc, | ||
.uct_free = uct_perf_rocm_free, | ||
.memcpy = ucx_perf_rocm_memcpy, | ||
.memset = ucx_perf_rocm_memset | ||
}; | ||
static ucx_perf_allocator_t rocm_managed_allocator = { | ||
.mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED, | ||
.init = ucx_perf_rocm_init, | ||
.ucp_alloc = ucp_perf_rocm_alloc_managed, | ||
.ucp_free = ucp_perf_rocm_free, | ||
.uct_alloc = uct_perf_rocm_managed_alloc, | ||
.uct_free = uct_perf_rocm_free, | ||
.memcpy = ucx_perf_rocm_memcpy, | ||
.memset = ucx_perf_rocm_memset | ||
}; | ||
|
||
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = &rocm_allocator; | ||
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = &rocm_managed_allocator; | ||
} | ||
UCS_STATIC_CLEANUP { | ||
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = NULL; | ||
ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = NULL; | ||
|
||
} |