Skip to content

Commit

Permalink
Merge pull request #8330 from edgargabriel/pr/rocm-v1.13-fixes2
Browse files Browse the repository at this point in the history
GTEST/ROCM and UCM/ROCM: fixes for the v1.13 release
  • Loading branch information
yosefe authored Jun 21, 2022
2 parents ad4b171 + ee3aee9 commit a8ee387
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 63 deletions.
12 changes: 1 addition & 11 deletions src/ucm/rocm/rocmmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,24 +111,14 @@ hsa_status_t ucm_hsa_amd_memory_pool_allocate(
hsa_amd_memory_pool_t memory_pool, size_t size,
uint32_t flags, void** ptr)
{
ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM;
uint32_t pool_flags = 0;
hsa_status_t status;

status = hsa_amd_memory_pool_get_info(memory_pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
&pool_flags);
if (status == HSA_STATUS_SUCCESS &&
!(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) {
type = UCS_MEMORY_TYPE_ROCM_MANAGED;
}

ucm_event_enter();

status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr);
if (status == HSA_STATUS_SUCCESS) {
ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size);
ucm_dispatch_mem_type_alloc(*ptr, size, type);
ucm_dispatch_mem_type_alloc(*ptr, size, UCS_MEMORY_TYPE_UNKNOWN);
}

ucm_event_leave();
Expand Down
55 changes: 54 additions & 1 deletion src/uct/rocm/copy/rocm_copy_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,61 @@ static uct_iface_ops_t uct_rocm_copy_iface_ops = {
.iface_is_reachable = uct_rocm_copy_iface_is_reachable,
};


static ucs_status_t
uct_rocm_copy_estimate_perf(uct_iface_h tl_iface, uct_perf_attr_t *perf_attr)
{
if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_BANDWIDTH) {
perf_attr->bandwidth.dedicated = 0;
if (!(perf_attr->field_mask & UCT_PERF_ATTR_FIELD_OPERATION)) {
perf_attr->bandwidth.shared = 0;
} else {
switch (perf_attr->operation) {
case UCT_EP_OP_GET_SHORT:
perf_attr->bandwidth.shared = 2000.0 * UCS_MBYTE;
break;
case UCT_EP_OP_GET_ZCOPY:
perf_attr->bandwidth.shared = 8000.0 * UCS_MBYTE;
break;
case UCT_EP_OP_PUT_SHORT:
perf_attr->bandwidth.shared = 10500.0 * UCS_MBYTE;
break;
case UCT_EP_OP_PUT_ZCOPY:
perf_attr->bandwidth.shared = 9500.0 * UCS_MBYTE;
break;
default:
perf_attr->bandwidth.shared = 0;
break;
}
}
}

if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_SEND_PRE_OVERHEAD) {
perf_attr->send_pre_overhead = 0;
}

if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_SEND_POST_OVERHEAD) {
perf_attr->send_post_overhead = 0;
}

if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_RECV_OVERHEAD) {
perf_attr->recv_overhead = 0;
}

if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_LATENCY) {
perf_attr->latency = ucs_linear_func_make(10e-6, 0);
}

if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_MAX_INFLIGHT_EPS) {
perf_attr->max_inflight_eps = SIZE_MAX;
}

return UCS_OK;
}


static uct_iface_internal_ops_t uct_rocm_copy_iface_internal_ops = {
.iface_estimate_perf = uct_base_iface_estimate_perf,
.iface_estimate_perf = uct_rocm_copy_estimate_perf,
.iface_vfs_refresh = (uct_iface_vfs_refresh_func_t)ucs_empty_function,
.ep_query = (uct_ep_query_func_t)ucs_empty_function_return_unsupported,
.ep_invalidate = (uct_ep_invalidate_func_t)ucs_empty_function_return_unsupported
Expand Down
22 changes: 16 additions & 6 deletions test/gtest/common/mem_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,22 @@ bool mem_buffer::is_gpu_supported()
bool mem_buffer::is_rocm_managed_supported()
{
#if HAVE_ROCM
int device_id, has_managed_mem;
return ((hipGetDevice(&device_id) == hipSuccess) &&
(hipDeviceGetAttribute(&has_managed_mem,
hipDeviceAttributeManagedMemory,
device_id) == hipSuccess) &&
has_managed_mem);
hipError_t ret;
void *dptr;
hipPointerAttribute_t attr;

ret = hipMallocManaged(&dptr, 64);
if (ret != hipSuccess) {
return false;
}

ret = hipPointerGetAttributes(&attr, dptr);
if (ret != hipSuccess) {
return false;
}

hipFree(dptr);
return attr.memoryType == hipMemoryTypeUnified;
#else
return false;
#endif
Expand Down
47 changes: 2 additions & 45 deletions test/gtest/ucm/rocm_hooks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class rocm_hooks : public ucs::test {
int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
ASSERT_EQ(ptr, alloc_event.mem_type.address);
ASSERT_EQ(size, alloc_event.mem_type.size);
ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type);
EXPECT_TRUE((alloc_event.mem_type.mem_type == expect_mem_type) ||
(alloc_event.mem_type.mem_type == UCS_MEMORY_TYPE_UNKNOWN));
}

void check_mem_free_events(void *ptr, size_t size,
Expand Down Expand Up @@ -148,47 +149,3 @@ UCS_TEST_F(rocm_hooks, test_hipMallocPitch) {
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, 0);
}

UCS_TEST_F(rocm_hooks, test_hip_Malloc_Free) {
hipError_t ret;
void *ptr, *ptr1;

/* small allocation */
ret = hipMalloc(&ptr, 64);
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, 64);

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, 64);

/* large allocation */
ret = hipMalloc(&ptr, (256 * UCS_MBYTE));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, (256 * UCS_MBYTE));

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, (256 * UCS_MBYTE));

/* multiple allocations, rocmfree in reverse order */
ret = hipMalloc(&ptr, (1 * UCS_MBYTE));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, (1 * UCS_MBYTE));

ret = hipMalloc(&ptr1, (1 * UCS_MBYTE));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr1, (1 * UCS_MBYTE));

ret = hipFree(ptr1);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr1, (1 * UCS_MBYTE));

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, (1 * UCS_MBYTE));

/* hipFree with NULL */
ret = hipFree(NULL);
ASSERT_EQ(ret, hipSuccess);
}

0 comments on commit a8ee387

Please sign in to comment.