From a049c271a7bb494cde5f38cf7f66725e1f13aae7 Mon Sep 17 00:00:00 2001 From: akolliasAMD Date: Tue, 14 Jun 2022 11:56:39 +0000 Subject: [PATCH 1/3] GTEST/UCM/ROCM: Fix is_rocm_managed_supported gtest (cherry picked from commit 0b6a34dc215f5869b9fe782b168b6ae0011e1b99) --- test/gtest/common/mem_buffer.cc | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/test/gtest/common/mem_buffer.cc b/test/gtest/common/mem_buffer.cc index ac8f55e651c..b3a4178e4f5 100644 --- a/test/gtest/common/mem_buffer.cc +++ b/test/gtest/common/mem_buffer.cc @@ -76,12 +76,22 @@ bool mem_buffer::is_gpu_supported() bool mem_buffer::is_rocm_managed_supported() { #if HAVE_ROCM - int device_id, has_managed_mem; - return ((hipGetDevice(&device_id) == hipSuccess) && - (hipDeviceGetAttribute(&has_managed_mem, - hipDeviceAttributeManagedMemory, - device_id) == hipSuccess) && - has_managed_mem); + hipError_t ret; + void *dptr; + hipPointerAttribute_t attr; + + ret = hipMallocManaged(&dptr, 64); + if (ret != hipSuccess) { + return false; + } + + ret = hipPointerGetAttributes(&attr, dptr); + if (ret != hipSuccess) { + return false; + } + + hipFree(dptr); + return attr.memoryType == hipMemoryTypeUnified; #else return false; #endif From 21ca9045d2f1cc837c125170c1724804b583f8ab Mon Sep 17 00:00:00 2001 From: Edgar Date: Sun, 22 May 2022 13:23:12 -0400 Subject: [PATCH 2/3] UCT/ROCM/COPY: implement estimate_perf function provide an implementation of estimate_performance for the internal ops. This helps pass a test in the gtest suite. (cherry picked from commit b38c71e94ccbbafbaa308f04ad2539425f345483) --- src/uct/rocm/copy/rocm_copy_iface.c | 55 ++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/uct/rocm/copy/rocm_copy_iface.c b/src/uct/rocm/copy/rocm_copy_iface.c index 71e8c19467f..513e9048888 100644 --- a/src/uct/rocm/copy/rocm_copy_iface.c +++ b/src/uct/rocm/copy/rocm_copy_iface.c @@ -130,8 +130,61 @@ static uct_iface_ops_t uct_rocm_copy_iface_ops = { .iface_is_reachable = uct_rocm_copy_iface_is_reachable, }; + +static ucs_status_t +uct_rocm_copy_estimate_perf(uct_iface_h tl_iface, uct_perf_attr_t *perf_attr) +{ + if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_BANDWIDTH) { + perf_attr->bandwidth.dedicated = 0; + if (!(perf_attr->field_mask & UCT_PERF_ATTR_FIELD_OPERATION)) { + perf_attr->bandwidth.shared = 0; + } else { + switch (perf_attr->operation) { + case UCT_EP_OP_GET_SHORT: + perf_attr->bandwidth.shared = 2000.0 * UCS_MBYTE; + break; + case UCT_EP_OP_GET_ZCOPY: + perf_attr->bandwidth.shared = 8000.0 * UCS_MBYTE; + break; + case UCT_EP_OP_PUT_SHORT: + perf_attr->bandwidth.shared = 10500.0 * UCS_MBYTE; + break; + case UCT_EP_OP_PUT_ZCOPY: + perf_attr->bandwidth.shared = 9500.0 * UCS_MBYTE; + break; + default: + perf_attr->bandwidth.shared = 0; + break; + } + } + } + + if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_SEND_PRE_OVERHEAD) { + perf_attr->send_pre_overhead = 0; + } + + if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_SEND_POST_OVERHEAD) { + perf_attr->send_post_overhead = 0; + } + + if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_RECV_OVERHEAD) { + perf_attr->recv_overhead = 0; + } + + if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_LATENCY) { + perf_attr->latency = ucs_linear_func_make(10e-6, 0); + } + + if (perf_attr->field_mask & UCT_PERF_ATTR_FIELD_MAX_INFLIGHT_EPS) { + perf_attr->max_inflight_eps = SIZE_MAX; + } + + return UCS_OK; +} + + static uct_iface_internal_ops_t uct_rocm_copy_iface_internal_ops = { - .iface_estimate_perf = uct_base_iface_estimate_perf, + .iface_estimate_perf = uct_rocm_copy_estimate_perf, .iface_vfs_refresh = (uct_iface_vfs_refresh_func_t)ucs_empty_function, .ep_query = (uct_ep_query_func_t)ucs_empty_function_return_unsupported, .ep_invalidate = (uct_ep_invalidate_func_t)ucs_empty_function_return_unsupported From ee3aee9cf4893360196a98162bb2cec699c4c8d7 Mon Sep 17 00:00:00 2001 From: Edgar Date: Sat, 4 Jun 2022 09:52:28 -0400 Subject: [PATCH 3/3] UCM/ROCM: delay memtype detection to md's Delay the memtype detection to rocm md. Otherwise the code path for the detection of rocm between ucm/rocm and uct/rocm can easily diverge and lead to unintended consequences. Fixes an issue observed with a testcase on rocm 5.1 with older linux kernels. (cherry picked from commit cfb0466b666881a6755a9722c60dc51c0fd2dc40) --- src/ucm/rocm/rocmmem.c | 12 +-------- test/gtest/ucm/rocm_hooks.cc | 47 ++---------------------------------- 2 files changed, 3 insertions(+), 56 deletions(-) diff --git a/src/ucm/rocm/rocmmem.c b/src/ucm/rocm/rocmmem.c index 89a9e94e5e9..c406729e553 100644 --- a/src/ucm/rocm/rocmmem.c +++ b/src/ucm/rocm/rocmmem.c @@ -111,24 +111,14 @@ hsa_status_t ucm_hsa_amd_memory_pool_allocate( hsa_amd_memory_pool_t memory_pool, size_t size, uint32_t flags, void** ptr) { - ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM; - uint32_t pool_flags = 0; hsa_status_t status; - status = hsa_amd_memory_pool_get_info(memory_pool, - HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, - &pool_flags); - if (status == HSA_STATUS_SUCCESS && - !(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) { - type = UCS_MEMORY_TYPE_ROCM_MANAGED; - } - ucm_event_enter(); status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr); if (status == HSA_STATUS_SUCCESS) { ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size); - ucm_dispatch_mem_type_alloc(*ptr, size, type); + ucm_dispatch_mem_type_alloc(*ptr, size, UCS_MEMORY_TYPE_UNKNOWN); } ucm_event_leave(); diff --git a/test/gtest/ucm/rocm_hooks.cc b/test/gtest/ucm/rocm_hooks.cc index 548656fcb36..ca8eee4b029 100644 --- a/test/gtest/ucm/rocm_hooks.cc +++ b/test/gtest/ucm/rocm_hooks.cc @@ -69,7 +69,8 @@ class rocm_hooks : public ucs::test { int expect_mem_type = UCS_MEMORY_TYPE_ROCM) { ASSERT_EQ(ptr, alloc_event.mem_type.address); ASSERT_EQ(size, alloc_event.mem_type.size); - ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type); + EXPECT_TRUE((alloc_event.mem_type.mem_type == expect_mem_type) || + (alloc_event.mem_type.mem_type == UCS_MEMORY_TYPE_UNKNOWN)); } void check_mem_free_events(void *ptr, size_t size, @@ -148,47 +149,3 @@ UCS_TEST_F(rocm_hooks, test_hipMallocPitch) { ASSERT_EQ(ret, hipSuccess); check_mem_free_events((void *)dptr, 0); } - -UCS_TEST_F(rocm_hooks, test_hip_Malloc_Free) { - hipError_t ret; - void *ptr, *ptr1; - - /* small allocation */ - ret = hipMalloc(&ptr, 64); - ASSERT_EQ(ret, hipSuccess); - check_mem_alloc_events(ptr, 64); - - ret = hipFree(ptr); - ASSERT_EQ(ret, hipSuccess); - check_mem_free_events(ptr, 64); - - /* large allocation */ - ret = hipMalloc(&ptr, (256 * UCS_MBYTE)); - ASSERT_EQ(ret, hipSuccess); - check_mem_alloc_events(ptr, (256 * UCS_MBYTE)); - - ret = hipFree(ptr); - ASSERT_EQ(ret, hipSuccess); - check_mem_free_events(ptr, (256 * UCS_MBYTE)); - - /* multiple allocations, rocmfree in reverse order */ - ret = hipMalloc(&ptr, (1 * UCS_MBYTE)); - ASSERT_EQ(ret, hipSuccess); - check_mem_alloc_events(ptr, (1 * UCS_MBYTE)); - - ret = hipMalloc(&ptr1, (1 * UCS_MBYTE)); - ASSERT_EQ(ret, hipSuccess); - check_mem_alloc_events(ptr1, (1 * UCS_MBYTE)); - - ret = hipFree(ptr1); - ASSERT_EQ(ret, hipSuccess); - check_mem_free_events(ptr1, (1 * UCS_MBYTE)); - - ret = hipFree(ptr); - ASSERT_EQ(ret, hipSuccess); - check_mem_free_events(ptr, (1 * UCS_MBYTE)); - - /* hipFree with NULL */ - ret = hipFree(NULL); - ASSERT_EQ(ret, hipSuccess); -}