Skip to content

Commit

Permalink
UCM/ROCM: delay memtype detection to md's
Browse files Browse the repository at this point in the history
Delay the memtype detection to rocm md. Otherwise
the code path for the detection of rocm between
ucm/rocm and uct/rocm can easily diverge and lead to unintended
consequences.

Fixes an issue observed with a testcase on rocm 5.1 with older linux kernels.

(cherry picked from commit cfb0466)
  • Loading branch information
edgargabriel committed Jun 20, 2022
1 parent 21ca904 commit ee3aee9
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 56 deletions.
12 changes: 1 addition & 11 deletions src/ucm/rocm/rocmmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,24 +111,14 @@ hsa_status_t ucm_hsa_amd_memory_pool_allocate(
hsa_amd_memory_pool_t memory_pool, size_t size,
uint32_t flags, void** ptr)
{
ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM;
uint32_t pool_flags = 0;
hsa_status_t status;

status = hsa_amd_memory_pool_get_info(memory_pool,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS,
&pool_flags);
if (status == HSA_STATUS_SUCCESS &&
!(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) {
type = UCS_MEMORY_TYPE_ROCM_MANAGED;
}

ucm_event_enter();

status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr);
if (status == HSA_STATUS_SUCCESS) {
ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size);
ucm_dispatch_mem_type_alloc(*ptr, size, type);
ucm_dispatch_mem_type_alloc(*ptr, size, UCS_MEMORY_TYPE_UNKNOWN);
}

ucm_event_leave();
Expand Down
47 changes: 2 additions & 45 deletions test/gtest/ucm/rocm_hooks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class rocm_hooks : public ucs::test {
int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
ASSERT_EQ(ptr, alloc_event.mem_type.address);
ASSERT_EQ(size, alloc_event.mem_type.size);
ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type);
EXPECT_TRUE((alloc_event.mem_type.mem_type == expect_mem_type) ||
(alloc_event.mem_type.mem_type == UCS_MEMORY_TYPE_UNKNOWN));
}

void check_mem_free_events(void *ptr, size_t size,
Expand Down Expand Up @@ -148,47 +149,3 @@ UCS_TEST_F(rocm_hooks, test_hipMallocPitch) {
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events((void *)dptr, 0);
}

UCS_TEST_F(rocm_hooks, test_hip_Malloc_Free) {
hipError_t ret;
void *ptr, *ptr1;

/* small allocation */
ret = hipMalloc(&ptr, 64);
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, 64);

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, 64);

/* large allocation */
ret = hipMalloc(&ptr, (256 * UCS_MBYTE));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, (256 * UCS_MBYTE));

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, (256 * UCS_MBYTE));

/* multiple allocations, rocmfree in reverse order */
ret = hipMalloc(&ptr, (1 * UCS_MBYTE));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr, (1 * UCS_MBYTE));

ret = hipMalloc(&ptr1, (1 * UCS_MBYTE));
ASSERT_EQ(ret, hipSuccess);
check_mem_alloc_events(ptr1, (1 * UCS_MBYTE));

ret = hipFree(ptr1);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr1, (1 * UCS_MBYTE));

ret = hipFree(ptr);
ASSERT_EQ(ret, hipSuccess);
check_mem_free_events(ptr, (1 * UCS_MBYTE));

/* hipFree with NULL */
ret = hipFree(NULL);
ASSERT_EQ(ret, hipSuccess);
}

0 comments on commit ee3aee9

Please sign in to comment.