From 8659677e6b94e8d9b69c5a566947c45df13b9cf3 Mon Sep 17 00:00:00 2001 From: dmitrygx Date: Wed, 1 Dec 2021 18:19:39 +0200 Subject: [PATCH] UCP/CORE: Fix memory cache lookup return value handling --- src/ucp/core/ucp_context.h | 28 +++++++++++---------------- src/ucs/memory/memtype_cache.c | 6 +++++- src/ucs/memory/memtype_cache.h | 4 +++- src/uct/cuda/cuda_copy/cuda_copy_ep.c | 8 ++++++-- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h index e9e3d0a01c8..92d7ce8e1c1 100644 --- a/src/ucp/core/ucp_context.h +++ b/src/ucp/core/ucp_context.h @@ -497,25 +497,19 @@ ucp_memory_detect_internal(ucp_context_h context, const void *address, } status = ucs_memtype_cache_lookup(address, length, mem_info); - if (status != UCS_ERR_NO_ELEM) { - if (ucs_likely(status != UCS_OK)) { - ucs_assert(status == UCS_ERR_NO_ELEM); - goto out_host_mem; - } - - if ((mem_info->type != UCS_MEMORY_TYPE_UNKNOWN) && - ((mem_info->sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN))) { - return; - } - - /* Fall thru to slow-path memory type and system device detection by UCT - * memory domains. In any case, the memory type cache is not expected to - * return HOST memory type. 
- */ - ucs_assert(mem_info->type != UCS_MEMORY_TYPE_HOST); + if (ucs_likely(status == UCS_ERR_NO_ELEM)) { + goto out_host_mem; + } else if ((status == UCS_ERR_UNSUPPORTED) || + ((status == UCS_OK) && + ((mem_info->type == UCS_MEMORY_TYPE_UNKNOWN) || + (mem_info->sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN)))) { + ucp_memory_detect_slowpath(context, address, length, mem_info); + } else { + ucs_assertv(status == UCS_OK, "%s (%d)", ucs_status_string(status), + status); } - ucp_memory_detect_slowpath(context, address, length, mem_info); + /* Memory type and system device was detected successfully */ return; out_host_mem: diff --git a/src/ucs/memory/memtype_cache.c b/src/ucs/memory/memtype_cache.c index 23c87386661..f290ab413c7 100644 --- a/src/ucs/memory/memtype_cache.c +++ b/src/ucs/memory/memtype_cache.c @@ -325,7 +325,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucs_memtype_cache_lookup, ucs_status_t status; if (memtype_cache == NULL) { - return UCS_ERR_NO_ELEM; + return UCS_ERR_UNSUPPORTED; } pthread_rwlock_rdlock(&memtype_cache->lock); @@ -345,6 +345,10 @@ UCS_PROFILE_FUNC(ucs_status_t, ucs_memtype_cache_lookup, } status = UCS_OK; + /* The memory type cache is not expected to return HOST memory type */ + ucs_assertv(mem_info->type != UCS_MEMORY_TYPE_HOST, "%s (%d)", + ucs_memory_type_names[mem_info->type], mem_info->type); + out_unlock: pthread_rwlock_unlock(&memtype_cache->lock); return status; diff --git a/src/ucs/memory/memtype_cache.h b/src/ucs/memory/memtype_cache.h index 55d3bfe9529..8f0877060a2 100644 --- a/src/ucs/memory/memtype_cache.h +++ b/src/ucs/memory/memtype_cache.h @@ -60,7 +60,9 @@ struct ucs_memtype_cache { * means the memory type is an unknown non-host * memory, and should be detected in another way. * - * @return Error code. + * @return UCS_OK - an element was found and the memory info is valid. + * @return UCS_ERR_NO_ELEM - an element was not found. + * @return UCS_ERR_UNSUPPORTED - the memory type cache is disabled. 
*/ ucs_status_t ucs_memtype_cache_lookup(const void *address, size_t size, ucs_memory_info_t *mem_info); diff --git a/src/uct/cuda/cuda_copy/cuda_copy_ep.c b/src/uct/cuda/cuda_copy/cuda_copy_ep.c index c3eba64ba98..7f6f8c10b82 100644 --- a/src/uct/cuda/cuda_copy/cuda_copy_ep.c +++ b/src/uct/cuda/cuda_copy/cuda_copy_ep.c @@ -81,8 +81,12 @@ uct_cuda_copy_get_mem_type(uct_md_h md, void *address, size_t length) ucs_status_t status; status = ucs_memtype_cache_lookup(address, length, &mem_info); - if ((status == UCS_ERR_NO_ELEM) || - ((mem_info.type == UCS_MEMORY_TYPE_UNKNOWN))) { + if (status == UCS_ERR_NO_ELEM) { + return UCS_MEMORY_TYPE_HOST; + } + + if ((status == UCS_ERR_UNSUPPORTED) || + (mem_info.type == UCS_MEMORY_TYPE_UNKNOWN)) { status = uct_cuda_base_detect_memory_type(md, address, length, &mem_info.type); if (status != UCS_OK) {