Patch summary (free text ahead of the first "diff --git" header; patch tools skip it):
  * uct.h: adds UCT_MD_MEM_TYPE_CUDA_MANAGED to uct_memory_type_t, placed before UCT_MD_MEM_TYPE_LAST.
  * ucp_context.c: ucm_to_uct_mem_type_map[] now maps UCM_MEM_TYPE_CUDA_MANAGED to the new type instead of UCT_MD_MEM_TYPE_HOST.
  * ucp_mm.h: adds the UCP_MEM_IS_CUDA_MANAGED(_mem_type) predicate next to UCP_MEM_IS_HOST.
  * ucp_request.inl, dt.c, dt.inl: the contiguous-datatype paths call memcpy when the memory type is host OR CUDA managed, and otherwise fall through to ucp_mem_type_pack / ucp_mem_type_unpack (presumably because managed memory is CPU-addressable; confirm against the CUDA MD).
  * The combined check is wrapped in a single ucs_likely() so the hint covers the whole "memcpy is usable" predicate rather than each operand separately.
  * test_md.cc: test_md::mem_types[] gains "cuda-managed".
  NOTE(review): UCT_MD_MEM_TYPE_LAST grows by one; tables sized or indexed by uct_memory_type_t outside this patch are not visible here and should be checked.
diff --git a/src/ucp/core/ucp_context.c b/src/ucp/core/ucp_context.c index 9de316690ca..119385fe7eb 100644 --- a/src/ucp/core/ucp_context.c +++ b/src/ucp/core/ucp_context.c @@ -47,7 +47,7 @@ static const char * ucp_rndv_modes[] = { uct_memory_type_t ucm_to_uct_mem_type_map[] = { [UCM_MEM_TYPE_CUDA] = UCT_MD_MEM_TYPE_CUDA, - [UCM_MEM_TYPE_CUDA_MANAGED] = UCT_MD_MEM_TYPE_HOST + [UCM_MEM_TYPE_CUDA_MANAGED] = UCT_MD_MEM_TYPE_CUDA_MANAGED }; static ucs_config_field_t ucp_config_table[] = { diff --git a/src/ucp/core/ucp_mm.h b/src/ucp/core/ucp_mm.h index e986a511164..f098eb6a30d 100644 --- a/src/ucp/core/ucp_mm.h +++ b/src/ucp/core/ucp_mm.h @@ -175,5 +175,6 @@ ucp_memh2uct(ucp_mem_h memh, ucp_md_index_t md_idx) }) #define UCP_MEM_IS_HOST(_mem_type) ((_mem_type) == UCT_MD_MEM_TYPE_HOST) +#define UCP_MEM_IS_CUDA_MANAGED(_mem_type) ((_mem_type) == UCT_MD_MEM_TYPE_CUDA_MANAGED) #endif diff --git a/src/ucp/core/ucp_request.inl b/src/ucp/core/ucp_request.inl index 678dee9e6a0..2d67db86e57 100644 --- a/src/ucp/core/ucp_request.inl +++ b/src/ucp/core/ucp_request.inl @@ -428,7 +428,8 @@ ucp_request_recv_data_unpack(ucp_request_t *req, const void *data, switch (req->recv.datatype & UCP_DATATYPE_CLASS_MASK) { case UCP_DATATYPE_CONTIG: - if (ucs_likely(UCP_MEM_IS_HOST(req->recv.mem_type))) { + if (ucs_likely(UCP_MEM_IS_HOST(req->recv.mem_type) || + UCP_MEM_IS_CUDA_MANAGED(req->recv.mem_type))) { UCS_PROFILE_NAMED_CALL("memcpy_recv", memcpy, req->recv.buffer + offset, data, length); } else { diff --git a/src/ucp/dt/dt.c b/src/ucp/dt/dt.c index e8da59d55cb..1c0fa8484ed 100644 --- a/src/ucp/dt/dt.c +++ b/src/ucp/dt/dt.c @@ -102,7 +102,8 @@ size_t ucp_dt_pack(ucp_worker_h worker, ucp_datatype_t datatype, switch (datatype & UCP_DATATYPE_CLASS_MASK) { case UCP_DATATYPE_CONTIG: - if (ucs_likely(UCP_MEM_IS_HOST(mem_type))) { + if (ucs_likely(UCP_MEM_IS_HOST(mem_type) || + UCP_MEM_IS_CUDA_MANAGED(mem_type))) { UCS_PROFILE_CALL(memcpy, dest, src + 
state->offset, length); } else { ucp_mem_type_pack(worker, dest, src + state->offset, length, mem_type); diff --git a/src/ucp/dt/dt.inl b/src/ucp/dt/dt.inl index ec6f8e38e50..99f8f31d2be 100644 --- a/src/ucp/dt/dt.inl +++ b/src/ucp/dt/dt.inl @@ -56,7 +56,8 @@ ucp_dt_unpack_only(ucp_worker_h worker, void *buffer, size_t count, ucs_unlikely(length > (buffer_size = ucp_contig_dt_length(datatype, count)))) { goto err_truncated; } - if (ucs_likely(UCP_MEM_IS_HOST(mem_type))) { + if (ucs_likely(UCP_MEM_IS_HOST(mem_type) || + UCP_MEM_IS_CUDA_MANAGED(mem_type))) { UCS_PROFILE_NAMED_CALL("memcpy_recv", memcpy, buffer, data, length); } else { ucp_mem_type_unpack(worker, buffer, data, length, mem_type); diff --git a/src/uct/api/uct.h b/src/uct/api/uct.h index 64251f0dc17..c08db1ce7ea 100644 --- a/src/uct/api/uct.h +++ b/src/uct/api/uct.h @@ -414,6 +414,7 @@ enum { typedef enum { UCT_MD_MEM_TYPE_HOST = 0, /**< Default system memory */ UCT_MD_MEM_TYPE_CUDA, /**< NVIDIA CUDA memory */ + UCT_MD_MEM_TYPE_CUDA_MANAGED, /**< NVIDIA CUDA managed (or unified) memory */ UCT_MD_MEM_TYPE_LAST } uct_memory_type_t; diff --git a/test/gtest/uct/test_md.cc b/test/gtest/uct/test_md.cc index eb9e451f11c..76405aaf46e 100644 --- a/test/gtest/uct/test_md.cc +++ b/test/gtest/uct/test_md.cc @@ -24,7 +24,7 @@ extern "C" { #include #endif -std::string const test_md::mem_types[] = {"host", "cuda"}; +std::string const test_md::mem_types[] = {"host", "cuda", "cuda-managed"}; void* test_md::alloc_thread(void *arg) {