From 1b06d65a9e03b9eb45042d103bda68aa47aed518 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Mon, 14 Feb 2022 01:28:37 +0200 Subject: [PATCH 1/2] UCP/RNDV: Disable put-pipeline when sender side is host memory --- src/ucp/rndv/rndv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ucp/rndv/rndv.c b/src/ucp/rndv/rndv.c index a90ed70dd6c..b2b388c0169 100644 --- a/src/ucp/rndv/rndv.c +++ b/src/ucp/rndv/rndv.c @@ -116,10 +116,15 @@ static int ucp_rndv_is_recv_pipeline_needed(ucp_request_t *rndv_req, static UCS_F_ALWAYS_INLINE int ucp_rndv_is_put_pipeline_needed(uintptr_t remote_address, size_t length, + const void *rkey_buf, const ucp_ep_rndv_zcopy_config_t *get_zcopy, const ucp_ep_rndv_zcopy_config_t *put_zcopy, int is_get_zcopy_failed) { + if (ucp_rkey_packed_mem_type(rkey_buf) == UCS_MEMORY_TYPE_HOST) { + return 0; + } + /* Fallback to PUT pipeline if: */ return /* Remote mem type is non-HOST memory OR can't do GET ZCOPY */ ((remote_address == 0) || (get_zcopy->max == 0) || @@ -1507,8 +1512,8 @@ UCS_PROFILE_FUNC_VOID(ucp_rndv_receive, (worker, rreq, rndv_rts_hdr, rkey_buf), put_zcopy = &ep_config->rndv.put_zcopy; ucp_rndv_recv_data_init(rreq, rndv_rts_hdr->size); if (ucp_rndv_is_put_pipeline_needed(rndv_rts_hdr->address, - rndv_rts_hdr->size, get_zcopy, - put_zcopy, + rndv_rts_hdr->size, rkey_buf, + get_zcopy, put_zcopy, is_get_zcopy_failed)) { /* send FRAG RTR for sender to PUT the fragment. */ ucp_rndv_send_frag_rtr(worker, rndv_req, rreq, rndv_rts_hdr); From c6b02854210291465f41a52f2e7d5fd37517fa75 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Mon, 14 Feb 2022 16:24:44 +0200 Subject: [PATCH 2/2] NEWS: Add host->cuda pipeline fix --- NEWS | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS b/NEWS index 522af96714b..a7757750067 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,7 @@ * Fixed continuously triggering wakeup fd when keepalive is used * Fixed memtype cache fallback when memory hooks are not installed * Fixed parsing header flags of worker address +* Fixed pipeline protocol when sending from host memory to GPU memory ### Important changes * If Cuda memory hooks on driver API cannot be installed, memory type cache and memory registration cache will be disabled. This may lead to lower performance