From b4cbd74fec28152158d4b1eef48241d0e67a619f Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Mon, 16 Dec 2019 20:55:00 +0200 Subject: [PATCH] UCP/TAG: Tag offload bounce buffer optimization --- src/ucp/tag/offload.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ucp/tag/offload.c b/src/ucp/tag/offload.c index b56398cc511..44bb68c5300 100644 --- a/src/ucp/tag/offload.c +++ b/src/ucp/tag/offload.c @@ -256,7 +256,10 @@ ucp_tag_offload_do_post(ucp_request_t *req) mdi = context->tl_rscs[wiface->rsc_index].md_index; - if (ucs_unlikely(length >= worker->tm.offload.zcopy_thresh)) { + /* Do not use bounce buffer for receives to GPU memory to avoid + * cost of h2d transfers (i.e. cuda_copy from staging to dest memory). */ + if ((length >= worker->tm.offload.zcopy_thresh) || + !UCP_MEM_IS_ACCESSIBLE_FROM_CPU(req->recv.mem_type)) { if (length > wiface->attr.cap.tag.recv.max_zcopy) { /* Post maximum allowed length. If sender sends smaller message * (which is allowed per MPI standard), max recv should fit it.