From 57c8744358b74b31863d8ccb9d896db2fb4ae3db Mon Sep 17 00:00:00 2001 From: Devendar Bureddy Date: Thu, 15 Nov 2018 20:36:17 +0200 Subject: [PATCH 1/3] UCT/GDR_COPY: align address to GPU PAGE SIZE when rcache is turned off --- src/uct/cuda/gdr_copy/gdr_copy_md.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.c b/src/uct/cuda/gdr_copy/gdr_copy_md.c index d79acd4d505..3ffa1303e46 100644 --- a/src/uct/cuda/gdr_copy/gdr_copy_md.c +++ b/src/uct/cuda/gdr_copy/gdr_copy_md.c @@ -178,8 +178,7 @@ static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t unsigned flags, uct_mem_h *memh_p) { uct_gdr_copy_mem_t *mem_hndl = NULL; - size_t reg_size; - void *ptr; + uintptr_t start, end; ucs_status_t status; mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_t), "gdr_copy handle"); @@ -188,10 +187,10 @@ static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t return UCS_ERR_NO_MEMORY; } - reg_size = (length + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK; - ptr = (void *) ((uintptr_t)address & GPU_PAGE_MASK); + start = ucs_align_down_pow2((uintptr_t)address, GPU_PAGE_SIZE); + end = ucs_align_up_pow2((uintptr_t)address + length, GPU_PAGE_SIZE); - status = uct_gdr_copy_mem_reg_internal(uct_md, ptr, reg_size, 0, mem_hndl); + status = uct_gdr_copy_mem_reg_internal(uct_md, (void *)start, (end - start), 0, mem_hndl); if (status != UCS_OK) { ucs_free(mem_hndl); return status; From 3b7f72e5eee2fece8c6cbc194ce878c93fc7cd0d Mon Sep 17 00:00:00 2001 From: Devendar Bureddy Date: Thu, 15 Nov 2018 20:53:25 +0200 Subject: [PATCH 2/3] UCT/GDR_COPY: avoid registering whole allocation addr range --- src/uct/cuda/gdr_copy/gdr_copy_md.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.c b/src/uct/cuda/gdr_copy/gdr_copy_md.c index 3ffa1303e46..e18300c1116 100644 --- a/src/uct/cuda/gdr_copy/gdr_copy_md.c +++ b/src/uct/cuda/gdr_copy/gdr_copy_md.c @@ 
-282,17 +282,8 @@ uct_gdr_copy_mem_rcache_reg(uct_md_h uct_md, void *address, size_t length, ucs_rcache_region_t *rregion; ucs_status_t status; uct_gdr_copy_mem_t *memh; - CUdeviceptr d_ptr; - size_t d_length; - - status = UCT_CUDADRV_FUNC(cuMemGetAddressRange(&d_ptr, &d_length, - (CUdeviceptr)address)); - if (status != UCS_OK) { - return status; - } - - status = ucs_rcache_get(md->rcache, (void *)d_ptr, d_length, PROT_READ|PROT_WRITE, + status = ucs_rcache_get(md->rcache, (void *)address, length, PROT_READ|PROT_WRITE, &flags, &rregion); if (status != UCS_OK) { return status; From bbd0058360ca31bc67f1bc6aff5665e4f257cd3e Mon Sep 17 00:00:00 2001 From: Devendar Bureddy Date: Fri, 16 Nov 2018 07:46:47 +0200 Subject: [PATCH 3/3] UCT/GDR_COPY: Fix review comments --- src/uct/cuda/gdr_copy/gdr_copy_md.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.c b/src/uct/cuda/gdr_copy/gdr_copy_md.c index e18300c1116..10aea7d28b9 100644 --- a/src/uct/cuda/gdr_copy/gdr_copy_md.c +++ b/src/uct/cuda/gdr_copy/gdr_copy_md.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -178,7 +179,7 @@ static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t unsigned flags, uct_mem_h *memh_p) { uct_gdr_copy_mem_t *mem_hndl = NULL; - uintptr_t start, end; + void *start, *end; ucs_status_t status; mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_t), "gdr_copy handle"); @@ -187,10 +188,11 @@ static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t return UCS_ERR_NO_MEMORY; } - start = ucs_align_down_pow2((uintptr_t)address, GPU_PAGE_SIZE); - end = ucs_align_up_pow2((uintptr_t)address + length, GPU_PAGE_SIZE); + start = ucs_align_down_pow2_ptr(address, GPU_PAGE_SIZE); + end = ucs_align_up_pow2_ptr(address + length, GPU_PAGE_SIZE); + ucs_assert_always(start <= end); - status = uct_gdr_copy_mem_reg_internal(uct_md, (void *)start, (end - start), 0, mem_hndl); + status 
= uct_gdr_copy_mem_reg_internal(uct_md, start, end - start, 0, mem_hndl); if (status != UCS_OK) { ucs_free(mem_hndl); return status;