Skip to content

Commit

Permalink
Merge pull request openucx#8 from bureddy/gdr-copy-uct
Browse files Browse the repository at this point in the history
gdr_copy uct implementation
  • Loading branch information
bureddy authored Sep 12, 2017
2 parents eeea863 + 7efe846 commit b2a20af
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 46 deletions.
31 changes: 20 additions & 11 deletions src/uct/cuda/gdr_copy/gdr_copy_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

#include "gdr_copy_ep.h"
#include "gdr_copy_md.h"
#include "gdr_copy_iface.h"

#include <uct/base/uct_log.h>
Expand All @@ -31,17 +32,25 @@ UCS_CLASS_DEFINE_NEW_FUNC(uct_gdr_copy_ep_t, uct_ep_t, uct_iface_t*,
UCS_CLASS_DEFINE_DELETE_FUNC(uct_gdr_copy_ep_t, uct_ep_t);


ucs_status_t uct_gdr_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer,
unsigned length, uint64_t remote_addr,
uct_rkey_t rkey)
ucs_status_t uct_gdr_copy_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp)
{
/* Code for PUT here */
return UCS_ERR_UNSUPPORTED;
}
uct_gdr_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_gdr_copy_iface_t);
uct_gdr_copy_md_t *md = (uct_gdr_copy_md_t *)iface->super.md;
uct_gdr_copy_mem_h *mem_hndl = (uct_gdr_copy_mem_h *) rkey;
gdr_info_t gdr_info;
size_t bar_off;

ucs_status_t uct_gdr_copy_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header,
const void *payload, unsigned length)
{
return UCS_ERR_UNSUPPORTED;
}
assert(iovcnt == 1);

if (gdr_get_info(md->gdrcpy_ctx, mem_hndl->mh, &gdr_info) != 0) {
ucs_error("gdr_get_info failed. ");
return UCS_ERR_IO_ERROR;
}
bar_off = remote_addr - gdr_info.va;

gdr_copy_to_bar ((mem_hndl->bar_ptr + bar_off), iov[0].buffer, iov[0].length);

return UCS_OK;
}
9 changes: 5 additions & 4 deletions src/uct/cuda/gdr_copy/gdr_copy_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ UCS_CLASS_DECLARE_NEW_FUNC(uct_gdr_copy_ep_t, uct_ep_t, uct_iface_t*,
const uct_device_addr_t *, const uct_iface_addr_t *);
UCS_CLASS_DECLARE_DELETE_FUNC(uct_gdr_copy_ep_t, uct_ep_t);

/**
 * Short PUT entry point of the gdr_copy endpoint.
 * The definition shown for gdr_copy_ep.c returns UCS_ERR_UNSUPPORTED.
 */
ucs_status_t uct_gdr_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length,
uint64_t remote_addr, uct_rkey_t rkey);
/**
 * Short active-message entry point of the gdr_copy endpoint.
 * The definition shown for gdr_copy_ep.c returns UCS_ERR_UNSUPPORTED.
 */
ucs_status_t uct_gdr_copy_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header,
const void *payload, unsigned length);
/**
 * Zero-copy PUT entry point of the gdr_copy endpoint.
 *
 * @param tl_ep        Endpoint to send on.
 * @param iov          IOV array describing the local data; the implementation
 *                     in gdr_copy_ep.c reads only iov[0].
 * @param iovcnt       Number of entries in @a iov; the implementation expects
 *                     exactly one.
 * @param remote_addr  Destination address.
 * @param rkey         Remote key; the implementation casts it to a
 *                     uct_gdr_copy_mem_h pointer.
 * @param comp         Completion handle; not referenced by the implementation.
 *
 * @return UCS_OK on success or an error status.
 */
ucs_status_t uct_gdr_copy_ep_put_zcopy(uct_ep_h tl_ep,
const uct_iov_t *iov, size_t iovcnt,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp);

#endif
5 changes: 2 additions & 3 deletions src/uct/cuda/gdr_copy/gdr_copy_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ static ucs_status_t uct_gdr_copy_iface_query(uct_iface_h iface,
}

static uct_iface_ops_t uct_gdr_copy_iface_ops = {
.ep_put_short = uct_gdr_copy_ep_put_short,
.ep_am_short = uct_gdr_copy_ep_am_short,
.ep_put_zcopy = uct_gdr_copy_ep_put_zcopy,
.ep_flush = uct_base_ep_flush,
.ep_fence = uct_base_ep_fence,
.ep_create_connected = UCS_CLASS_NEW_FUNC_NAME(uct_gdr_copy_ep_t),
Expand Down Expand Up @@ -160,4 +159,4 @@ UCT_TL_COMPONENT_DEFINE(uct_gdr_copy_tl,
"CUDA_",
uct_gdr_copy_iface_config_table,
uct_gdr_copy_iface_config_t);
UCT_MD_REGISTER_TL(&uct_gdr_copy_md, &uct_gdr_copy_tl);
UCT_MD_REGISTER_TL(&uct_gdr_copy_md_component, &uct_gdr_copy_tl);
1 change: 0 additions & 1 deletion src/uct/cuda/gdr_copy/gdr_copy_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,4 @@ typedef struct uct_gdr_copy_iface_config {
uct_iface_config_t super;
} uct_gdr_copy_iface_config_t;


#endif
116 changes: 91 additions & 25 deletions src/uct/cuda/gdr_copy/gdr_copy_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
#include <cuda_runtime.h>
#include <cuda.h>

/**
 * Configuration fields of the gdr_copy memory domain.
 *
 * The only entry embeds the generic MD table (uct_md_config_table) at the
 * offset of the "super" member of uct_gdr_copy_md_config_t; the {NULL} entry
 * terminates the table.
 */
static ucs_config_field_t uct_gdr_copy_md_config_table[] = {
    {
        "", "", NULL,
        ucs_offsetof(uct_gdr_copy_md_config_t, super),
        UCS_CONFIG_TYPE_TABLE(uct_md_config_table)
    },

    {NULL}
};

static ucs_status_t uct_gdr_copy_md_query(uct_md_h md, uct_md_attr_t *md_attr)
{
Expand Down Expand Up @@ -50,32 +56,69 @@ static ucs_status_t uct_gdr_copy_rkey_release(uct_md_component_t *mdc, uct_rkey_
return UCS_OK;
}

/**
 * Register a CUDA buffer with gdrcopy: pin it, map it through the BAR and
 * return a handle describing the mapping.
 *
 * @param uct_md   Memory domain; must be a uct_gdr_copy_md_t.
 * @param address  Start of the buffer to register.
 * @param length   Length of the buffer in bytes.
 * @param flags    Registration flags; not used by this implementation.
 * @param memh_p   Filled with a newly allocated uct_gdr_copy_mem_h on success.
 *                 Release it with uct_gdr_copy_mem_dereg().
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the handle cannot be
 *         allocated, UCS_ERR_IO_ERROR if pinning or mapping fails. On failure
 *         nothing is left allocated or pinned and *memh_p is not written.
 */
static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length,
                                         unsigned flags, uct_mem_h *memh_p)
{
    uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
    CUdeviceptr d_ptr     = ((CUdeviceptr)(char *)address);
    CUdeviceptr pin_start = d_ptr & GPU_PAGE_MASK;
    uct_gdr_copy_mem_h *mem_hndl;
    ucs_status_t status;
    gdr_mh_t mh;
    size_t reg_size;
    void *bar_ptr;

    mem_hndl = ucs_malloc(sizeof(*mem_hndl), "gdr_copy handle");
    if (NULL == mem_hndl) {
        ucs_error("Failed to allocate memory for uct_gdr_copy_mem_h");
        return UCS_ERR_NO_MEMORY;
    }

    /* Pinning starts at the page-aligned address, so the size must span from
     * there to the page-aligned end of the buffer. Rounding up "length" alone
     * would leave the tail of an unaligned buffer unpinned. */
    reg_size = (size_t)(((d_ptr + length + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK) -
                        pin_start);

    if (gdr_pin_buffer(md->gdrcpy_ctx, pin_start, reg_size, 0, 0, &mh) != 0) {
        ucs_error("gdr_pin_buffer Failed. length :%zu pin_size:%zu ", length, reg_size);
        status = UCS_ERR_IO_ERROR;
        goto err_free;
    }
    if (mh == 0) {
        ucs_error("gdr_pin_buffer Failed. length :%zu pin_size:%zu ", length, reg_size);
        status = UCS_ERR_IO_ERROR;
        goto err_free;
    }

    if (gdr_map(md->gdrcpy_ctx, mh, &bar_ptr, reg_size) != 0) {
        ucs_error("gdr_map failed. length :%zu pin_size:%zu ", length, reg_size);
        status = UCS_ERR_IO_ERROR;
        goto err_unpin;
    }

    mem_hndl->mh       = mh;
    mem_hndl->bar_ptr  = bar_ptr;
    mem_hndl->reg_size = reg_size;

    *memh_p = mem_hndl;
    return UCS_OK;

err_unpin:
    /* Undo the successful pin; the original status is what gets reported. */
    if (gdr_unpin_buffer(md->gdrcpy_ctx, mh) != 0) {
        ucs_error("gdr_unpin_buffer failed ");
    }
err_free:
    ucs_free(mem_hndl);
    return status;
}

/**
 * Undo uct_gdr_copy_mem_reg(): unmap the BAR mapping, unpin the buffer and
 * release the handle.
 *
 * @param uct_md  Memory domain; must be a uct_gdr_copy_md_t.
 * @param memh    Handle produced by uct_gdr_copy_mem_reg().
 *
 * @return UCS_OK on success. UCS_ERR_IO_ERROR if gdr_unmap or
 *         gdr_unpin_buffer fails; in that case the handle is left untouched
 *         (not freed).
 */
static ucs_status_t uct_gdr_copy_mem_dereg(uct_md_h uct_md, uct_mem_h memh)
{
    uct_gdr_copy_md_t *md        = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
    uct_gdr_copy_mem_h *mem_hndl = memh;

    if (gdr_unmap(md->gdrcpy_ctx, mem_hndl->mh, mem_hndl->bar_ptr,
                  mem_hndl->reg_size) != 0) {
        ucs_error("gdr_unmap Failed. unpin_size:%zu ", mem_hndl->reg_size);
        return UCS_ERR_IO_ERROR;
    }
    if (gdr_unpin_buffer(md->gdrcpy_ctx, mem_hndl->mh) != 0) {
        ucs_error("gdr_unpin_buffer failed ");
        return UCS_ERR_IO_ERROR;
    }

    /* The handle was obtained from ucs_malloc(), so it must be released with
     * ucs_free() rather than libc free(), and only after its fields were
     * used above. */
    ucs_free(mem_hndl);
    return UCS_OK;
}

static ucs_status_t uct_gdr_copy_mem_detect(uct_md_h md, void *addr, uint64_t *dn_mask)
{
#if HAVE_CUDA
int memory_type;
cudaError_t cuda_err = cudaSuccess;
struct cudaPointerAttributes attributes;
Expand All @@ -100,40 +143,63 @@ static ucs_status_t uct_gdr_copy_mem_detect(uct_md_h md, void *addr, uint64_t *d
} else if (memory_type == CU_MEMORYTYPE_DEVICE) {
(*dn_mask) = UCT_MD_ADDR_DOMAIN_CUDA;
}
#else
(*dn_mask) = 0;
#endif

return UCS_OK;
}

/**
 * Report the resources of the gdr_copy MD component.
 *
 * Delegates to uct_single_md_resource() with uct_gdr_copy_md_component.
 *
 * @param resources_p      Passed through to uct_single_md_resource().
 * @param num_resources_p  Passed through to uct_single_md_resource().
 *
 * @return Status returned by uct_single_md_resource().
 */
static ucs_status_t uct_gdr_copy_query_md_resources(uct_md_resource_desc_t **resources_p,
                                                    unsigned *num_resources_p)
{
    return uct_single_md_resource(&uct_gdr_copy_md_component, resources_p,
                                  num_resources_p);
}

/**
 * Close a gdr_copy memory domain.
 *
 * Closes the gdrcopy context held by the MD (logging an error if that fails)
 * and then frees the MD object itself.
 *
 * @param uct_md  Memory domain; must be a uct_gdr_copy_md_t.
 */
static void uct_gdr_copy_md_close(uct_md_h uct_md)
{
    uct_gdr_copy_md_t *gdr_md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
    int ret;

    ret = gdr_close(gdr_md->gdrcpy_ctx);
    if (ret != 0) {
        ucs_error("Failed to close gdrcopy");
    }

    /* The MD is released even when gdr_close() reported a failure. */
    ucs_free(gdr_md);
}

/**
 * Open a gdr_copy memory domain.
 *
 * Allocates a uct_gdr_copy_md_t, opens a gdrcopy context for it and wires up
 * the MD operations table.
 *
 * @param md_name    MD name; not used by this implementation.
 * @param md_config  MD configuration; not used by this implementation.
 * @param md_p       Filled with the new MD on success. It is released by the
 *                   MD's close operation (uct_gdr_copy_md_close).
 *
 * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the MD cannot be allocated,
 *         UCS_ERR_IO_ERROR if gdr_open() fails. On failure nothing is left
 *         allocated and *md_p is not written.
 */
static ucs_status_t uct_gdr_copy_md_open(const char *md_name, const uct_md_config_t *md_config,
                                         uct_md_h *md_p)
{
    static uct_md_ops_t md_ops = {
        .close      = uct_gdr_copy_md_close,
        .query      = uct_gdr_copy_md_query,
        .mkey_pack  = uct_gdr_copy_mkey_pack,
        .mem_reg    = uct_gdr_copy_mem_reg,
        .mem_dereg  = uct_gdr_copy_mem_dereg,
        .mem_detect = uct_gdr_copy_mem_detect
    };
    uct_gdr_copy_md_t *md;

    md = ucs_malloc(sizeof(*md), "uct_gdr_copy_md_t");
    if (NULL == md) {
        ucs_error("Failed to allocate memory for uct_gdr_copy_md_t");
        return UCS_ERR_NO_MEMORY;
    }

    md->super.ops       = &md_ops;
    md->super.component = &uct_gdr_copy_md_component;

    md->gdrcpy_ctx = gdr_open();
    if (md->gdrcpy_ctx == NULL) {
        ucs_error("Failed to open gdrcopy ");
        /* Do not leak the MD object when the context cannot be opened. */
        ucs_free(md);
        return UCS_ERR_IO_ERROR;
    }

    *md_p = (uct_md_h)md;
    return UCS_OK;
}

UCT_MD_COMPONENT_DEFINE(uct_gdr_copy_md, UCT_CUDA_MD_NAME,
UCT_MD_COMPONENT_DEFINE(uct_gdr_copy_md_component, UCT_GDR_COPY_MD_NAME,
uct_gdr_copy_query_md_resources, uct_gdr_copy_md_open, NULL,
uct_gdr_copy_rkey_unpack, uct_gdr_copy_rkey_release, "CUDA_",
uct_md_config_table, uct_md_config_t);
uct_gdr_copy_rkey_unpack, uct_gdr_copy_rkey_release, "GDR_COPY_MD_",
uct_gdr_copy_md_config_table, uct_gdr_copy_md_config_t);

32 changes: 30 additions & 2 deletions src/uct/cuda/gdr_copy/gdr_copy_md.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,37 @@
#define UCT_CUDA_CONTEXT_H

#include <uct/base/uct_md.h>
#include "gdrapi.h"

#define UCT_GDR_COPY_MD_NAME "gdr_copy"

extern uct_md_component_t uct_gdr_copy_md_component;

/**
 * @brief gdr_copy MD descriptor
 *
 * Extends the generic MD with the gdrcopy context used by the MD's
 * registration and copy routines.
 */
typedef struct uct_gdr_copy_md {
    /** Domain info (generic MD part; must stay the first member so the MD
     *  can be handed out as a uct_md_h) */
    struct uct_md super;

    /** gdr copy context, obtained from gdr_open() and released by gdr_close() */
    gdr_t gdrcpy_ctx;
} uct_gdr_copy_md_t;

/**
 * @brief gdr copy domain configuration.
 *
 * Currently carries only the generic MD configuration.
 */
typedef struct uct_gdr_copy_md_config {
    /** Generic MD configuration */
    uct_md_config_t super;
} uct_gdr_copy_md_config_t;


/**
 * @brief gdr copy mem handle
 *
 * Describes one registered buffer. Note that, despite the "_h" suffix, this
 * typedef names the structure itself, not a pointer to it.
 */
typedef struct uct_gdr_copy_mem {
    /** gdrcopy handle returned by gdr_pin_buffer() */
    gdr_mh_t mh;

    /** Address of the mapping returned by gdr_map() */
    void *bar_ptr;

    /** Size in bytes that was pinned and mapped */
    size_t reg_size;
} uct_gdr_copy_mem_h;

#define UCT_CUDA_MD_NAME "gpu"

extern uct_md_component_t uct_gdr_copy_md;

#endif

0 comments on commit b2a20af

Please sign in to comment.