Skip to content

Commit

Permalink
UCT/CUDA: create cuda streams on first use
Browse files Browse the repository at this point in the history
  • Loading branch information
bureddy committed Feb 13, 2018
1 parent 3aef6b6 commit 94a632c
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 22 deletions.
22 changes: 18 additions & 4 deletions src/uct/cuda/cuda_copy/cuda_copy_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,13 @@ uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src, size_t
return UCS_ERR_NO_MEMORY;
}

status = CUDA_FUNC(cudaMemcpyAsync(dst, src, length, direction, stream));
status = UCT_CUDA_FUNC(cudaMemcpyAsync(dst, src, length, direction, stream));
if (UCS_OK != status) {
ucs_error("cudaMemcpyAsync Failed ");
return UCS_ERR_IO_ERROR;
}

status = CUDA_FUNC(cudaEventRecord(cuda_event->event, stream));
status = UCT_CUDA_FUNC(cudaEventRecord(cuda_event->event, stream));
if (UCS_OK != status) {
ucs_error("cudaEventRecord Failed ");
return UCS_ERR_IO_ERROR;
}
ucs_queue_push(outstanding_queue, &cuda_event->queue);
Expand All @@ -81,6 +79,14 @@ ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, si
uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
ucs_status_t status;

if (iface->stream_d2h == 0) {
status = UCT_CUDA_FUNC(cudaStreamCreateWithFlags(&iface->stream_d2h,
cudaStreamNonBlocking));
if (UCS_OK != status) {
return UCS_ERR_IO_ERROR;
}
}

status = uct_cuda_copy_post_cuda_async_copy(tl_ep, iov[0].buffer, (void *)remote_addr,
iov[0].length, cudaMemcpyDeviceToHost,
iface->stream_d2h,
Expand All @@ -101,6 +107,14 @@ ucs_status_t uct_cuda_copy_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, si
uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
ucs_status_t status;

if (iface->stream_h2d == 0) {
status = UCT_CUDA_FUNC(cudaStreamCreateWithFlags(&iface->stream_h2d,
cudaStreamNonBlocking));
if (UCS_OK != status) {
return UCS_ERR_IO_ERROR;
}
}

status = uct_cuda_copy_post_cuda_async_copy(tl_ep, (void *)remote_addr, iov[0].buffer,
iov[0].length, cudaMemcpyHostToDevice,
iface->stream_h2d,
Expand Down
29 changes: 15 additions & 14 deletions src/uct/cuda/cuda_copy/cuda_copy_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ static void uct_cuda_copy_event_desc_init(ucs_mpool_t *mp, void *obj, void *chun
ucs_status_t status;

memset(base, 0 , sizeof(*base));
status = CUDA_FUNC(cudaEventCreateWithFlags(&(base->event), cudaEventDisableTiming));
status = UCT_CUDA_FUNC(cudaEventCreateWithFlags(&(base->event),
cudaEventDisableTiming));
if (UCS_OK != status) {
ucs_error("cudaEventCreateWithFlags Failed");
}
Expand All @@ -188,7 +189,7 @@ static void uct_cuda_copy_event_desc_cleanup(ucs_mpool_t *mp, void *obj)
uct_cuda_copy_event_desc_t *base = (uct_cuda_copy_event_desc_t *) obj;
ucs_status_t status;

status = CUDA_FUNC(cudaEventDestroy(base->event));
status = UCT_CUDA_FUNC(cudaEventDestroy(base->event));
if (UCS_OK != status) {
ucs_error("cudaEventDestroy Failed");
}
Expand All @@ -215,7 +216,7 @@ static UCS_CLASS_INIT_FUNC(uct_cuda_copy_iface_t, uct_md_h md, uct_worker_h work

if (strncmp(params->mode.device.dev_name,
UCT_CUDA_DEV_NAME, strlen(UCT_CUDA_DEV_NAME)) != 0) {
ucs_error("No device was found: %s", params->mode.device.dev_name);
ucs_error("no device was found: %s", params->mode.device.dev_name);
return UCS_ERR_NO_DEVICE;
}

Expand All @@ -232,20 +233,12 @@ static UCS_CLASS_INIT_FUNC(uct_cuda_copy_iface_t, uct_md_h md, uct_worker_h work
"CUDA EVENT objects");

if (UCS_OK != status) {
ucs_error("Mpool creation failed");
ucs_error("mpool creation failed");
return UCS_ERR_IO_ERROR;
}

status = CUDA_FUNC(cudaStreamCreateWithFlags(&self->stream_d2h, cudaStreamNonBlocking));
if (UCS_OK != status) {
ucs_error("cudaStreamCreateWithFlags creation failed");
return UCS_ERR_IO_ERROR;
}
status = CUDA_FUNC(cudaStreamCreateWithFlags(&self->stream_h2d, cudaStreamNonBlocking));
if (UCS_OK != status) {
ucs_error("cudaStreamCreateWithFlags creation failed");
return UCS_ERR_IO_ERROR;
}
self->stream_d2h = 0;
self->stream_h2d = 0;

ucs_queue_head_init(&self->outstanding_d2h_cuda_event_q);
ucs_queue_head_init(&self->outstanding_h2d_cuda_event_q);
Expand All @@ -257,6 +250,14 @@ static UCS_CLASS_CLEANUP_FUNC(uct_cuda_copy_iface_t)
{
uct_base_iface_progress_disable(&self->super.super,
UCT_PROGRESS_SEND | UCT_PROGRESS_RECV);
if (self->stream_h2d != 0) {
UCT_CUDA_FUNC(cudaStreamDestroy(self->stream_h2d));
}

if (self->stream_d2h != 0) {
UCT_CUDA_FUNC(cudaStreamDestroy(self->stream_d2h));
}

ucs_mpool_cleanup(&self->cuda_event_desc, 1);
}

Expand Down
9 changes: 5 additions & 4 deletions src/uct/cuda/cuda_copy/cuda_copy_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@

#include <uct/base/uct_iface.h>
#include <ucs/arch/cpu.h>
#include <ucs/sys/preprocessor.h>
#include <cuda_runtime.h>
#include <cuda.h>


#define UCT_CUDA_COPY_TL_NAME "cuda_copy"
#define UCT_CUDA_DEV_NAME "cudacopy0"

#define CUDA_FUNC(func) ({ \
#define UCT_CUDA_FUNC(_func) ({ \
ucs_status_t _status = UCS_OK; \
do { \
cudaError_t _result = (func); \
cudaError_t _result = (_func); \
if (cudaSuccess != _result) { \
ucs_error("[%s:%d] cuda failed with %d \n", \
__FILE__, __LINE__,_result); \
ucs_error("%s failed with %d \n", \
UCS_PP_MAKE_STRING(_func), _result); \
_status = UCS_ERR_IO_ERROR; \
} \
} while (0); \
Expand Down

0 comments on commit 94a632c

Please sign in to comment.