From 834e858a951c6f34c8484b4de7f90db7c4cfcbec Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 12 Mar 2018 14:17:58 -0600 Subject: [PATCH] uct/ugni: fix internal locking There is no requirement in ugni to serialize on the actual device. Serialization must occur on the virtual device (CDM). This commit fixes the locking and greatly improves multi-threaded performance. Signed-off-by: Nathan Hjelm (cherry picked from commit fed3fc51263fe19a06862a5cfcc71949d14f5404) Signed-off-by: Nathan Hjelm --- src/uct/ugni/base/ugni_def.h | 21 ++++++++--------- src/uct/ugni/base/ugni_device.c | 35 +++++++++++------------------ src/uct/ugni/base/ugni_ep.c | 16 ++++++------- src/uct/ugni/base/ugni_md.c | 8 +++---- src/uct/ugni/base/ugni_types.h | 8 ++++--- src/uct/ugni/rdma/ugni_rdma_ep.c | 8 +++---- src/uct/ugni/rdma/ugni_rdma_iface.c | 8 +++---- src/uct/ugni/smsg/ugni_smsg_ep.c | 20 ++++++++--------- src/uct/ugni/smsg/ugni_smsg_iface.c | 20 ++++++++--------- src/uct/ugni/udt/ugni_udt_ep.c | 12 +++++----- src/uct/ugni/udt/ugni_udt_iface.c | 28 +++++++++++------------ src/uct/ugni/udt/ugni_udt_iface.h | 4 ++-- 12 files changed, 91 insertions(+), 97 deletions(-) diff --git a/src/uct/ugni/base/ugni_def.h b/src/uct/ugni/base/ugni_def.h index b5ecbd552c7..88701e8bf92 100644 --- a/src/uct/ugni/base/ugni_def.h +++ b/src/uct/ugni/base/ugni_def.h @@ -1,5 +1,6 @@ /** * Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. + * Copyright (c) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. * See file LICENSE for terms. 
*/ @@ -45,23 +46,23 @@ do {\ #if ENABLE_MT #define uct_ugni_check_lock_needed(_cdm) UCS_THREAD_MODE_MULTI == (_cdm)->thread_mode -#define uct_ugni_device_init_lock(_dev) ucs_spinlock_init(&(_dev)->lock) -#define uct_ugni_device_destroy_lock(_dev) ucs_spinlock_destroy(&(_dev)->lock) -#define uct_ugni_device_lock(_cdm) \ +#define uct_ugni_cdm_init_lock(_cdm) ucs_spinlock_init(&(_cdm)->lock) +#define uct_ugni_cdm_destroy_lock(_cdm) ucs_spinlock_destroy(&(_cdm)->lock) +#define uct_ugni_cdm_lock(_cdm) \ if (uct_ugni_check_lock_needed(_cdm)) { \ ucs_trace_async("Taking lock"); \ - ucs_spin_lock(&(_cdm)->dev->lock); \ + ucs_spin_lock(&(_cdm)->lock); \ } -#define uct_ugni_device_unlock(_cdm) \ +#define uct_ugni_cdm_unlock(_cdm) \ if (uct_ugni_check_lock_needed(_cdm)) { \ ucs_trace_async("Releasing lock"); \ - ucs_spin_unlock(&(_cdm)->dev->lock); \ + ucs_spin_unlock(&(_cdm)->lock); \ } #else -#define uct_ugni_device_init_lock(x) UCS_OK -#define uct_ugni_device_destroy_lock(x) UCS_OK -#define uct_ugni_device_lock(x) -#define uct_ugni_device_unlock(x) +#define uct_ugni_cdm_init_lock(x) UCS_OK +#define uct_ugni_cdm_destroy_lock(x) UCS_OK +#define uct_ugni_cdm_lock(x) +#define uct_ugni_cdm_unlock(x) #define uct_ugni_check_lock_needed(x) 0 #endif diff --git a/src/uct/ugni/base/ugni_device.c b/src/uct/ugni/base/ugni_device.c index b588a201154..7795f6f8e22 100644 --- a/src/uct/ugni/base/ugni_device.c +++ b/src/uct/ugni/base/ugni_device.c @@ -1,6 +1,7 @@ /** * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * Copyright (c) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. * See file LICENSE for terms. 
*/ @@ -370,22 +371,11 @@ ucs_status_t uct_ugni_device_create(int dev_id, int index, uct_ugni_device_t *de ucs_snprintf_zero(dev_p->fname, sizeof(dev_p->fname), "%s:%d", dev_p->type_name, index); - status = uct_ugni_device_init_lock(dev_p); - if (UCS_OK != status) { - ucs_error("Couldn't initalize device lock."); - return status; - } return UCS_OK; } void uct_ugni_device_destroy(uct_ugni_device_t *dev) { - ucs_status_t status; - - status = uct_ugni_device_destroy_lock(dev); - if (UCS_OK != status) { - ucs_error("Couldn't destroy device lock."); - } } ucs_status_t uct_ugni_iface_get_dev_address(uct_iface_t *tl_iface, uct_device_addr_t *addr) @@ -413,7 +403,6 @@ ucs_status_t uct_ugni_create_cdm(uct_ugni_cdm_t *cdm, uct_ugni_device_t *device, cdm->thread_mode = thread_mode; cdm->dev = device; - uct_ugni_device_lock(cdm); cdm->domain_id = job_info->pmi_rank_id + job_info->pmi_num_of_ranks * ucs_atomic_fadd32(&ugni_domain_counter,1); ucs_debug("Creating new command domain with id %d (%d + %d * %d)", cdm->domain_id, job_info->pmi_rank_id, @@ -425,8 +414,7 @@ ucs_status_t uct_ugni_create_cdm(uct_ugni_cdm_t *cdm, uct_ugni_device_t *device, if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CdmCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); - status = UCS_ERR_NO_DEVICE; - goto out_unlock; + return UCS_ERR_NO_DEVICE; } ugni_rc = GNI_CdmAttach(cdm->cdm_handle, device->device_id, @@ -440,8 +428,11 @@ ucs_status_t uct_ugni_create_cdm(uct_ugni_cdm_t *cdm, uct_ugni_device_t *device, status = UCS_ERR_NO_DEVICE; } -out_unlock: - uct_ugni_device_unlock(cdm); + status = uct_ugni_cdm_init_lock(cdm); + if (UCS_OK != status) { + ucs_error("Couldn't initalize CDM lock."); + } + if (UCS_OK == status) { ucs_debug("Made ugni cdm. 
nic_addr = %i domain_id = %i", device->address, cdm->domain_id); } @@ -456,11 +447,15 @@ ucs_status_t uct_ugni_create_md_cdm(uct_ugni_cdm_t *cdm) ucs_status_t uct_ugni_destroy_cdm(uct_ugni_cdm_t *cdm) { gni_return_t ugni_rc; + ucs_status_t status; + + status = uct_ugni_cdm_destroy_lock(cdm); + if (UCS_OK != status) { + ucs_error("Couldn't destroy cdm lock."); + } ucs_trace_func("cdm=%p", cdm); - uct_ugni_device_lock(cdm); ugni_rc = GNI_CdmDestroy(cdm->cdm_handle); - uct_ugni_device_unlock(cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CdmDestroy error status: %s (%d)", gni_err_str[ugni_rc], ugni_rc); @@ -473,11 +468,9 @@ ucs_status_t uct_ugni_create_cq(gni_cq_handle_t *cq, unsigned cq_size, uct_ugni_ { gni_return_t ugni_rc; - uct_ugni_device_lock(cdm); ugni_rc = GNI_CqCreate(cdm->nic_handle, UCT_UGNI_LOCAL_CQ, 0, GNI_CQ_NOBLOCK, NULL, NULL, cq); - uct_ugni_device_unlock(cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CqCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); @@ -491,9 +484,7 @@ ucs_status_t uct_ugni_destroy_cq(gni_cq_handle_t cq, uct_ugni_cdm_t *cdm) { gni_return_t ugni_rc; - uct_ugni_device_lock(cdm); ugni_rc = GNI_CqDestroy(cq); - uct_ugni_device_unlock(cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_warn("GNI_CqDestroy failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); diff --git a/src/uct/ugni/base/ugni_ep.c b/src/uct/ugni/base/ugni_ep.c index 38f190796b5..51d588af404 100644 --- a/src/uct/ugni/base/ugni_ep.c +++ b/src/uct/ugni/base/ugni_ep.c @@ -162,13 +162,13 @@ ucs_status_t ugni_connect_ep(uct_ugni_iface_t *iface, uct_ugni_ep_t *ep){ gni_return_t ugni_rc; - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_EpBind(ep->ep, dev_addr->nic_addr, iface_addr->domain_id); - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc) { - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); (void)GNI_EpDestroy(ep->ep); - 
uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); ucs_error("GNI_EpBind failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return UCS_ERR_UNREACHABLE; @@ -200,9 +200,9 @@ UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, uct_iface_t *tl_iface, self->flush_group->flush_comp.func = NULL; self->flush_group->parent = NULL; #endif - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_EpCreate(uct_ugni_iface_nic_handle(iface), iface->local_cq, &self->ep); - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_CdmCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); @@ -235,9 +235,9 @@ static UCS_CLASS_CLEANUP_FUNC(uct_ugni_ep_t) ucs_arbiter_group_purge(&iface->arbiter, &self->arb_group, uct_ugni_ep_abriter_purge_cb, NULL); - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_EpDestroy(self->ep); - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_warn("GNI_EpDestroy failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); diff --git a/src/uct/ugni/base/ugni_md.c b/src/uct/ugni/base/ugni_md.c index e0b477cc45c..085c8841b81 100644 --- a/src/uct/ugni/base/ugni_md.c +++ b/src/uct/ugni/base/ugni_md.c @@ -64,12 +64,12 @@ static ucs_status_t uct_ugni_mem_reg(uct_md_h md, void *address, size_t length, goto mem_err; } - uct_ugni_device_lock(&ugni_md->cdm); + uct_ugni_cdm_lock(&ugni_md->cdm); ugni_rc = GNI_MemRegister(ugni_md->cdm.nic_handle, (uint64_t)address, length, NULL, GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING, -1, mem_hndl); - uct_ugni_device_unlock(&ugni_md->cdm); + uct_ugni_cdm_unlock(&ugni_md->cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d", address, length, gni_err_str[ugni_rc], ugni_rc); @@ -94,9 +94,9 @@ static ucs_status_t uct_ugni_mem_dereg(uct_md_h md, uct_mem_h memh) 
gni_return_t ugni_rc; ucs_status_t status = UCS_OK; - uct_ugni_device_lock(&ugni_md->cdm); + uct_ugni_cdm_lock(&ugni_md->cdm); ugni_rc = GNI_MemDeregister(ugni_md->cdm.nic_handle, mem_hndl); - uct_ugni_device_unlock(&ugni_md->cdm); + uct_ugni_cdm_unlock(&ugni_md->cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_MemDeregister failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); diff --git a/src/uct/ugni/base/ugni_types.h b/src/uct/ugni/base/ugni_types.h index 8bfdc5aaf81..5c2f7ec0e55 100644 --- a/src/uct/ugni/base/ugni_types.h +++ b/src/uct/ugni/base/ugni_types.h @@ -1,5 +1,6 @@ /** * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (c) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. * See file LICENSE for terms. */ @@ -21,9 +22,6 @@ typedef struct uct_ugni_device { uint32_t cpu_id; /**< CPU attached directly to the device */ cpu_set_t cpu_mask; /**< CPU mask */ -#if ENABLE_MT - ucs_spinlock_t lock; /**< Device lock */ -#endif /* TBD - reference counter */ } uct_ugni_device_t; @@ -34,6 +32,10 @@ typedef struct uct_ugni_cdm { ucs_thread_mode_t thread_mode; uint32_t address; uint16_t domain_id; + +#if ENABLE_MT + ucs_spinlock_t lock; /**< CDM lock (serializes uGNI calls on this CDM) */ +#endif } uct_ugni_cdm_t; /** diff --git a/src/uct/ugni/rdma/ugni_rdma_ep.c b/src/uct/ugni/rdma/ugni_rdma_ep.c index 69371e13fef..63994f8e5a7 100644 --- a/src/uct/ugni/rdma/ugni_rdma_ep.c +++ b/src/uct/ugni/rdma/ugni_rdma_ep.c @@ -94,9 +94,9 @@ static inline ucs_status_t uct_ugni_post_rdma(uct_ugni_rdma_iface_t *iface, ucs_mpool_put(rdma); return UCS_ERR_NO_RESOURCE; } - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_PostRdma(ep->ep, &rdma->desc); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { ucs_mpool_put(rdma); if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) { @@ -127,9 +127,9 @@ static inline ssize_t 
uct_ugni_post_fma(uct_ugni_rdma_iface_t *iface, ucs_mpool_put(fma); return UCS_ERR_NO_RESOURCE; } - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_PostFma(ep->ep, &fma->desc); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { ucs_mpool_put(fma); if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) { diff --git a/src/uct/ugni/rdma/ugni_rdma_iface.c b/src/uct/ugni/rdma/ugni_rdma_iface.c index f375b4875e6..a07bdce51d1 100644 --- a/src/uct/ugni/rdma/ugni_rdma_iface.c +++ b/src/uct/ugni/rdma/ugni_rdma_iface.c @@ -114,21 +114,21 @@ unsigned uct_ugni_progress(void *arg) unsigned count = 0; while (1) { - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data); if (GNI_RC_NOT_DONE == ugni_rc) { - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); break; } if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) { - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); ucs_error("GNI_CqGetEvent falied. Error status %s %d ", gni_err_str[ugni_rc], ugni_rc); return count; } ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr); - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) { ucs_error("GNI_GetCompleted falied. 
Error status %s %d", gni_err_str[ugni_rc], ugni_rc); diff --git a/src/uct/ugni/smsg/ugni_smsg_ep.c b/src/uct/ugni/smsg/ugni_smsg_ep.c index 4950fbeaad1..393263bd600 100644 --- a/src/uct/ugni/smsg/ugni_smsg_ep.c +++ b/src/uct/ugni/smsg/ugni_smsg_ep.c @@ -52,12 +52,12 @@ static ucs_status_t uct_ugni_smsg_mbox_reg(uct_ugni_smsg_iface_t *iface, uct_ugn return UCS_ERR_INVALID_PARAM; } - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_MemRegister(uct_ugni_iface_nic_handle(&iface->super), (uint64_t)address, iface->bytes_per_mbox, iface->remote_cq, GNI_MEM_READWRITE, -1, &(mbox->gni_mem)); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d", address, iface->bytes_per_mbox, gni_err_str[ugni_rc], ugni_rc); @@ -72,9 +72,9 @@ static ucs_status_t uct_ugni_smsg_mbox_reg(uct_ugni_smsg_iface_t *iface, uct_ugn static ucs_status_t uct_ugni_smsg_mbox_dereg(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_mbox_t *mbox){ gni_return_t ugni_rc; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_MemDeregister(uct_ugni_iface_nic_handle(&iface->super), &mbox->gni_mem); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_MemDeregister failed Error status: %s %d", @@ -162,9 +162,9 @@ ucs_status_t uct_ugni_smsg_ep_connect_to_ep(uct_ep_h tl_ep, ucs_error("Could not connect ep in smsg"); return rc; } - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); gni_rc = GNI_SmsgInit(ep->super.ep, local_attr, &remote_attr); - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); if(GNI_RC_SUCCESS != gni_rc){ ucs_error("Failed to initalize smsg. 
%s [%i]", gni_err_str[gni_rc], gni_rc); @@ -176,9 +176,9 @@ ucs_status_t uct_ugni_smsg_ep_connect_to_ep(uct_ep_h tl_ep, } ep_hash = (uint32_t)iface_addr->ep_hash; - uct_ugni_device_lock(&iface->cdm); + uct_ugni_cdm_lock(&iface->cdm); gni_rc = GNI_EpSetEventData(ep->super.ep, iface->cdm.domain_id, ep_hash); - uct_ugni_device_unlock(&iface->cdm); + uct_ugni_cdm_unlock(&iface->cdm); if(GNI_RC_SUCCESS != gni_rc){ ucs_error("Could not set GNI_EpSetEventData!"); @@ -199,10 +199,10 @@ uct_ugni_smsg_ep_am_common_send(uct_ugni_smsg_ep_t *ep, uct_ugni_smsg_iface_t *i desc->msg_id = iface->smsg_id++; desc->flush_group = ep->super.flush_group; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); gni_rc = GNI_SmsgSendWTag(ep->super.ep, header, header_length, payload, payload_length, desc->msg_id, am_id); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if(GNI_RC_SUCCESS != gni_rc){ goto exit_no_res; } diff --git a/src/uct/ugni/smsg/ugni_smsg_iface.c b/src/uct/ugni/smsg/ugni_smsg_iface.c index 2e31606b6fc..777ddb1abc1 100644 --- a/src/uct/ugni/smsg/ugni_smsg_iface.c +++ b/src/uct/ugni/smsg/ugni_smsg_iface.c @@ -32,9 +32,9 @@ static ucs_status_t progress_local_cq(uct_ugni_smsg_iface_t *iface){ uct_ugni_smsg_desc_t message_data; uct_ugni_smsg_desc_t *message_pointer; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_CqGetEvent(iface->super.local_cq, &event_data); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if(GNI_RC_NOT_DONE == ugni_rc){ return UCS_OK; } @@ -69,9 +69,9 @@ static void process_mbox(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_ep_t *ep){ } while(1){ tag = GNI_SMSG_ANY_TAG; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_SmsgGetNextWTag(ep->super.ep, (void **)&data_ptr, &tag); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); /* 
Yes, GNI_RC_NOT_DONE means that you're done with the smsg mailbox */ if(GNI_RC_NOT_DONE == ugni_rc){ break; @@ -92,9 +92,9 @@ static void process_mbox(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_ep_t *ep){ uct_iface_invoke_am(&iface->super.super, tag, user_data, header->length, 0); - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_SmsgRelease(ep->super.ep); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if(GNI_RC_SUCCESS != ugni_rc){ ucs_error("Unhandled smsg error in GNI_SmsgRelease: %s %d", gni_err_str[ugni_rc], ugni_rc); break; @@ -111,11 +111,11 @@ static void uct_ugni_smsg_handle_remote_overflow(uct_ugni_smsg_iface_t *iface){ uct_ugni_smsg_ep_t *ep; /* We don't know which EP dropped a completion entry, so flush everything */ - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); do{ ugni_rc = GNI_CqGetEvent(iface->remote_cq, &event_data); } while(GNI_RC_NOT_DONE != ugni_rc); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); current_ep = sglib_hashed_uct_ugni_ep_t_it_init(&ep_iterator, iface->super.eps); while(NULL != current_ep){ @@ -133,9 +133,9 @@ ucs_status_t progress_remote_cq(uct_ugni_smsg_iface_t *iface) uct_ugni_ep_t *ugni_ep; uct_ugni_smsg_ep_t *ep; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_CqGetEvent(iface->remote_cq, &event_data); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if(GNI_RC_NOT_DONE == ugni_rc){ return UCS_OK; } diff --git a/src/uct/ugni/udt/ugni_udt_ep.c b/src/uct/ugni/udt/ugni_udt_ep.c index 72523deeee1..aa2ffe2740e 100644 --- a/src/uct/ugni/udt/ugni_udt_ep.c +++ b/src/uct/ugni/udt/ugni_udt_ep.c @@ -83,9 +83,9 @@ static UCS_CLASS_CLEANUP_FUNC(uct_ugni_udt_ep_t) if (self->posted_desc) { ucs_debug("Cleaning outstanding request"); - uct_ugni_device_lock(&iface->super.cdm); + 
uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpPostDataCancelById(self->super.ep, self->super.hash_key); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (GNI_RC_SUCCESS != ugni_rc) { if (GNI_RC_NO_MATCH == ugni_rc) { /* We raced with the async thread, it recieved and cleaned up this reply. It's fine. */ @@ -95,9 +95,9 @@ static UCS_CLASS_CLEANUP_FUNC(uct_ugni_udt_ep_t) gni_err_str[ugni_rc], ugni_rc); return; } - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpPostDataWaitById(self->super.ep, self->super.hash_key, 100, &post_state, &rem_addr, &rem_id); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_warn("GNI_EpPostDataWaitById failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); @@ -176,12 +176,12 @@ uct_ugni_udt_ep_am_common_send(const unsigned is_short, uct_ugni_udt_ep_t *ep, u ucs_assertv(msg_length <= GNI_DATAGRAM_MAXSIZE, "msg_length=%u", msg_length); - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpPostDataWId(ep->super.ep, sheader, msg_length, rheader, (uint16_t)iface->config.udt_seg_size, ep->super.hash_key); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); UCT_UGNI_UDT_CHECK_RC(ugni_rc, desc); diff --git a/src/uct/ugni/udt/ugni_udt_iface.c b/src/uct/ugni/udt/ugni_udt_iface.c index 788d14271b6..1b3935b7e32 100644 --- a/src/uct/ugni/udt/ugni_udt_iface.c +++ b/src/uct/ugni/udt/ugni_udt_iface.c @@ -64,9 +64,9 @@ static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id, u } *ep_out = ep; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr, &rem_id); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (ucs_unlikely(GNI_RC_SUCCESS != 
ugni_rc)) { ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d", id, gni_err_str[ugni_rc], ugni_rc); @@ -203,9 +203,9 @@ void uct_ugni_proccess_datagram_pipe(int event_id, void *arg) { ucs_trace_func(""); - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); while (GNI_RC_SUCCESS == ugni_rc) { status = recieve_datagram(iface, id, &ep); if (UCS_INPROGRESS == status) { @@ -239,9 +239,9 @@ void uct_ugni_proccess_datagram_pipe(int event_id, void *arg) { } } } - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); } ucs_async_pipe_drain(&iface->event_pipe); @@ -258,10 +258,10 @@ static void uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface) gni_return_t ugni_rc; uint32_t rem_addr, rem_id; gni_post_state_t post_state; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpPostDataCancelById(iface->ep_any, UCT_UGNI_UDT_ANY); if (GNI_RC_SUCCESS != ugni_rc) { - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); ucs_error("GNI_EpPostDataCancel failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return; @@ -269,7 +269,7 @@ static void uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface) ugni_rc = GNI_EpPostDataTestById(iface->ep_any, UCT_UGNI_UDT_ANY, &post_state, &rem_addr, &rem_id); if (GNI_RC_SUCCESS != ugni_rc) { if (GNI_RC_NO_MATCH != ugni_rc) { - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); ucs_error("GNI_EpPostDataTestById failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return; @@ -284,7 +284,7 @@ static void 
uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface) ucs_error("GNI_EpDestroy failed, Error status: %s %d\n", gni_err_str[ugni_rc], ugni_rc); } - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); } /* Before this function is called, you MUST @@ -297,10 +297,10 @@ static inline void uct_ugni_udt_terminate_thread(uct_ugni_udt_iface_t *iface) gni_return_t ugni_rc; gni_ep_handle_t ep; - uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(iface), iface->super.local_cq, &ep); if (GNI_RC_SUCCESS != ugni_rc) { - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); ucs_error("GNI_EpCreate, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return; @@ -308,7 +308,7 @@ static inline void uct_ugni_udt_terminate_thread(uct_ugni_udt_iface_t *iface) ugni_rc = GNI_EpBind(ep, iface->super.cdm.dev->address, iface->super.cdm.domain_id); if (GNI_RC_SUCCESS != ugni_rc) { GNI_EpDestroy(ep); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); ucs_error("GNI_EpBind failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc); return; @@ -323,7 +323,7 @@ static inline void uct_ugni_udt_terminate_thread(uct_ugni_udt_iface_t *iface) } /* When the gni_ep is destroyed the above post will be canceled */ ugni_rc = GNI_EpDestroy(ep); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); if (GNI_RC_SUCCESS != ugni_rc) { ucs_error("GNI_EpDestroy failed, Error status: %s %d\n", gni_err_str[ugni_rc], ugni_rc); diff --git a/src/uct/ugni/udt/ugni_udt_iface.h b/src/uct/ugni/udt/ugni_udt_iface.h index 4bb53254cd5..ca3283b5731 100644 --- a/src/uct/ugni/udt/ugni_udt_iface.h +++ b/src/uct/ugni/udt/ugni_udt_iface.h @@ -93,14 +93,14 @@ static inline int uct_ugni_udt_ep_any_post(uct_ugni_udt_iface_t *iface) gni_return_t ugni_rc; uct_ugni_udt_reset_desc(iface->desc_any, iface); - 
uct_ugni_device_lock(&iface->super.cdm); + uct_ugni_cdm_lock(&iface->super.cdm); ugni_rc = GNI_EpPostDataWId(iface->ep_any, uct_ugni_udt_get_sheader(iface->desc_any, iface), iface->config.udt_seg_size, uct_ugni_udt_get_rheader(iface->desc_any, iface), iface->config.udt_seg_size, UCT_UGNI_UDT_ANY); - uct_ugni_device_unlock(&iface->super.cdm); + uct_ugni_cdm_unlock(&iface->super.cdm); UCT_UGNI_UDT_CHECK_RC(ugni_rc, iface->desc_any); return UCS_OK; }