Skip to content

Commit

Permalink
smcuda: fixes when using enable-mca-dso
Browse files Browse the repository at this point in the history
related to open-mpi#11354

Signed-off-by: Howard Pritchard <howardp@lanl.gov>
  • Loading branch information
hppritcha authored and Yicheng Li committed Jun 20, 2023
1 parent ba85e65 commit a279541
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 16 deletions.
14 changes: 13 additions & 1 deletion opal/mca/btl/smcuda/btl_smcuda_accelerator.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved.
* Copyright (c) 2023 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -35,6 +37,8 @@ static int accelerator_event_max = 400;
static int accelerator_event_ipc_most = 0;
static bool smcuda_accelerator_initialized = false;

static void mca_btl_smcuda_accelerator_fini(void);

int mca_btl_smcuda_accelerator_init(void)
{
int rc = OPAL_SUCCESS;
Expand Down Expand Up @@ -79,6 +83,14 @@ int mca_btl_smcuda_accelerator_init(void)
goto cleanup_and_error;
}

/*
* Add the smcuda accelerator fini code to opal's list of cleanup functions.
* Cleanups are called before all the MCA frameworks are closed, so by
* adding this function to the callback list, we avoid issues with the ordering
* of the closing of the BTL framework relative to the accelerator framework, etc.
*/
opal_finalize_register_cleanup(mca_btl_smcuda_accelerator_fini);

smcuda_accelerator_initialized = true;

cleanup_and_error:
Expand All @@ -103,7 +115,7 @@ int mca_btl_smcuda_accelerator_init(void)
return rc;
}

void mca_btl_smcuda_accelerator_fini(void)
static void mca_btl_smcuda_accelerator_fini(void)
{
int i;

Expand Down
3 changes: 2 additions & 1 deletion opal/mca/btl/smcuda/btl_smcuda_accelerator.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights
* reserved.
* Copyright (c) 2023 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -17,7 +19,6 @@
#include "opal/mca/btl/btl.h"

OPAL_DECLSPEC int mca_btl_smcuda_accelerator_init(void);
OPAL_DECLSPEC void mca_btl_smcuda_accelerator_fini(void);
OPAL_DECLSPEC int mca_btl_smcuda_progress_one_ipc_event(struct mca_btl_base_descriptor_t **frag);
OPAL_DECLSPEC int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
struct mca_btl_base_descriptor_t *frag);
Expand Down
33 changes: 19 additions & 14 deletions opal/mca/btl/smcuda/btl_smcuda_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved.
* Copyright (c) 2023 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -69,6 +71,8 @@ static int smcuda_register(void);
static mca_btl_base_module_t **
mca_btl_smcuda_component_init(int *num_btls, bool enable_progress_threads, bool enable_mpi_threads);

static void mca_btl_smcuda_component_fini(void);

typedef enum {
MCA_BTL_SM_RNDV_MOD_SM = 0,
MCA_BTL_SM_RNDV_MOD_MPOOL
Expand Down Expand Up @@ -214,7 +218,7 @@ static int smcuda_register(void)
if (0 == mca_btl_smcuda.super.btl_accelerator_eager_limit) {
mca_btl_smcuda.super.btl_accelerator_eager_limit = SIZE_MAX; /* magic number */
}
#endif
#endif /* OPAL_CUDA_SUPPORT */
return mca_btl_smcuda_component_verify();
}

Expand Down Expand Up @@ -260,16 +264,24 @@ static int mca_btl_smcuda_component_open(void)
OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_max, opal_free_list_t);
OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_user, opal_free_list_t);
OBJ_CONSTRUCT(&mca_btl_smcuda_component.pending_send_fl, opal_free_list_t);

opal_finalize_register_cleanup(mca_btl_smcuda_component_fini);

return OPAL_SUCCESS;
}

/*
 * component close - intentionally does no work and always reports success.
 * The component teardown is performed by mca_btl_smcuda_component_fini(),
 * which is registered through opal_finalize_register_cleanup() rather than
 * being run from this close hook.
 */
static int mca_btl_smcuda_component_close(void)
{
return OPAL_SUCCESS;
}

/*
* component cleanup - sanity checking of queue lengths
*/

static int mca_btl_smcuda_component_close(void)
static void mca_btl_smcuda_component_fini(void)
{
int return_value = OPAL_SUCCESS;
int rc;

OBJ_DESTRUCT(&mca_btl_smcuda_component.sm_lock);
/**
Expand All @@ -282,11 +294,10 @@ static int mca_btl_smcuda_component_close(void)

/* unmap the shared memory control structure */
if (mca_btl_smcuda_component.sm_seg != NULL) {
return_value = mca_common_sm_fini(mca_btl_smcuda_component.sm_seg);
if (OPAL_SUCCESS != return_value) {
return_value = OPAL_ERROR;
rc = mca_common_sm_fini(mca_btl_smcuda_component.sm_seg);
if (OPAL_SUCCESS != rc) {
opal_output(0, " mca_common_sm_fini failed\n");
goto CLEANUP;
return;
}

/* unlink file, so that it will be deleted when all references
Expand All @@ -310,13 +321,7 @@ static int mca_btl_smcuda_component_close(void)
unlink(mca_btl_smcuda_component.sm_fifo_path);
}
#endif

CLEANUP:

mca_btl_smcuda_accelerator_fini();

/* return */
return return_value;
return;
}

/*
Expand Down

0 comments on commit a279541

Please sign in to comment.