diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 264c1763f42..71dac2fa061 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -26,6 +26,7 @@ * Copyright (c) 2021 Nanook Consulting. All rights reserved. * Copyright (c) 2018-2022 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -215,6 +216,11 @@ int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm, if (NULL == newcomm) { return OMPI_ERR_OUT_OF_RESOURCE; } + newcomm->c_name = (char*) malloc (OPAL_MAX_OBJECT_NAME); + if (NULL == newcomm->c_name) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + newcomm->c_name[0] = '\0'; newcomm->super.s_info = NULL; /* fill in the inscribing hyper-cube dimensions */ newcomm->c_cube_dim = opal_cube_dim(local_size); diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 393d7ce164f..59aa0f38422 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -25,6 +25,7 @@ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018-2022 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -169,8 +170,7 @@ int ompi_comm_init(void) (void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 0, &ompi_mpi_comm_null); (void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 1, &ompi_mpi_comm_null); - opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL", - sizeof(ompi_mpi_comm_null.comm.c_name)); + ompi_mpi_comm_null.comm.c_name = strdup ("MPI_COMM_NULL"); ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | OMPI_COMM_GLOBAL_INDEX; @@ -221,8 +221,7 @@ int ompi_comm_init_mpi3 (void) OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm); opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world); - opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD", - sizeof(ompi_mpi_comm_world.comm.c_name)); + ompi_mpi_comm_world.comm.c_name = strdup("MPI_COMM_WORLD"); ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | OMPI_COMM_GLOBAL_INDEX; ompi_mpi_comm_world.comm.instance = group->grp_instance; @@ -280,8 +279,7 @@ int ompi_comm_init_mpi3 (void) OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm); opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self); - opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF", - sizeof(ompi_mpi_comm_self.comm.c_name)); + ompi_mpi_comm_self.comm.c_name = strdup("MPI_COMM_SELF"); ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | OMPI_COMM_GLOBAL_INDEX; ompi_mpi_comm_self.comm.instance = group->grp_instance; @@ -412,7 +410,7 @@ static int ompi_comm_finalize (void) static void ompi_comm_construct(ompi_communicator_t* comm) { int idx; - comm->c_name[0] = '\0'; + comm->c_name = NULL; comm->c_index = MPI_UNDEFINED; comm->c_flags = 0; comm->c_my_rank = 0; @@ -444,7 +442,7 @@ static void ompi_comm_construct(ompi_communicator_t* comm) this communicator */ comm->c_keyhash = NULL; - comm->errhandler_type = 
OMPI_ERRHANDLER_TYPE_COMM; + comm->error_handler = &ompi_mpi_errors_are_fatal.eh; #ifdef OMPI_WANT_PERUSE comm->c_peruse_handles = NULL; #endif @@ -520,6 +518,11 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) comm->error_handler = NULL; } + if (NULL != comm->c_name) { + free (comm->c_name); + comm->c_name = NULL; + } + #if OPAL_ENABLE_FT_MPI if( NULL != comm->agreement_specific ) { OBJ_RELEASE( comm->agreement_specific ); diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index fd9ef4aab8b..79e8d5f0588 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -24,6 +24,7 @@ * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018-2022 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -261,7 +262,7 @@ struct ompi_communicator_t { opal_infosubscriber_t super; opal_mutex_t c_lock; /* mutex for name and potentially attributes */ - char c_name[MPI_MAX_OBJECT_NAME]; + char *c_name; ompi_comm_extended_cid_t c_contextid; ompi_comm_extended_cid_block_t c_contextidb; uint32_t c_index; @@ -269,12 +270,22 @@ struct ompi_communicator_t { uint32_t c_flags; /* flags, e.g. intercomm, topology, etc. 
*/ uint32_t c_assertions; /* info assertions */ - int c_id_available; /* the currently available Cid for allocation - to a child*/ - int c_id_start_index; /* the starting index of the block of cids - allocated to this communicator*/ +#if OPAL_ENABLE_FT_MPI uint32_t c_epoch; /* Identifier used to differentiate between two communicators using the same c_contextid (not at the same time, obviously) */ +#endif + /* Non-blocking collective tag. These tags might be shared between + * all non-blocking collective modules (to avoid message collision + * between them in the case where multiple outstanding non-blocking + * collective coexists using multiple backends). + */ + opal_atomic_int32_t c_nbc_tag; + + /**< inscribing cube dimension */ + int c_cube_dim; + + /* index in Fortran <-> C translation array */ + int c_f_to_c_index; ompi_group_t *c_local_group; ompi_group_t *c_remote_group; @@ -287,16 +298,10 @@ struct ompi_communicator_t { /* Attributes */ struct opal_hash_table_t *c_keyhash; - /**< inscribing cube dimension */ - int c_cube_dim; - /* Standard information about the selected topology module (or NULL if this is not a cart, graph or dist graph communicator) */ struct mca_topo_base_module_t* c_topo; - /* index in Fortran <-> C translation array */ - int c_f_to_c_index; - #ifdef OMPI_WANT_PERUSE /* * Place holder for the PERUSE events. @@ -307,9 +312,7 @@ struct ompi_communicator_t { /* Error handling. This field does not have the "c_" prefix so that the OMPI_ERRHDL_* macros can find it, regardless of whether it's a comm, window, or file. */ - ompi_errhandler_t *error_handler; - ompi_errhandler_type_t errhandler_type; /* Hooks for PML to hang things */ struct mca_pml_comm_t *c_pml_comm; @@ -320,21 +323,14 @@ struct ompi_communicator_t { /* Collectives module interface and data */ mca_coll_base_comm_coll_t *c_coll; - /* Non-blocking collective tag. 
These tags might be shared between - * all non-blocking collective modules (to avoid message collision - * between them in the case where multiple outstanding non-blocking - * collective coexists using multiple backends). - */ - opal_atomic_int32_t c_nbc_tag; - /* instance that this comm belongs to */ ompi_instance_t* instance; #if OPAL_ENABLE_FT_MPI - /** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */ - int any_source_offset; /** agreement caching info for topology and previous returned decisions */ opal_object_t *agreement_specific; + /** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */ + int any_source_offset; /** Are MPI_ANY_SOURCE operations enabled? - OMPI_Comm_failure_ack */ bool any_source_enabled; /** Has this communicator been revoked - OMPI_Comm_revoke() */ @@ -437,7 +433,7 @@ typedef struct ompi_communicator_t ompi_communicator_t; * the PREDEFINED_COMMUNICATOR_PAD macro? * A: Most likely not, but it would be good to check. */ -#define PREDEFINED_COMMUNICATOR_PAD 1024 +#define PREDEFINED_COMMUNICATOR_PAD 512 struct ompi_predefined_communicator_t { struct ompi_communicator_t comm; diff --git a/ompi/debuggers/predefined_gap_test.c b/ompi/debuggers/predefined_gap_test.c index 0129eb63a23..4ad207988a1 100644 --- a/ompi/debuggers/predefined_gap_test.c +++ b/ompi/debuggers/predefined_gap_test.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2009 Sun Microsystems, Inc All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 The University of Tennessee and The University + * Copyright (c) 2012-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. 
@@ -59,8 +59,6 @@ int main(int argc, char **argv) { GAP_CHECK("c_contextid", test_comm, c_contextid, c_name, 1); GAP_CHECK("c_my_rank", test_comm, c_my_rank, c_contextid, 1); GAP_CHECK("c_flags", test_comm, c_flags, c_my_rank, 1); - GAP_CHECK("c_id_available", test_comm, c_id_available, c_flags, 1); - GAP_CHECK("c_id_start_index", test_comm, c_id_start_index, c_id_available, 1); GAP_CHECK("c_remote_group", test_comm, c_remote_group, c_local_group, 1); GAP_CHECK("c_local_comm", test_comm, c_local_comm, c_remote_group, 1); GAP_CHECK("c_keyhash", test_comm, c_keyhash, c_local_comm, 1); @@ -73,8 +71,6 @@ int main(int argc, char **argv) { #else GAP_CHECK("error_handler", test_comm, error_handler, c_f_to_c_index, 1); #endif - GAP_CHECK("errhandler_type", test_comm, errhandler_type, error_handler, 1); - GAP_CHECK("c_pml_comm", test_comm, c_pml_comm, errhandler_type, 1); GAP_CHECK("c_coll", test_comm, c_coll, c_pml_comm, 1); /* Test Predefined group sizes */ @@ -129,8 +125,6 @@ int main(int argc, char **argv) { GAP_CHECK("w_keyhash", test_win, w_keyhash, w_flags, 1); GAP_CHECK("w_f_to_c_index", test_win, w_f_to_c_index, w_keyhash, 1); GAP_CHECK("error_handler", test_win, error_handler, w_f_to_c_index, 1); - GAP_CHECK("errhandler_type", test_win, errhandler_type, error_handler, 1); - GAP_CHECK("w_osc_module", test_win, w_osc_module, errhandler_type, 1); /* Test Predefined info sizes */ printf("=============================================\n"); @@ -151,8 +145,6 @@ int main(int argc, char **argv) { GAP_CHECK("f_flags", test_file, f_flags, f_amode, 1); GAP_CHECK("f_f_to_c_index", test_file, f_f_to_c_index, f_flags, 1); GAP_CHECK("error_handler", test_file, error_handler, f_f_to_c_index, 1); - GAP_CHECK("errhandler_type", test_file, errhandler_type, error_handler, 1); - GAP_CHECK("f_io_version", test_file, f_io_version, errhandler_type, 1); GAP_CHECK("f_io_selected_component", test_file, f_io_selected_component, f_io_version, 1); GAP_CHECK("f_io_selected_module", test_file, 
f_io_selected_module, f_io_selected_component, 1); GAP_CHECK("f_io_selected_data", test_file, f_io_selected_data, f_io_selected_module, 1); diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index 41c426905fc..bcdd2f345c6 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2022 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -388,10 +388,10 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo OMPI_NAME_PRINT(&ompi_proc->super.proc_name), ompi_comm_print_cid(comm), proc_rank, - (OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->errhandler_type ? "P" : - (OMPI_ERRHANDLER_TYPE_COMM == comm->errhandler_type ? "C" : - (OMPI_ERRHANDLER_TYPE_WIN == comm->errhandler_type ? "W" : - (OMPI_ERRHANDLER_TYPE_FILE == comm->errhandler_type ? "F" : "U") ) ) ) + (OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->error_handler->eh_mpi_object_type ? "P" : + (OMPI_ERRHANDLER_TYPE_COMM == comm->error_handler->eh_mpi_object_type ? "C" : + (OMPI_ERRHANDLER_TYPE_WIN == comm->error_handler->eh_mpi_object_type ? "W" : + (OMPI_ERRHANDLER_TYPE_FILE == comm->error_handler->eh_mpi_object_type ? "F" : "U") ) ) ) )); } diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index 5b35ca0df1d..519a84d9961 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2022 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -238,7 +238,7 @@ extern opal_atomic_int32_t ompi_instance_count; #define OMPI_ERRHANDLER_INVOKE(mpi_object, err_code, message) \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ - (int)(mpi_object)->errhandler_type, \ + (int)(mpi_object)->error_handler->eh_mpi_object_type, \ ompi_errcode_get_mpi_code(err_code), \ (message)); @@ -269,7 +269,7 @@ extern opal_atomic_int32_t ompi_instance_count; int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ - (int) (mpi_object)->errhandler_type, \ + (int) (mpi_object)->error_handler->eh_mpi_object_type, \ (__mpi_err_code), \ (message)); \ return (__mpi_err_code); \ @@ -307,7 +307,7 @@ extern opal_atomic_int32_t ompi_instance_count; int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \ ompi_errhandler_invoke((mpi_object)->error_handler, \ (mpi_object), \ - (int)(mpi_object)->errhandler_type, \ + (int)(mpi_object)->error_handler->eh_mpi_object_type, \ (__mpi_err_code), \ (message)); \ return (__mpi_err_code); \ diff --git a/ompi/errhandler/errhandler_invoke.c b/ompi/errhandler/errhandler_invoke.c index 3cc7e2009fb..33526911765 100644 --- a/ompi/errhandler/errhandler_invoke.c +++ b/ompi/errhandler/errhandler_invoke.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2020 The University of Tennessee and The University + * Copyright (c) 2004-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -183,19 +183,19 @@ int ompi_errhandler_request_invoke(int count, case OMPI_REQUEST_COLL: return ompi_errhandler_invoke(mpi_object.comm->error_handler, mpi_object.comm, - mpi_object.comm->errhandler_type, + mpi_object.comm->error_handler->eh_mpi_object_type, ec, message); break; case OMPI_REQUEST_IO: return ompi_errhandler_invoke(mpi_object.file->error_handler, mpi_object.file, - mpi_object.file->errhandler_type, + mpi_object.file->error_handler->eh_mpi_object_type, ec, message); break; case OMPI_REQUEST_WIN: return ompi_errhandler_invoke(mpi_object.win->error_handler, mpi_object.win, - mpi_object.win->errhandler_type, + mpi_object.win->error_handler->eh_mpi_object_type, ec, message); break; default: diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index f6237ef9678..15b7111877f 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. - * Copyright (c) 2010-2017 The University of Tennessee and The University + * Copyright (c) 2010-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
@@ -220,8 +220,7 @@ static inline int memchecker_comm(MPI_Comm comm)
-    opal_memchecker_base_isdefined (&comm->c_name, MPI_MAX_OBJECT_NAME);
+    /* c_name is a char pointer, not an inline array: check only the pointer field itself */
+    opal_memchecker_base_isdefined (&comm->c_name, sizeof(char *));
     opal_memchecker_base_isdefined (&comm->c_my_rank, sizeof(int));
     opal_memchecker_base_isdefined (&comm->c_flags, sizeof(uint32_t));
-    opal_memchecker_base_isdefined (&comm->c_id_available, sizeof(int));
-    opal_memchecker_base_isdefined (&comm->c_id_start_index, sizeof(int));
     opal_memchecker_base_isdefined (&comm->c_local_group, sizeof(ompi_group_t *));
     opal_memchecker_base_isdefined (&comm->c_remote_group, sizeof(ompi_group_t *));
     opal_memchecker_base_isdefined (&comm->c_keyhash, sizeof(struct opal_hash_table_t *));