-
Notifications
You must be signed in to change notification settings - Fork 859
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Mellanox collective communication library (MCCL) integration layer
- Loading branch information
Valentin Petrov
committed
Feb 17, 2020
1 parent
1275766
commit 6247e31
Showing
10 changed files
with
915 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
dnl -*- shell-script -*- | ||
dnl | ||
dnl Copyright (c) 2011 Mellanox Technologies. All rights reserved. | ||
dnl Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. | ||
dnl Copyright (c) 2015 Research Organization for Information Science | ||
dnl and Technology (RIST). All rights reserved. | ||
dnl $COPYRIGHT$ | ||
dnl | ||
dnl Additional copyrights may follow | ||
dnl | ||
dnl $HEADER$ | ||
dnl | ||
|
||
# OMPI_CHECK_MCCL(prefix, [action-if-found], [action-if-not-found]) | ||
# -------------------------------------------------------- | ||
# check if mccl support can be found. sets prefix_{CPPFLAGS, | ||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is | ||
# support, otherwise executes action-if-not-found | ||
#
# Honors --with-mccl: the value "no" skips the probe entirely, and an
# explicit request that cannot be satisfied aborts configure.  MCCL is
# also disabled when progress threads are enabled.
AC_DEFUN([OMPI_CHECK_MCCL],[ | ||
OPAL_VAR_SCOPE_PUSH([ompi_check_mccl_dir ompi_check_mccl_libs ompi_check_mccl_happy CPPFLAGS_save LDFLAGS_save LIBS_save]) | ||
|
||
AC_ARG_WITH([mccl], | ||
[AS_HELP_STRING([--with-mccl(=DIR)], | ||
[Build mccl (Mellanox collective communication library) support, optionally adding | ||
DIR/include and DIR/lib or DIR/lib64 to the search path for headers and libraries])]) | ||
|
||
AS_IF([test "$with_mccl" != "no"], | ||
[ompi_check_mccl_libs=mccl | ||
AS_IF([test ! -z "$with_mccl" && test "$with_mccl" != "yes"], | ||
[ompi_check_mccl_dir=$with_mccl]) | ||
|
||
# Save the global flags so they can be restored after the probe
CPPFLAGS_save=$CPPFLAGS | ||
LDFLAGS_save=$LDFLAGS | ||
LIBS_save=$LIBS | ||
|
||
OPAL_LOG_MSG([$1_CPPFLAGS : $$1_CPPFLAGS], 1) | ||
OPAL_LOG_MSG([$1_LDFLAGS : $$1_LDFLAGS], 1) | ||
OPAL_LOG_MSG([$1_LIBS : $$1_LIBS], 1) | ||
|
||
OPAL_CHECK_PACKAGE([$1], | ||
[api/mccl.h], | ||
[$ompi_check_mccl_libs], | ||
[mccl_init_context], | ||
[], | ||
[$ompi_check_mccl_dir], | ||
[], | ||
[ompi_check_mccl_happy="yes"], | ||
[ompi_check_mccl_happy="no"]) | ||
|
||
AS_IF([test "$ompi_check_mccl_happy" = "yes"], | ||
[ | ||
# Probe with the flags computed for the prefix given by the caller
# instead of a hard-coded coll_mccl prefix
CPPFLAGS=$$1_CPPFLAGS | ||
LDFLAGS=$$1_LDFLAGS | ||
LIBS=$$1_LIBS | ||
AC_CHECK_FUNCS([mccl_comm_free], [], []) | ||
], | ||
[]) | ||
|
||
CPPFLAGS=$CPPFLAGS_save | ||
LDFLAGS=$LDFLAGS_save | ||
LIBS=$LIBS_save], | ||
[ompi_check_mccl_happy=no]) | ||
|
||
AS_IF([test "$ompi_check_mccl_happy" = "yes" && test "$enable_progress_threads" = "yes"], | ||
[AC_MSG_WARN([mccl driver does not currently support progress threads. Disabling MCCL.]) | ||
ompi_check_mccl_happy="no"]) | ||
|
||
AS_IF([test "$ompi_check_mccl_happy" = "yes"], | ||
[$2], | ||
[AS_IF([test ! -z "$with_mccl" && test "$with_mccl" != "no"], | ||
[AC_MSG_ERROR([MCCL support requested but not found. Aborting])]) | ||
$3]) | ||
|
||
OPAL_VAR_SCOPE_POP | ||
]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# -*- shell-script -*- | ||
# | ||
# | ||
# Copyright (c) 2020 Mellanox Technologies. All rights reserved. | ||
# $COPYRIGHT$ | ||
# | ||
# Additional copyrights may follow | ||
# | ||
# $HEADER$ | ||
# | ||
# | ||
|
||
# Preprocessor flags for every source in this directory, taken from
# coll_mccl_CPPFLAGS (presumably substituted by configure -- confirm).
AM_CPPFLAGS = $(coll_mccl_CPPFLAGS) | ||
|
||
# Source list shared by the DSO and the static variant of the component.
coll_mccl_sources = \ | ||
coll_mccl.h \ | ||
coll_mccl_debug.h \ | ||
coll_mccl_dtypes.h \ | ||
coll_mccl_module.c \ | ||
coll_mccl_component.c \ | ||
coll_mccl_ops.c | ||
|
||
# Make the output library in this directory, and name it either | ||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la | ||
# (for static builds). | ||
|
||
# Exactly one of component_install / component_noinst is non-empty,
# selected by the MCA_BUILD_ompi_coll_mccl_DSO conditional.
if MCA_BUILD_ompi_coll_mccl_DSO | ||
component_noinst = | ||
component_install = mca_coll_mccl.la | ||
else | ||
component_noinst = libmca_coll_mccl.la | ||
component_install = | ||
endif | ||
|
||
# DSO variant: installed into $(ompilibdir) and linked against
# lib@OMPI_LIBMPI_NAME@.la plus the libraries in coll_mccl_LIBS.
mcacomponentdir = $(ompilibdir) | ||
mcacomponent_LTLIBRARIES = $(component_install) | ||
mca_coll_mccl_la_SOURCES = $(coll_mccl_sources) | ||
mca_coll_mccl_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ | ||
$(coll_mccl_LIBS) | ||
mca_coll_mccl_la_LDFLAGS = -module -avoid-version $(coll_mccl_LDFLAGS) | ||
|
||
# Static variant: built but not installed (noinst); it adds only the
# libraries in coll_mccl_LIBS.
noinst_LTLIBRARIES = $(component_noinst) | ||
libmca_coll_mccl_la_SOURCES = $(coll_mccl_sources) | ||
libmca_coll_mccl_la_LIBADD = $(coll_mccl_LIBS) | ||
libmca_coll_mccl_la_LDFLAGS = -module -avoid-version $(coll_mccl_LDFLAGS) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/** | ||
Copyright (c) 2020 Mellanox Technologies. All rights reserved. | ||
$COPYRIGHT$ | ||
Additional copyrights may follow | ||
$HEADER$ | ||
*/ | ||
|
||
/* Include guard: the macro tested and the macro defined must be the same name. */
#ifndef MCA_COLL_MCCL_H | ||
#define MCA_COLL_MCCL_H | ||
|
||
#include "ompi_config.h" | ||
|
||
#include "mpi.h" | ||
#include "ompi/mca/mca.h" | ||
#include "opal/memoryhooks/memory.h" | ||
#include "opal/mca/memory/base/base.h" | ||
#include "ompi/mca/coll/coll.h" | ||
#include "ompi/request/request.h" | ||
#include "ompi/mca/pml/pml.h" | ||
#include "ompi/mca/coll/base/coll_tags.h" | ||
#include "ompi/communicator/communicator.h" | ||
#include "ompi/attribute/attribute.h" | ||
#include "ompi/op/op.h" | ||
|
||
#include "orte/runtime/orte_globals.h" | ||
|
||
#include "api/mccl.h" | ||
|
||
#include "coll_mccl_debug.h" | ||
/*
 * Fallback definition, used only when MCCL_VERSION is not already defined.
 * Combines major and minor into one value by shifting them by
 * MCCL_MAJOR_BIT and MCCL_MINOR_BIT and OR-ing the results. Those two
 * constants are not defined in this header -- presumably they come from
 * api/mccl.h; confirm.
 */
#ifndef MCCL_VERSION | ||
#define MCCL_VERSION(major, minor) (((major)<<MCCL_MAJOR_BIT)|((minor)<<MCCL_MINOR_BIT)) | ||
#endif | ||
BEGIN_C_DECLS | ||
|
||
/**
 * Component-level state of the mccl coll component: the base coll
 * component, the MCA parameters, and the library-wide MCCL handles.
 */
struct mca_coll_mccl_component_t { | ||
/** Base coll component */ | ||
mca_coll_base_component_2_0_0_t super; | ||
|
||
/** MCA parameter: Priority of this component */ | ||
int mccl_priority; | ||
|
||
/** MCA parameter: Verbose level of this component */ | ||
int mccl_verbose; | ||
|
||
/** MCA parameter: Enable MCCL */ | ||
int mccl_enable; | ||
|
||
/** r/o MCA parameter: libmccl compiletime version */ | ||
char* compiletime_version; | ||
|
||
/** r/o MCA parameter: libmccl runtime version */ | ||
const char* runtime_version; | ||
|
||
/** MCA parameter: Minimal number of processes in the communicator | ||
for the corresponding mccl context to be created */ | ||
int mccl_np; | ||
|
||
/** Whether or not mccl_init was ever called */ | ||
bool libmccl_initialized; | ||
/** MCCL context handle -- presumably one per process, created at init; confirm */
mccl_context_h mccl_context; | ||
/** Free list of requests -- element type is not visible in this header; TODO confirm */
opal_free_list_t requests; | ||
}; | ||
typedef struct mca_coll_mccl_component_t mca_coll_mccl_component_t; | ||
|
||
/** The component instance; declared here, defined outside this header. */
OMPI_MODULE_DECLSPEC extern mca_coll_mccl_component_t mca_coll_mccl_component; | ||
|
||
/** | ||
* MCCL enabled communicator: per-communicator module state | ||
*/ | ||
struct mca_coll_mccl_module_t { | ||
/** Base coll module */
mca_coll_base_module_t super; | ||
/** Communicator this module belongs to */
ompi_communicator_t* comm; | ||
/** Rank value -- presumably this process's rank in comm; confirm */
int rank; | ||
/** MCCL communicator handle associated with comm */
mccl_comm_h mccl_comm; | ||
/*
 * For allreduce, barrier and bcast: a function pointer and its module.
 * NOTE(review): presumably the implementations selected before this
 * module, kept as a fallback -- confirm against coll_mccl_module.c.
 */
mca_coll_base_module_allreduce_fn_t previous_allreduce; | ||
mca_coll_base_module_t* previous_allreduce_module; | ||
mca_coll_base_module_barrier_fn_t previous_barrier; | ||
mca_coll_base_module_t* previous_barrier_module; | ||
mca_coll_base_module_bcast_fn_t previous_bcast; | ||
mca_coll_base_module_t* previous_bcast_module; | ||
}; | ||
typedef struct mca_coll_mccl_module_t mca_coll_mccl_module_t; | ||
OBJ_CLASS_DECLARATION(mca_coll_mccl_module_t); | ||
|
||
|
||
|
||
/**
 * Component query taking the progress-thread and MPI-thread settings.
 * The return value's meaning is not visible in this header -- presumably
 * an OMPI status code; confirm against the definition.
 */
int mca_coll_mccl_init_query(bool enable_progress_threads, bool enable_mpi_threads); | ||
/**
 * Per-communicator query: returns a coll module pointer for comm.
 * NOTE(review): priority looks like an output parameter, and the result is
 * presumably NULL when MCCL cannot be used on comm -- confirm.
 */
mca_coll_base_module_t *mca_coll_mccl_comm_query(struct ompi_communicator_t *comm, int *priority); | ||
|
||
|
||
||
/**
 * Allreduce, barrier and bcast entry points of this component.
 * sbuf is read-only for allreduce (const); bcast uses one buffer, buf.
 * Return values are presumably OMPI status codes -- confirm.
 */
int mca_coll_mccl_allreduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, | ||
struct ompi_op_t *op, struct ompi_communicator_t *comm, | ||
mca_coll_base_module_t *module); | ||
int mca_coll_mccl_barrier(struct ompi_communicator_t *comm, | ||
mca_coll_base_module_t *module); | ||
int mca_coll_mccl_bcast(void *buf, int count, struct ompi_datatype_t *dtype, | ||
int root, struct ompi_communicator_t *comm, | ||
mca_coll_base_module_t *module); | ||
|
||
END_C_DECLS | ||
#endif |
Oops, something went wrong.