Skip to content

Commit

Permalink
FEAT: HMPI INITIAL COMMIT
Browse files Browse the repository at this point in the history
HyperMPI initial commit.
  • Loading branch information
nsosnsos committed Oct 26, 2020
1 parent 24d3af9 commit 64ac740
Show file tree
Hide file tree
Showing 56 changed files with 2,990 additions and 36 deletions.
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "xucg"]
path = src/ucg
url = https://github.com/kunpengcompute/xucg.git
branch = huawei
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Copyright (C) 2015 The University of Tennessee and The University
Copyright (C) 2016 ARM Ltd. All rights reserved.
Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved.
Copyright (C) 2016-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019-2020 Huawei Technologies Co.,Ltd. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Expand Down
9 changes: 8 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and The University
# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
# Copyright (C) Huawei Technologies Co.,Ltd. 2020. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#
Expand Down Expand Up @@ -30,7 +31,13 @@ SUBDIRS = \
src/ucm \
src/ucs \
src/uct \
src/ucp \
src/ucp

if HAVE_UCG
SUBDIRS += src/ucg
endif

SUBDIRS += \
src/tools/info \
src/tools/perf \
src/tools/profile \
Expand Down
56 changes: 56 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,59 @@ To reference the UCX website:
* [x86_64](https://en.wikipedia.org/wiki/X86-64)
* [Power8/9](https://www.ibm.com/support/knowledgecenter/en/POWER9/p9hdx/POWER9welcome.htm)
* [Arm v8](https://www.arm.com/products/silicon-ip-cpu)

## Huawei Optimization Introduction

For performance reasons, UCX **DOES NOT** provide functionality related to transmission security.

There are three optimized collective operations:

- MPI_Allreduce
- MPI_Bcast
- MPI_Barrier

New algorithms are as follows:

- Binomial tree
- Ring
- Recursive
- Topo-aware Binomial tree
- Topo-aware K-nomial tree
- Topo-aware Recursive + Binomial(intra)
- Topo-aware Recursive + K-nomial(intra)

Select a specific algorithm using the parameters shown in the tables below.

Bcast:

| UCX_BUILTIN_BCAST_ALGORITHM | Algorithm |
| ---- | ---- |
| 1 | Binomial tree |
| 2 | Topo-aware Binomial tree |
| 3 | Topo-aware K-nomial tree |
| 4 | Topo-aware K-nomial tree + Binomial tree(intra) |

Allreduce:

| UCX_BUILTIN_ALLREDUCE_ALGORITHM | Algorithm |
| ---- | ---- |
| 1 | Recursive |
| 2 | Topo-aware Recursive + Binomial(intra)(Node) |
| 3 | Topo-aware Recursive + Binomial(intra)(Socket) |
| 4 | Ring |
| 5 | Topo-aware Recursive + K-nomial (intra)(Node) |
| 6 | Topo-aware Recursive + K-nomial (intra)(Socket) |
| 7 | Topo-aware K-nomial(Node) |
| 8 | Topo-aware K-nomial(Socket) |

Barrier:

| UCX_BUILTIN_BARRIER_ALGORITHM | Algorithm |
| ---- | ---- |
| 1 | Recursive |
| 2 | Topo-aware Recursive + Binomial(intra)(Node) |
| 3 | Topo-aware Recursive + Binomial(intra)(Socket) |
| 4 | Topo-aware Recursive + K-nomial (intra)(Node) |
| 5 | Topo-aware Recursive + K-nomial (intra)(Socket) |
| 6 | Topo-aware K-nomial(Node) |
| 7 | Topo-aware K-nomial(Socket) |
36 changes: 35 additions & 1 deletion autogen.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,40 @@
#!/bin/sh

# Print the command-line help text for this script to stdout.
usage()
{
    cat <<'EOF'
Usage: autogen.sh <options>

 -h|--help Show this help message
 --with-ucg Fetch UCG submodule

EOF
}

with_ucg="no"

# Parse the command-line options. --help prints the usage text and exits
# successfully; --with-ucg records that the UCG submodule should be fetched;
# anything else is reported, the usage text is shown, and the script aborts.
for key in "$@"
do
    case "$key" in
    -h|--help)
        usage
        exit 0
        ;;
    --with-ucg)
        with_ucg="yes"
        ;;
    *)
        echo "autogen.sh: unknown option: $key" >&2
        usage
        # The operand of "exit" must be an unsigned integer in POSIX sh;
        # a negative value such as -2 is rejected by shells like dash.
        exit 2
        ;;
    esac
done

rm -rf autom4te.cache
mkdir -p config/m4 config/aux
mkdir -p config/m4 config/aux

# Update the git submodules only when --with-ucg was given on the command
# line (with_ucg defaults to "no").
if [ "X$with_ucg" = "Xyes" ]
then
git submodule update --init --recursive --remote
fi

autoreconf -v --install || exit 1
rm -rf autom4te.cache
8 changes: 8 additions & 0 deletions config/m4/compiler.m4
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED.
# Copyright (C) ARM Ltd. 2016-2018. ALL RIGHTS RESERVED.
# Copyright (C) Huawei Technologies Co.,Ltd. 2020. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

Expand Down Expand Up @@ -282,6 +283,13 @@ AS_IF([test "x$with_avx" != xyes],
])


#
# CLWB
#
#COMPILER_OPTION([clwb], [CLWB], [-mclwb], [$enable_optimizations],
# [int main() { int* a; asm volatile("clwb %0" :: "m" (a)); return 0; }])


DETECT_UARCH()

#
Expand Down
12 changes: 9 additions & 3 deletions config/m4/mpi.m4
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
#
# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
#
# Copyright (C) Huawei Technologies Co.,Ltd. 2020. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

Expand All @@ -26,8 +28,10 @@ AS_IF([test "x$with_mpi" = xyes],
[
AC_ARG_VAR(MPICC,[MPI C compiler command])
AC_PATH_PROGS(MPICC,mpicc mpiicc,"",$mpi_path)
AC_PATH_PROGS(MPICXX,mpicxx mpiicxx,"",$mpi_path)
AC_ARG_VAR(MPIRUN,[MPI launch command])
AC_PATH_PROGS(MPIRUN,mpirun mpiexec aprun orterun,"",$mpi_path)
AC_SUBST([OMPI_COLL_UCX], ["$mpi_path/../lib/openmpi/mca_coll_ucx.la"])
AS_IF([test -z "$MPIRUN"],
AC_MSG_ERROR([--with-mpi was requested but MPI was not found in the PATH in $mpi_path]),[:])
],[:])
Expand All @@ -36,6 +40,8 @@ AS_IF([test -n "$MPICC"],
[AC_DEFINE([HAVE_MPI], [1], [MPI support])
mpi_enable=Disabled],
[mpi_enable=Enabled])
AM_CONDITIONAL([HAVE_MPI], [test -n "$MPIRUN"])
AM_CONDITIONAL([HAVE_MPICC], [test -n "$MPICC"])
AM_CONDITIONAL([HAVE_MPIRUN], [test -n "$MPIRUN"])
AM_CONDITIONAL([HAVE_MPI], [test -n "$MPIRUN"])
AM_CONDITIONAL([HAVE_MPICC], [test -n "$MPICC"])
AM_CONDITIONAL([HAVE_MPIRUN], [test -n "$MPIRUN"])
AM_CONDITIONAL([HAVE_OMPI], [test -n "$OMPI_COLL_UCX"])
AM_CONDITIONAL([HAVE_OMPI_SRC], [test ! -z "$with_ompi_src"])
26 changes: 25 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Copyright (C) The University of Tennessee and The University
# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
# Copyright (C) ARM Ltd. 2016-2019. ALL RIGHTS RESERVED.
# Copyright (C) Huawei Technologies Co.,Ltd. 2020. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#
AC_PREREQ([2.63])
Expand Down Expand Up @@ -169,6 +170,7 @@ AS_IF([test "x$with_docs_only" = xyes],
AM_CONDITIONAL([HAVE_DC_EXP], [false])
AM_CONDITIONAL([HAVE_TL_UD], [false])
AM_CONDITIONAL([HAVE_TL_CM], [false])
AM_CONDITIONAL([HAVE_HNS_ROCE], [false])
AM_CONDITIONAL([HAVE_IBV_EX_HW_TM], [false])
AM_CONDITIONAL([HAVE_CRAY_UGNI], [false])
AM_CONDITIONAL([HAVE_CUDA], [false])
Expand Down Expand Up @@ -204,6 +206,7 @@ AS_IF([test "x$with_docs_only" = xyes],
m4_include([config/m4/gdrcopy.m4])
m4_include([src/ucm/configure.m4])
m4_include([src/uct/configure.m4])
m4_include([src/ucg/configure.m4])
m4_include([src/tools/perf/configure.m4])
m4_include([test/gtest/configure.m4])
Expand Down Expand Up @@ -317,17 +320,34 @@ AS_IF([test "x$with_docs_only" = xyes],
[],
[enable_examples=no])
AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"])
#
# Disable UCG - Group collective operations component
#
# The "action-if-given" argument of AC_ARG_ENABLE runs for --enable-ucg as
# well as for --disable-ucg, so the decision is taken from the value of
# enable_ucg afterwards instead of from which branch was executed.
# UCG stays enabled unless --disable-ucg (or --enable-ucg=no) is passed.
#
AC_ARG_ENABLE([ucg],
    [AS_HELP_STRING([--disable-ucg],
        [Disable the group collective operations component, default: NO])],
    [],
    [enable_ucg=yes])
AS_IF([test "x$enable_ucg" != "xno"],
    [AC_DEFINE([ENABLE_UCG], [1],
        [Enable Groups and collective operations support (UCG)])],
    [AC_DEFINE([ENABLE_UCG], [0],
        [Enable Groups and collective operations support (UCG)])])
AM_CONDITIONAL([HAVE_UCG], [test "x$enable_ucg" != "xno"])
]) # Docs only

#
# Print which transports are built
# Print which transports and group topologies are built
#
build_modules="${uct_modules}"
build_modules+="${uct_ib_modules}"
build_modules+="${uct_cuda_modules}"
build_modules+="${ucm_modules}"
build_modules+="${ucx_perftest_modules}"
build_modules+="${uct_rocm_modules}"
AS_IF([test "x$enable_ucg" != "xno"],
[AC_MSG_NOTICE([Supported group modules: $ucg_modules])
build_modules+="${ucg_modules}"],[])
AC_SUBST([build_modules], [${build_modules}])

#
Expand All @@ -354,6 +374,8 @@ AC_CONFIG_FILES([
src/ucp/Makefile
src/ucp/api/ucp_version.h
src/ucp/core/ucp_version.c
src/ucg/api/ucg_version.h
src/ucg/base/ucg_version.c
src/tools/info/Makefile
src/tools/profile/Makefile
test/apps/Makefile
Expand Down Expand Up @@ -389,5 +411,7 @@ AC_MSG_NOTICE([ ROCM modules: <$(echo $uct_rocm_modules|tr ':' ' ') >])
AC_MSG_NOTICE([ IB modules: <$(echo $uct_ib_modules|tr ':' ' ') >])
AC_MSG_NOTICE([ UCM modules: <$(echo $ucm_modules|tr ':' ' ') >])
AC_MSG_NOTICE([ Perf modules: <$(echo $ucx_perftest_modules|tr ':' ' ') >])
AS_IF([test "x$enable_ucg" != "xno"],
[AC_MSG_NOTICE([ UCG modules: <$(echo $ucg_modules|tr ':' ' ') >])], [])
])
AC_MSG_NOTICE([=========================================================])
2 changes: 2 additions & 0 deletions contrib/configure-opt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/sh
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
# Copyright (C) Huawei Technologies Co.,Ltd. 2020. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#
Expand All @@ -13,5 +14,6 @@
basedir=$(cd $(dirname $0) && pwd)
$basedir/configure-release \
--with-avx \
--with-clwb \
--with-sse41 \
"$@"
1 change: 1 addition & 0 deletions contrib/upload_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ git remote show origin &>/dev/null || git remote add origin https://github.com/o
git fetch --all
git checkout -t origin/master -f
git pull
git submodule update --init --recursive --remote
cp -f ../doc/doxygen-doc/ucx.pdf ./
git commit ucx.pdf -m "update ucx.pdf for $rev"
git push
5 changes: 5 additions & 0 deletions src/tools/info/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
# Copyright (C) Huawei Technologies Co.,Ltd. 2020. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#
Expand Down Expand Up @@ -32,6 +33,9 @@ ucx_info_SOURCES = \
tl_info.c \
type_info.c \
ucx_info.c
if HAVE_UCG
ucx_info_SOURCES += group_info.c
endif

noinst_HEADERS = \
ucx_info.h
Expand All @@ -42,5 +46,6 @@ nodist_ucx_info_SOURCES = \
ucx_info_LDADD = \
$(abs_top_builddir)/src/uct/libuct.la \
$(abs_top_builddir)/src/ucp/libucp.la \
$(abs_top_builddir)/src/ucg/libucg.la \
$(abs_top_builddir)/src/ucs/libucs.la \
$(abs_top_builddir)/src/ucm/libucm.la
37 changes: 37 additions & 0 deletions src/tools/info/group_info.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* Copyright (C) Huawei Technologies Co., Ltd. 2019. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include "ucx_info.h"

#include <ucg/api/ucg_mpi.h>
#include <ucg/api/ucg_plan_component.h>
#include <ucg/api/ucg_mpi.h>
#include <ucs/debug/memtrack.h>

/*
 * Printable names of the predefined collective operations, terminated by a
 * NULL sentinel. The order is in accordance with @ref enum ucg_predefined.
 */
const char *collective_names[] = {
    [0] = "barrier",
    [1] = "reduce",
    [2] = "gather",
    [3] = "bcast",
    [4] = "scatter",
    [5] = "allreduce",
    [6] = NULL
};

#define EMPTY UCG_GROUP_MEMBER_DISTANCE_LAST

/* Address returned by dummy_resolve_address() for every group member. */
ucg_address_t *worker_address = NULL;

/**
 * Stub address resolver: ignores which member is being asked about and
 * always reports the global worker_address with a zero length.
 *
 * @param [in]  cb_group_obj  Not used by this implementation.
 * @param [in]  index         Not used by this implementation.
 * @param [out] addr          Receives the current value of worker_address.
 * @param [out] addr_len      Always set to 0.
 *
 * @return UCS_OK, unconditionally.
 */
ucs_status_t dummy_resolve_address(void *cb_group_obj,
                                   ucg_group_member_index_t index,
                                   ucg_address_t **addr, size_t *addr_len)
{
    /* special debug flow: replace uct_ep_t with member indexes */
    *addr_len = 0;
    *addr     = worker_address;

    return UCS_OK;
}

/**
 * No-op address release routine.
 *
 * @param [in] addr  Ignored; nothing is freed or modified.
 */
void dummy_release_address(ucg_address_t *addr)
{
    (void)addr; /* intentionally empty */
}
3 changes: 2 additions & 1 deletion src/tools/info/proto_info.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED.
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -139,7 +140,7 @@ void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
print_resource_usage(&usage, "UCP context");
}

if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) {
if (!(print_opts & (PRINT_UCP_WORKER | PRINT_UCP_EP | PRINT_UCG | PRINT_UCG_TOPO))) {
goto out_cleanup_context;
}

Expand Down
2 changes: 2 additions & 0 deletions src/tools/info/sys_info.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -72,6 +73,7 @@ void print_sys_info()

printf("# Timer frequency: %.3f MHz\n", ucs_get_cpu_clocks_per_sec() / 1e6);
printf("# CPU model: %s\n", cpu_model_names[ucs_arch_get_cpu_model()]);
printf("# CPU flags: 0x%08X\n", ucs_arch_get_cpu_flag());

printf("# Memcpy bandwidth:\n");
for (size = 4096; size <= 256 * UCS_MBYTE; size *= 2) {
Expand Down
Loading

0 comments on commit 64ac740

Please sign in to comment.