From 50906b34b311811e771d4678a42236e3ef62ab91 Mon Sep 17 00:00:00 2001 From: Igor Ivanov Date: Tue, 15 Mar 2016 18:21:39 +0200 Subject: [PATCH] oshmem: Align OSHMEM API with spec v1.3 (Add scoll/alltoall interface) --- oshmem/include/shmem.h.in | 38 +++++++++++++++++++++ oshmem/mca/scoll/base/scoll_base_frame.c | 1 + oshmem/mca/scoll/base/scoll_base_select.c | 28 ++++++++++++--- oshmem/mca/scoll/basic/Makefile.am | 5 +-- oshmem/mca/scoll/basic/scoll_basic.h | 9 ++++- oshmem/mca/scoll/basic/scoll_basic_module.c | 5 +-- oshmem/mca/scoll/fca/scoll_fca.h | 2 ++ oshmem/mca/scoll/fca/scoll_fca_module.c | 4 +++ oshmem/mca/scoll/mpi/scoll_mpi.h | 2 ++ oshmem/mca/scoll/mpi/scoll_mpi_module.c | 4 +++ oshmem/mca/scoll/scoll.h | 17 ++++++--- 11 files changed, 102 insertions(+), 13 deletions(-) diff --git a/oshmem/include/shmem.h.in b/oshmem/include/shmem.h.in index 3d25058bb58..6be14ddb4e6 100644 --- a/oshmem/include/shmem.h.in +++ b/oshmem/include/shmem.h.in @@ -101,6 +101,7 @@ enum shmem_wait_ops { #define _SHMEM_BCAST_SYNC_SIZE (1 + _SHMEM_BARRIER_SYNC_SIZE) #define _SHMEM_COLLECT_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE) #define _SHMEM_REDUCE_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE) +#define _SHMEM_ALLTOALL_SYNC_SIZE (1) #define _SHMEM_REDUCE_MIN_WRKDATA_SIZE (1) #define _SHMEM_SYNC_VALUE (-1) @@ -108,6 +109,7 @@ enum shmem_wait_ops { #define SHMEM_BCAST_SYNC_SIZE _SHMEM_BCAST_SYNC_SIZE #define SHMEM_COLLECT_SYNC_SIZE _SHMEM_COLLECT_SYNC_SIZE #define SHMEM_REDUCE_SYNC_SIZE _SHMEM_REDUCE_SYNC_SIZE +#define SHMEM_ALLTOALL_SYNC_SIZE _SHMEM_ALLTOALL_SYNC_SIZE #define SHMEM_REDUCE_MIN_WRKDATA_SIZE _SHMEM_REDUCE_MIN_WRKDATA_SIZE #define SHMEM_SYNC_VALUE _SHMEM_SYNC_VALUE @@ -191,6 +193,24 @@ OSHMEM_DECLSPEC void shmem_iput32(void* target, const void* source, ptrdiff_t ts OSHMEM_DECLSPEC void shmem_iput64(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_iput128(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +/* + * Nonblocking put routines + */ +OSHMEM_DECLSPEC void shmem_putmem_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_char_put_nbi(char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_short_put_nbi(short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int_put_nbi(int* target, const int* source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_long_put_nbi(long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longlong_put_nbi(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_float_put_nbi(float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_double_put_nbi(double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longdouble_put_nbi(long double *target, const long double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_put8_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_put16_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_put32_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_put64_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_put128_nbi(void *target, const void *source, size_t len, int pe); + /* * Elemental get routines */ @@ -232,6 +252,24 @@ OSHMEM_DECLSPEC void shmem_iget32(void* target, const void* source, ptrdiff_t ts OSHMEM_DECLSPEC void shmem_iget64(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); OSHMEM_DECLSPEC void shmem_iget128(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe); +/* + * Nonblocking data get routines + */ +OSHMEM_DECLSPEC void shmem_getmem_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_char_get_nbi(char *target, const char *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_short_get_nbi(short *target, const short *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_int_get_nbi(int *target, const int *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_long_get_nbi(long *target, const long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longlong_get_nbi(long long *target, const long long *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_float_get_nbi(float *target, const float *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_double_get_nbi(double *target, const double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_longdouble_get_nbi(long double *target, const long double *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_get8_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_get16_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_get32_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_get64_nbi(void *target, const void *source, size_t len, int pe); +OSHMEM_DECLSPEC void shmem_get128_nbi(void *target, const void *source, size_t len, int pe); + /* * Atomic operations */ diff --git a/oshmem/mca/scoll/base/scoll_base_frame.c b/oshmem/mca/scoll/base/scoll_base_frame.c index 3da96b5ee33..e8db9b35c35 100644 --- a/oshmem/mca/scoll/base/scoll_base_frame.c +++ b/oshmem/mca/scoll/base/scoll_base_frame.c @@ -46,6 +46,7 @@ static void scoll_base_module_construct(mca_scoll_base_module_t *m) m->scoll_broadcast = NULL; m->scoll_collect = NULL; m->scoll_reduce = NULL; + m->scoll_alltoall = NULL; m->scoll_module_enable = NULL; } diff --git a/oshmem/mca/scoll/base/scoll_base_select.c b/oshmem/mca/scoll/base/scoll_base_select.c index b44383696de..d25d24a1bc6 100644 --- a/oshmem/mca/scoll/base/scoll_base_select.c +++ b/oshmem/mca/scoll/base/scoll_base_select.c @@ -118,6 +118,22 @@ static int scoll_null_reduce(struct oshmem_group_t *group, return OSHMEM_SUCCESS; } +static int scoll_null_alltoall(struct oshmem_group_t *group, + void *target, + const void *source, + ptrdiff_t dst, ptrdiff_t sst, + size_t nlong, + long *pSync, + int alg) +{ + if (oshmem_proc_group_is_member(group)) { + SCOLL_ERROR("internal error"); + oshmem_shmem_abort(-1); + return OSHMEM_ERROR; + } + return OSHMEM_SUCCESS; +} + /* * Stuff for the OBJ interface */ @@ -160,6 +176,7 @@ int mca_scoll_base_group_unselect(struct oshmem_group_t * group) CLOSE(group, broadcast); CLOSE(group, collect); CLOSE(group, reduce); + CLOSE(group, alltoall); /* All done */ return OSHMEM_SUCCESS; @@ -184,6 +201,7 @@ int mca_scoll_base_select(struct oshmem_group_t *group) group->g_scoll.scoll_broadcast = scoll_null_broadcast; group->g_scoll.scoll_collect = scoll_null_collect; group->g_scoll.scoll_reduce = scoll_null_reduce; + group->g_scoll.scoll_alltoall = scoll_null_alltoall; return OSHMEM_SUCCESS; } SCOLL_VERBOSE(10, @@ -206,10 +224,11 @@ int mca_scoll_base_select(struct oshmem_group_t *group) if (OSHMEM_SUCCESS != ret) { mca_scoll_base_group_unselect(group); } else { + COPY(avail->ac_module, group, barrier); COPY(avail->ac_module, group, broadcast); COPY(avail->ac_module, group, collect); COPY(avail->ac_module, group, reduce); - COPY(avail->ac_module, group, barrier); + COPY(avail->ac_module, group, alltoall); } OBJ_RELEASE(avail->ac_module); OBJ_RELEASE(avail); @@ -220,7 +239,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group) if ((NULL == group->g_scoll.scoll_barrier) || (NULL == group->g_scoll.scoll_broadcast) || (NULL == group->g_scoll.scoll_collect) - || (NULL == group->g_scoll.scoll_reduce)) { + || (NULL == group->g_scoll.scoll_reduce) + || (NULL == group->g_scoll.scoll_alltoall)) { mca_scoll_base_group_unselect(group); return OSHMEM_ERR_NOT_FOUND; } @@ -228,8 +248,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group) return OSHMEM_SUCCESS; } -static int avail_coll_compare (opal_list_item_t **a, - opal_list_item_t **b) +static int avail_coll_compare(opal_list_item_t **a, + opal_list_item_t **b) { avail_com_t *acom = (avail_com_t *) *a; avail_com_t *bcom = (avail_com_t *) *b; diff --git a/oshmem/mca/scoll/basic/Makefile.am b/oshmem/mca/scoll/basic/Makefile.am index a9a39fb2864..689b563c80a 100644 --- a/oshmem/mca/scoll/basic/Makefile.am +++ b/oshmem/mca/scoll/basic/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2013 Mellanox Technologies, Inc. +# Copyright (c) 2013-2016 Mellanox Technologies, Inc. # All rights reserved. # $COPYRIGHT$ # @@ -15,7 +15,8 @@ sources = \ scoll_basic_barrier.c \ scoll_basic_broadcast.c \ scoll_basic_collect.c \ - scoll_basic_reduce.c + scoll_basic_reduce.c \ + scoll_basic_alltoall.c # Make the output library in this directory, and name it either diff --git a/oshmem/mca/scoll/basic/scoll_basic.h b/oshmem/mca/scoll/basic/scoll_basic.h index 4a0b4a4a9d2..a60eda80468 100644 --- a/oshmem/mca/scoll/basic/scoll_basic.h +++ b/oshmem/mca/scoll/basic/scoll_basic.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Mellanox Technologies, Inc. + * Copyright (c) 2013-2016 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -77,6 +77,13 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group, long *pSync, void *pWrk, int alg); +int mca_scoll_basic_alltoall(struct oshmem_group_t *group, + void *target, + const void *source, + ptrdiff_t dst, ptrdiff_t sst, + size_t nlong, + long *pSync, + int alg); static inline unsigned int scoll_log2(unsigned long val) { diff --git a/oshmem/mca/scoll/basic/scoll_basic_module.c b/oshmem/mca/scoll/basic/scoll_basic_module.c index 6e4e83ece21..0d1274c7d8e 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_module.c +++ b/oshmem/mca/scoll/basic/scoll_basic_module.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Mellanox Technologies, Inc. + * Copyright (c) 2013-2016 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ * @@ -52,9 +52,10 @@ mca_scoll_basic_query(struct oshmem_group_t *group, int *priority) module->super.scoll_broadcast = mca_scoll_basic_broadcast; module->super.scoll_collect = mca_scoll_basic_collect; module->super.scoll_reduce = mca_scoll_basic_reduce; + module->super.scoll_alltoall = mca_scoll_basic_alltoall; module->super.scoll_module_enable = mca_scoll_basic_enable; return &(module->super); } - return NULL ; + return NULL; } diff --git a/oshmem/mca/scoll/fca/scoll_fca.h b/oshmem/mca/scoll/fca/scoll_fca.h index 267dbdc4a27..38215ec8684 100644 --- a/oshmem/mca/scoll/fca/scoll_fca.h +++ b/oshmem/mca/scoll/fca/scoll_fca.h @@ -93,6 +93,8 @@ struct mca_scoll_fca_module_t { mca_scoll_base_module_t *previous_collect_module; mca_scoll_base_module_reduce_fn_t previous_reduce; mca_scoll_base_module_t *previous_reduce_module; + mca_scoll_base_module_alltoall_fn_t previous_alltoall; + mca_scoll_base_module_t *previous_alltoall_module; }; typedef struct mca_scoll_fca_module_t mca_scoll_fca_module_t; OBJ_CLASS_DECLARATION(mca_scoll_fca_module_t); diff --git a/oshmem/mca/scoll/fca/scoll_fca_module.c b/oshmem/mca/scoll/fca/scoll_fca_module.c index 2c6aab8212a..131391667cd 100644 --- a/oshmem/mca/scoll/fca/scoll_fca_module.c +++ b/oshmem/mca/scoll/fca/scoll_fca_module.c @@ -391,6 +391,7 @@ static int _save_coll_handlers(mca_scoll_fca_module_t *fca_module) FCA_SAVE_PREV_SCOLL_API(broadcast); FCA_SAVE_PREV_SCOLL_API(collect); FCA_SAVE_PREV_SCOLL_API(reduce); + FCA_SAVE_PREV_SCOLL_API(alltoall); return OSHMEM_SUCCESS; } @@ -450,6 +451,7 @@ static void mca_scoll_fca_module_clear(mca_scoll_fca_module_t *fca_module) fca_module->previous_broadcast = NULL; fca_module->previous_collect = NULL; fca_module->previous_reduce = NULL; + fca_module->previous_alltoall = NULL; } static void mca_scoll_fca_module_construct(mca_scoll_fca_module_t *fca_module) @@ -465,6 +467,7 @@ static void mca_scoll_fca_module_destruct(mca_scoll_fca_module_t *fca_module) OBJ_RELEASE(fca_module->previous_broadcast_module); OBJ_RELEASE(fca_module->previous_collect_module); OBJ_RELEASE(fca_module->previous_reduce_module); + OBJ_RELEASE(fca_module->previous_alltoall_module); if (fca_module->fca_comm) _destroy_fca_comm(fca_module); free(fca_module->local_ranks); @@ -541,6 +544,7 @@ mca_scoll_fca_comm_query(struct oshmem_group_t *comm, int *priority) fca_module->super.scoll_broadcast = mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast : NULL; + fca_module->super.scoll_alltoall = NULL; *priority = mca_scoll_fca_component.fca_priority; module = &fca_module->super; diff --git a/oshmem/mca/scoll/mpi/scoll_mpi.h b/oshmem/mca/scoll/mpi/scoll_mpi.h index 9938e27c15e..4c30f8193b4 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi.h +++ b/oshmem/mca/scoll/mpi/scoll_mpi.h @@ -69,6 +69,8 @@ struct mca_scoll_mpi_module_t { mca_scoll_base_module_t *previous_barrier_module; mca_scoll_base_module_collect_fn_t previous_collect; mca_scoll_base_module_t *previous_collect_module; + mca_scoll_base_module_alltoall_fn_t previous_alltoall; + mca_scoll_base_module_t *previous_alltoall_module; }; typedef struct mca_scoll_mpi_module_t mca_scoll_mpi_module_t; diff --git a/oshmem/mca/scoll/mpi/scoll_mpi_module.c b/oshmem/mca/scoll/mpi/scoll_mpi_module.c index a0ef2d45702..adc1b4a826f 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_module.c +++ b/oshmem/mca/scoll/mpi/scoll_mpi_module.c @@ -29,6 +29,7 @@ static void mca_scoll_mpi_module_clear(mca_scoll_mpi_module_t *mpi_module) mpi_module->previous_broadcast = NULL; mpi_module->previous_reduce = NULL; mpi_module->previous_collect = NULL; + mpi_module->previous_alltoall = NULL; } static void mca_scoll_mpi_module_construct(mca_scoll_mpi_module_t *mpi_module) @@ -43,6 +44,7 @@ static void mca_scoll_mpi_module_destruct(mca_scoll_mpi_module_t *mpi_module) OBJ_RELEASE(mpi_module->previous_broadcast_module); OBJ_RELEASE(mpi_module->previous_reduce_module); OBJ_RELEASE(mpi_module->previous_collect_module); + OBJ_RELEASE(mpi_module->previous_alltoall_module); mca_scoll_mpi_module_clear(mpi_module); /* Free ompi_comm */ @@ -68,6 +70,7 @@ static int mca_scoll_mpi_save_coll_handlers(mca_scoll_base_module_t *module, osh MPI_SAVE_PREV_SCOLL_API(broadcast); MPI_SAVE_PREV_SCOLL_API(reduce); MPI_SAVE_PREV_SCOLL_API(collect); + MPI_SAVE_PREV_SCOLL_API(alltoall); return OSHMEM_SUCCESS; } @@ -173,6 +176,7 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority) mpi_module->super.scoll_broadcast = mca_scoll_mpi_broadcast; mpi_module->super.scoll_reduce = mca_scoll_mpi_reduce; mpi_module->super.scoll_collect = mca_scoll_mpi_collect; + mpi_module->super.scoll_alltoall = NULL; *priority = cm->mpi_priority; module = &mpi_module->super; diff --git a/oshmem/mca/scoll/scoll.h b/oshmem/mca/scoll/scoll.h index 4f0336cff6e..d1e016430c7 100644 --- a/oshmem/mca/scoll/scoll.h +++ b/oshmem/mca/scoll/scoll.h @@ -89,7 +89,6 @@ typedef struct mca_scoll_base_component_1_0_0_t mca_scoll_base_component_t; typedef int (*mca_scoll_base_module_enable_1_0_0_fn_t)(struct mca_scoll_base_module_1_0_0_t* module, struct oshmem_group_t *comm); -typedef int (*mca_scoll_base_module_ft_event_fn_t)(int state); #define SCOLL_DEFAULT_ALG (-1) @@ -139,6 +138,13 @@ typedef int (*mca_scoll_base_module_reduce_fn_t)(struct oshmem_group_t *group, long *pSync, void *pWrk, int alg); +typedef int (*mca_scoll_base_module_alltoall_fn_t)(struct oshmem_group_t *group, + void *target, + const void *source, + ptrdiff_t dst, ptrdiff_t sst, + size_t nlong, + long *pSync, + int alg); struct mca_scoll_base_module_1_0_0_t { /** Collective modules all inherit from opal_object */ @@ -149,13 +155,14 @@ struct mca_scoll_base_module_1_0_0_t { mca_scoll_base_module_broadcast_fn_t scoll_broadcast; mca_scoll_base_module_collect_fn_t scoll_collect; mca_scoll_base_module_reduce_fn_t scoll_reduce; + mca_scoll_base_module_alltoall_fn_t scoll_alltoall; mca_scoll_base_module_enable_1_0_0_fn_t scoll_module_enable; }; typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_1_0_0_t; -/** Per guidence in mca.h, use the unversioned struct name if you just +/** Per guidance in mca.h, use the unversioned struct name if you just want to always keep up with the most recent version of the - interace. */ + interface. */ typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_t; OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t); @@ -171,7 +178,7 @@ OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t); /* * Collectives group cache structure * - * Collectives gorup cache structure, used to find functions to + * Collectives group cache structure, used to find functions to * implement collective algorithms and their associated modules. */ struct mca_scoll_base_group_scoll_t { @@ -183,6 +190,8 @@ struct mca_scoll_base_group_scoll_t { mca_scoll_base_module_1_0_0_t *scoll_collect_module; mca_scoll_base_module_reduce_fn_t scoll_reduce; mca_scoll_base_module_1_0_0_t *scoll_reduce_module; + mca_scoll_base_module_alltoall_fn_t scoll_alltoall; + mca_scoll_base_module_1_0_0_t *scoll_alltoall_module; }; typedef struct mca_scoll_base_group_scoll_t mca_scoll_base_group_scoll_t;