Skip to content

Commit

Permalink
Make sure opal_thread_start always spawns pthreads
Browse files Browse the repository at this point in the history
Users of `opal_thread_start` (btl/tcp, ft, smcuda, progress thread)
may spawn threads that block in functions unaware of argobots
or qthreads (e.g., libevent or read(3)). If we spawn an argobot or
qthread instead of a pthread, the thread executing the argobot or
qthread (potentially the main thread) blocks, leading to a deadlock.
Open MPI expects the semantics of a pthread, so
we should handle all internal threads as such.

Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
  • Loading branch information
devreal committed Aug 27, 2021
1 parent 38c2e37 commit 286095f
Show file tree
Hide file tree
Showing 8 changed files with 15 additions and 222 deletions.
74 changes: 1 addition & 73 deletions opal/mca/threads/argobots/threads_argobots_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -33,78 +33,6 @@
#include "opal/util/output.h"
#include "opal/util/sys_limits.h"

/*
 * Constructor: a freshly created thread object has no entry function
 * and no Argobots handle attached to it yet.
 */
static void opal_thread_construct(opal_thread_t *t)
{
    t->t_handle = ABT_THREAD_NULL;
    t->t_run = NULL;
}

OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);

static inline ABT_thread opal_thread_get_argobots_self(void)
{
ABT_thread self;
ABT_thread_self(&self);
return self;
}

/*
 * Trampoline passed to Argobots: invokes the thread object's t_run entry
 * function with the thread object itself as argument and stores the
 * returned pointer in t_ret.
 */
static void opal_thread_argobots_wrapper(void *arg)
{
    opal_thread_t *thread = (opal_thread_t *) arg;
    void *(*entry)(void *) = (void *(*) (void *) ) thread->t_run;

    thread->t_ret = entry(thread);
}

/*
 * Allocate a new opal_thread_t whose handle refers to the calling
 * Argobots work unit and return it.
 *
 * NOTE(review): the result of OBJ_NEW is used without a NULL check --
 * confirm whether allocation failure needs handling here.
 */
opal_thread_t *opal_thread_get_self(void)
{
    opal_thread_t *self_obj;

    opal_threads_argobots_ensure_init();
    self_obj = OBJ_NEW(opal_thread_t);
    self_obj->t_handle = opal_thread_get_argobots_self();
    return self_obj;
}

/*
 * Report whether the handle stored in t is the handle of the calling
 * Argobots work unit.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    ABT_thread current;

    opal_threads_argobots_ensure_init();
    current = opal_thread_get_argobots_self();
    return current == t->t_handle;
}

/*
 * Release the Argobots handle of t via ABT_thread_free() and, when
 * thr_return is non-NULL, hand back the value recorded in t->t_ret.
 * The handle is reset to ABT_THREAD_NULL in every case.
 *
 * Returns OPAL_SUCCESS when ABT_thread_free() reported ABT_SUCCESS,
 * OPAL_ERROR otherwise.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    const int abt_rc = ABT_thread_free(&t->t_handle);

    if (NULL != thr_return) {
        *thr_return = t->t_ret;
    }
    t->t_handle = ABT_THREAD_NULL;

    if (ABT_SUCCESS != abt_rc) {
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}

/*
 * No-op in this implementation. Declared with an explicit (void)
 * parameter list so the definition acts as a prototype and matches the
 * other implementations of this function.
 */
void opal_thread_set_main(void)
{
}

/*
 * Create an Argobots thread on the caller's execution stream that runs
 * opal_thread_argobots_wrapper with t as argument; the new handle is
 * stored in t->t_handle.
 *
 * In debug builds, returns OPAL_ERR_BAD_PARAM when t has no entry
 * function or already carries a handle. Otherwise returns OPAL_SUCCESS
 * if the creation call reported ABT_SUCCESS, and OPAL_ERROR if not.
 */
int opal_thread_start(opal_thread_t *t)
{
    ABT_xstream current_xstream;
    int abt_rc;

    opal_threads_argobots_ensure_init();

    if (OPAL_ENABLE_DEBUG && (NULL == t->t_run || ABT_THREAD_NULL != t->t_handle)) {
        return OPAL_ERR_BAD_PARAM;
    }

    ABT_xstream_self(&current_xstream);
    abt_rc = ABT_thread_create_on_xstream(current_xstream, opal_thread_argobots_wrapper, t,
                                          ABT_THREAD_ATTR_NULL, &t->t_handle);
    if (ABT_SUCCESS != abt_rc) {
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}

OBJ_CLASS_DECLARATION(opal_thread_t);

int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor)
{
opal_threads_argobots_ensure_init();
Expand Down
11 changes: 1 addition & 10 deletions opal/mca/threads/argobots/threads_argobots_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
Expand All @@ -27,15 +27,6 @@
#define OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H

#include "opal/mca/threads/argobots/threads_argobots.h"
#include <signal.h>

/* Thread object of the Argobots threading component. */
struct opal_thread_t {
/* Base object; must stay first so the struct can be used as an opal_object_t. */
opal_object_t super;
/* Entry function invoked by the Argobots wrapper when the thread runs. */
opal_thread_fn_t t_run;
/* Presumably a user argument made available to t_run -- not touched by the module code shown. */
void *t_arg;
/* Argobots handle; ABT_THREAD_NULL while no thread is attached. */
ABT_thread t_handle;
/* Value returned by t_run, handed out by opal_thread_join(). */
void *t_ret;
};

/* Argobots are cooperatively scheduled so yield when idle */
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
Expand Down
1 change: 1 addition & 0 deletions opal/mca/threads/base/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ headers += \

libmca_threads_la_SOURCES += \
base/mutex.c \
base/create_join.c \
base/threads_base.c \
base/tsd.c \
base/wait_sync.c
51 changes: 1 addition & 50 deletions opal/mca/threads/pthreads/threads_pthreads_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -32,58 +32,9 @@
#include "opal/util/output.h"
#include "opal/util/sys_limits.h"

/*
 * Constructor: a freshly created thread object has no entry function,
 * and its handle holds the (pthread_t) -1 marker that opal_thread_start
 * checks for in debug builds.
 */
static void opal_thread_construct(opal_thread_t *t)
{
    t->t_handle = (pthread_t) -1;
    t->t_run = NULL;
}

OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);

/*
 * Spawn a pthread that executes t->t_run with t as its argument; the new
 * thread id is stored in t->t_handle.
 *
 * In debug builds, returns OPAL_ERR_BAD_PARAM when t has no entry
 * function or its handle is not the (pthread_t) -1 marker. Otherwise
 * returns OPAL_SUCCESS if pthread_create() returned 0 and
 * OPAL_ERR_IN_ERRNO if not.
 *
 * NOTE(review): pthread_create() reports failure through its return
 * value and this function does not copy that value into errno -- confirm
 * callers do not rely on errno after OPAL_ERR_IN_ERRNO.
 */
int opal_thread_start(opal_thread_t *t)
{
    void *(*entry)(void *);

    if (OPAL_ENABLE_DEBUG && (NULL == t->t_run || (pthread_t) -1 != t->t_handle)) {
        return OPAL_ERR_BAD_PARAM;
    }

    entry = (void *(*) (void *) ) t->t_run;
    if (0 != pthread_create(&t->t_handle, NULL, entry, t)) {
        return OPAL_ERR_IN_ERRNO;
    }
    return OPAL_SUCCESS;
}

/*
 * Join the pthread stored in t->t_handle, passing thr_return straight
 * through to pthread_join(), then reset the handle to the (pthread_t) -1
 * marker regardless of the outcome.
 *
 * Returns OPAL_SUCCESS if pthread_join() returned 0, OPAL_ERR_IN_ERRNO
 * otherwise.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    const int join_rc = pthread_join(t->t_handle, thr_return);

    t->t_handle = (pthread_t) -1;

    if (0 != join_rc) {
        return OPAL_ERR_IN_ERRNO;
    }
    return OPAL_SUCCESS;
}

/*
 * Report whether t refers to the calling thread.
 *
 * pthread_t is an opaque type that need not be a scalar, so the ids are
 * compared with pthread_equal() rather than with the == operator.
 *
 * Returns true when t->t_handle identifies the calling thread.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    return 0 != pthread_equal(pthread_self(), t->t_handle);
}

/*
 * Allocate a new opal_thread_t whose handle is the calling pthread's id
 * and return it.
 *
 * NOTE(review): the result of OBJ_NEW is used without a NULL check --
 * confirm whether allocation failure needs handling here.
 */
opal_thread_t *opal_thread_get_self(void)
{
    opal_thread_t *self_obj = OBJ_NEW(opal_thread_t);

    self_obj->t_handle = pthread_self();
    return self_obj;
}

/*
 * Create a thread-specific-data key by forwarding key and destructor to
 * pthread_key_create().
 *
 * Returns OPAL_SUCCESS if pthread_key_create() returned 0,
 * OPAL_ERR_IN_ERRNO otherwise.
 */
int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor)
{
    if (0 != pthread_key_create(key, destructor)) {
        return OPAL_ERR_IN_ERRNO;
    }
    return OPAL_SUCCESS;
}

/* No-op: the body is empty in the pthreads implementation. */
void opal_thread_set_main(void)
{
}
9 changes: 1 addition & 8 deletions opal/mca/threads/pthreads/threads_pthreads_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -32,13 +32,6 @@
#include "opal/mca/threads/pthreads/threads_pthreads.h"
#include "opal/mca/threads/threads.h"

/* Thread object of the pthreads threading component. */
struct opal_thread_t {
/* Base object; must stay first so the struct can be used as an opal_object_t. */
opal_object_t super;
/* Entry function handed to pthread_create() by opal_thread_start(). */
opal_thread_fn_t t_run;
/* Presumably a user argument made available to t_run -- not touched by the module code shown. */
void *t_arg;
/* Id of the underlying pthread; (pthread_t) -1 while no thread is attached. */
pthread_t t_handle;
};

/* Pthreads do not need to yield when idle */
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false

Expand Down
70 changes: 1 addition & 69 deletions opal/mca/threads/qthreads/threads_qthreads_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -51,74 +51,6 @@ static inline void self_key_ensure_init(void)
/* opal_thread_self_key has been already initialized. */
}

/*
 * Constructor: a freshly created thread object has no entry function
 * and a zeroed return word.
 */
static void opal_thread_construct(opal_thread_t *t)
{
    t->t_thread_ret = 0;
    t->t_run = NULL;
}

OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);

/*
 * Return the pointer registered under opal_thread_self_key for the
 * calling qthread (the wrapper stores the thread's return-word address
 * there), making sure the key is initialized first.
 */
static inline aligned_t *opal_thread_get_qthreads_self(void)
{
    self_key_ensure_init();
    return (aligned_t *) qthread_getspecific(opal_thread_self_key);
}

/*
 * Trampoline passed to qthread_fork(): registers the thread's return-word
 * pointer under opal_thread_self_key, then invokes the t_run entry
 * function with the thread object as argument and stores its result in
 * t_ret. Always returns 0.
 */
static aligned_t opal_thread_qthreads_wrapper(void *arg)
{
    opal_thread_t *thread = (opal_thread_t *) arg;
    void *(*entry)(void *);

    /* Make the running qthread identifiable through the self key. */
    self_key_ensure_init();
    qthread_setspecific(opal_thread_self_key, thread->t_thread_ret_ptr);

    entry = (void *(*) (void *) ) thread->t_run;
    thread->t_ret = entry(thread);
    return 0;
}

/*
 * Allocate a new opal_thread_t whose return-word pointer is the one
 * registered for the calling qthread and return it.
 *
 * NOTE(review): the result of OBJ_NEW is used without a NULL check --
 * confirm whether allocation failure needs handling here.
 */
opal_thread_t *opal_thread_get_self(void)
{
    opal_thread_t *self_obj;

    opal_threads_ensure_init_qthreads();
    self_obj = OBJ_NEW(opal_thread_t);
    self_obj->t_thread_ret_ptr = opal_thread_get_qthreads_self();
    return self_obj;
}

/*
 * Report whether the pointer registered for the calling qthread is the
 * address of t's own return word.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    aligned_t *current;

    opal_threads_ensure_init_qthreads();
    current = opal_thread_get_qthreads_self();
    return current == &t->t_thread_ret;
}

/*
 * Wait on the thread's return word with qthread_readFF(), hand back the
 * value recorded in t->t_ret when thr_return is non-NULL, and clear the
 * return word. Always returns OPAL_SUCCESS.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    qthread_readFF(NULL, t->t_thread_ret_ptr);

    if (NULL != thr_return) {
        *thr_return = t->t_ret;
    }

    t->t_thread_ret = 0;
    return OPAL_SUCCESS;
}

/* No-op: the body is empty in the qthreads implementation. */
void opal_thread_set_main(void)
{
}

/*
 * Fork a qthread that runs opal_thread_qthreads_wrapper with t as its
 * argument, using t->t_thread_ret as the word the join waits on.
 *
 * The result of qthread_fork() is checked so that a failed spawn is
 * reported to the caller instead of being presented as success; a caller
 * that then joined a thread which never started would wait on a return
 * word that is never filled.
 *
 * Returns OPAL_SUCCESS when qthread_fork() reported QTHREAD_SUCCESS,
 * OPAL_ERROR otherwise.
 */
int opal_thread_start(opal_thread_t *t)
{
    int rc;

    opal_threads_ensure_init_qthreads();
    t->t_thread_ret_ptr = &t->t_thread_ret;
    rc = qthread_fork(opal_thread_qthreads_wrapper, t, &t->t_thread_ret);

    return (QTHREAD_SUCCESS == rc) ? OPAL_SUCCESS : OPAL_ERROR;
}

OBJ_CLASS_DECLARATION(opal_thread_t);

int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor)
{
opal_threads_ensure_init_qthreads();
Expand Down
12 changes: 1 addition & 11 deletions opal/mca/threads/qthreads/threads_qthreads_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
Expand All @@ -27,16 +27,6 @@
#define OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H 1

#include "opal/mca/threads/qthreads/threads_qthreads.h"
#include <signal.h>

/* Thread object of the qthreads threading component. */
struct opal_thread_t {
/* Base object; must stay first so the struct can be used as an opal_object_t. */
opal_object_t super;
/* Entry function invoked by the qthreads wrapper when the thread runs. */
opal_thread_fn_t t_run;
/* Presumably a user argument made available to t_run -- not touched by the module code shown. */
void *t_arg;
/* Value returned by t_run, handed out by opal_thread_join(). */
void *t_ret;
/* Return word passed to qthread_fork(); opal_thread_join() waits on it. */
aligned_t t_thread_ret;
/* Address of the return word used to identify the thread; set by start/get_self. */
aligned_t *t_thread_ret_ptr;
};

/* Qthreads are cooperatively scheduled so yield when idle */
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
Expand Down
9 changes: 8 additions & 1 deletion opal/mca/threads/threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2021 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
Expand Down Expand Up @@ -45,6 +45,13 @@ typedef void *(*opal_thread_fn_t)(opal_object_t *);

#include MCA_threads_base_include_HEADER

/*
 * Thread object shared by all threading components; the handle is a
 * pthread_t because internal threads are always spawned as pthreads.
 */
struct opal_thread_t {
/* Base object; must stay first so the struct can be used as an opal_object_t. */
opal_object_t super;
/* Entry function of the thread. */
opal_thread_fn_t t_run;
/* Presumably a user argument made available to t_run -- confirm against callers. */
void *t_arg;
/* Id of the underlying pthread. */
pthread_t t_handle;
};

typedef struct opal_thread_t opal_thread_t;

OBJ_CLASS_DECLARATION(opal_thread_t);
Expand Down

0 comments on commit 286095f

Please sign in to comment.