From aa4529b0f1917c4a5ac07f80ecd93a8058bcece1 Mon Sep 17 00:00:00 2001 From: Aboorva Devarajan Date: Fri, 29 May 2020 06:36:09 -0400 Subject: [PATCH] ompi/request: Add a read memory barrier to sync the receive buffer soon after wait completes. We found an issue where, when using multiple threads, the data may not yet be in the buffer when MPI_Wait() returns. Without the rmb(), testing the buffer some time after MPI_Wait() returned would show that the data does arrive eventually. We have seen this issue intermittently on Power9 using PAMI, but in theory it could happen with any transport. Signed-off-by: Austen Lauria (cherry picked from commit 12192f150330e587b46b4c5a289a6a8bffe14748) --- ompi/request/request.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ompi/request/request.h b/ompi/request/request.h index e9a16e6f804..adae96c5c93 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -417,21 +417,24 @@ static inline int ompi_request_free(ompi_request_t** request) static inline void ompi_request_wait_completion(ompi_request_t *req) { - if (opal_using_threads () && !REQUEST_COMPLETE(req)) { - void *_tmp_ptr = REQUEST_PENDING; - ompi_wait_sync_t sync; + if (opal_using_threads ()) { + if(!REQUEST_COMPLETE(req)) { + void *_tmp_ptr = REQUEST_PENDING; + ompi_wait_sync_t sync; - WAIT_SYNC_INIT(&sync, 1); + WAIT_SYNC_INIT(&sync, 1); - if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { - SYNC_WAIT(&sync); - } else { - /* completed before we had a chance to swap in the sync object */ - WAIT_SYNC_SIGNALLED(&sync); - } + if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { + SYNC_WAIT(&sync); + } else { + /* completed before we had a chance to swap in the sync object */ + WAIT_SYNC_SIGNALLED(&sync); + } - assert(REQUEST_COMPLETE(req)); - WAIT_SYNC_RELEASE(&sync); + assert(REQUEST_COMPLETE(req)); + WAIT_SYNC_RELEASE(&sync); + } + opal_atomic_rmb(); } 
else { while(!REQUEST_COMPLETE(req)) { opal_progress();