Skip to content

Commit

Permalink
osc/rdma: Fix MPI_Win_start()/complete() with MPI_GROUP_EMPTY.
Browse files Browse the repository at this point in the history
- Make sure the epoch type is set before returning from MPI_Win_start().
- Make sure the group is only freed if it is valid in MPI_Win_complete().
  - Fix possible double free() of the group.

Signed-off-by: Austen Lauria <awlauria@us.ibm.com>
  • Loading branch information
awlauria committed Apr 6, 2021
1 parent 3d67c65 commit a8d2263
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions ompi/mca/osc/rdma/osc_rdma_active_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,8 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int mpi_assert, ompi_win_t

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "start group size %d", sync->num_peers);

sync->type = OMPI_OSC_RDMA_SYNC_TYPE_PSCW;

if (0 == ompi_group_size (group)) {
/* nothing more to do. this is an empty start epoch */
OPAL_THREAD_UNLOCK(&module->lock);
Expand All @@ -393,8 +395,6 @@ int ompi_osc_rdma_start_atomic (ompi_group_t *group, int mpi_assert, ompi_win_t

opal_atomic_wmb ();

sync->type = OMPI_OSC_RDMA_SYNC_TYPE_PSCW;

/* prevent us from entering a passive-target, fence, or another pscw access epoch until
* the matching complete is called */
sync->epoch_active = true;
Expand Down Expand Up @@ -466,17 +466,19 @@ int ompi_osc_rdma_complete_atomic (ompi_win_t *win)
sync->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;
sync->epoch_active = false;

/* phase 2 cleanup group */
OBJ_RELEASE(group);

peers = sync->peer_list.peers;
if (NULL == peers) {
/* empty peer list */
OPAL_THREAD_UNLOCK(&(module->lock));
OBJ_RELEASE(group);
if(MPI_GROUP_EMPTY != group) {
OBJ_RELEASE(group);
}
OPAL_THREAD_UNLOCK(&(module->lock));
return OMPI_SUCCESS;
}

/* phase 2 cleanup group */
OBJ_RELEASE(group);

sync->peer_list.peers = NULL;

OPAL_THREAD_UNLOCK(&(module->lock));
Expand Down Expand Up @@ -508,7 +510,6 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
{
ompi_osc_rdma_module_t *module = GET_MODULE(win);
ompi_osc_rdma_state_t *state = module->state;
ompi_group_t *group;
int group_size;

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait: %s", win->w_name);
Expand All @@ -532,12 +533,12 @@ int ompi_osc_rdma_wait_atomic (ompi_win_t *win)
}

OPAL_THREAD_LOCK(&module->lock);
group = module->pw_group;
if(MPI_GROUP_EMPTY != module->pw_group) {
OBJ_RELEASE(module->pw_group);
}
module->pw_group = NULL;
OPAL_THREAD_UNLOCK(&module->lock);

OBJ_RELEASE(group);

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "wait complete");

return OMPI_SUCCESS;
Expand Down

0 comments on commit a8d2263

Please sign in to comment.