Skip to content
This repository has been archived by the owner on Sep 30, 2022. It is now read-only.

fix spawn on heterogeneous clusters #977

Merged
merged 2 commits into from
Mar 8, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions ompi/dpm/dpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
opal_list_append(&ilist, &cd->super);
}
/* either way, add to the remote list */
cd = OBJ_NEW(ompi_dpm_proct_caddy_t);
cd = OBJ_NEW(ompi_dpm_proct_caddy_t);
cd->p = proc;
opal_list_append(&rlist, &cd->super);
}
Expand All @@ -403,24 +403,19 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
i = 0;
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
opal_value_t *kv;
new_proc_list[i] = cd->p;
/* set the locality */
new_proc_list[i]->super.proc_flags = OPAL_PROC_NON_LOCAL;
/* have to save it for later */
proc = cd->p;
new_proc_list[i] = proc ;
/* ompi_proc_complete_init_single() initializes the proc and retrieves,
* when available, OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. Both are
* optional, so its return value is deliberately not checked here */
ompi_proc_complete_init_single(proc);
/* save the locality for later */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
kv->data.uint16 = OPAL_PROC_NON_LOCAL;
opal_pmix.store_local(&cd->p->super.proc_name, kv);
kv->data.uint16 = proc->super.proc_flags;
opal_pmix.store_local(&proc->super.proc_name, kv);
OBJ_RELEASE(kv); // maintain accounting
/* we can retrieve the hostname at no cost because it
* was provided at connect */
OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_HOSTNAME, &new_proc_list[i]->super.proc_name,
(char**)&(new_proc_list[i]->super.proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != rc) {
/* we can live without it */
new_proc_list[i]->super.proc_hostname = NULL;
}
++i;
}
/* call add_procs on the new ones */
Expand Down
5 changes: 3 additions & 2 deletions ompi/proc/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,15 @@ static int ompi_proc_allocate (ompi_jobid_t jobid, ompi_vpid_t vpid, ompi_proc_t
* retrieving the hostname (if below the modex cutoff), determining the
* remote architecture, and calculating the locality of the process.
*/
static int ompi_proc_complete_init_single (ompi_proc_t *proc)
int ompi_proc_complete_init_single (ompi_proc_t *proc)
{
uint16_t u16, *u16ptr;
int ret;

u16ptr = &u16;

if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid) {
if ((OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid == OMPI_PROC_MY_NAME->jobid) &&
(OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid == OMPI_PROC_MY_NAME->vpid)) {
/* nothing else to do */
return OMPI_SUCCESS;
}
Expand Down
12 changes: 12 additions & 0 deletions ompi/proc/proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,18 @@ OMPI_DECLSPEC int ompi_proc_init(void);
*/
OMPI_DECLSPEC int ompi_proc_complete_init(void);

/**
* Complete filling up the proc information (arch, name and locality) for
* a single proc. This function must be called only after the modex
* exchange has completed.
*
* @param[in,out] proc the proc whose information will be filled in
*
* @retval OMPI_SUCCESS All information was set correctly.
* @retval OMPI_ERROR Some information could not be initialized.
*/
OMPI_DECLSPEC int ompi_proc_complete_init_single(ompi_proc_t* proc);

/**
* Finalize the OMPI Process subsystem
*
Expand Down