Skip to content

Commit

Permalink
Merge pull request #4510 from matcabral/mtl_psm2_shadow_vars
Browse files Browse the repository at this point in the history
New flag for MCA parameters that lets a parameter behave as if its default value were "unset".
  • Loading branch information
matcabral authored Dec 4, 2017
2 parents b160cf6 + 1fad594 commit 2c86b87
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 19 deletions.
57 changes: 40 additions & 17 deletions ompi/mca/mtl/psm2/mtl_psm2_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,29 +92,33 @@ struct ompi_mtl_psm2_shadow_variable {
mca_base_var_info_lvl_t info_level;
const char *mca_name;
const char *description;
mca_base_var_flag_t flags;
};

struct ompi_mtl_psm2_shadow_variable ompi_mtl_psm2_shadow_variables[] = {
{MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_devices, {.stringval = "self,shm,hfi"}, "PSM2_DEVICES", OPAL_INFO_LVL_3,
"devices", "Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi)"},
"devices",
"Comma-delimited list of PSM2 devices. Valid values: self, shm, hfi (default: self,shm,hfi. Reduced to self,shm in single node jobs)",0},
{MCA_BASE_VAR_TYPE_STRING, &ompi_mtl_psm2.psm2_memory, {.stringval = "normal"}, "PSM2_MEMORY", OPAL_INFO_LVL_9,
"memory_model", "PSM2 memory usage mode (default: normal)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_sendreqs_max, {.ulval = 1048576}, "PSM2_MQ_SENDREQS_MAX", OPAL_INFO_LVL_3,
"mq_sendreqs_max", "PSM2 maximum number of isend requests in flight (default: 1M)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_recvreqs_max, {.ulval = 1048576}, "PSM2_MQ_RECVREQS_MAX", OPAL_INFO_LVL_3,
"mq_recvreqs_max", "PSM2 maximum number of irecv requests in flight (default: 1M)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_hfi_threshold, {.ulval = 64000}, "PSM2_MQ_RNDV_HFI_THRESH", OPAL_INFO_LVL_3,
"hfi_eager_limit", "PSM2 eager to rendezvous threshold (default: 64000)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_shm_threshold, {.ulval = 16000}, "PSM2_MQ_RNDV_SHM_THRESH", OPAL_INFO_LVL_3,
"shm_eager_limit", "PSM2 shared memory eager to rendezvous threshold (default: 16000)"},
"memory_model", "PSM2 memory usage mode. Valid values: min, normal, large (default: normal)", 0},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_sendreqs_max, {.ulval = 0}, "PSM2_MQ_SENDREQS_MAX", OPAL_INFO_LVL_3,
"mq_sendreqs_max", "PSM2 maximum number of isend requests in flight (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_recvreqs_max, {.ulval = 0}, "PSM2_MQ_RECVREQS_MAX", OPAL_INFO_LVL_3,
"mq_recvreqs_max", "PSM2 maximum number of irecv requests in flight (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_hfi_threshold, {.ulval = 0}, "PSM2_MQ_RNDV_HFI_THRESH", OPAL_INFO_LVL_3,
"hfi_eager_limit", "PSM2 eager to rendezvous threshold (default: unset, let libpsm2 use its defaults)", MCA_BASE_VAR_FLAG_DEF_UNSET},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_mq_rndv_shm_threshold, {.ulval = 0}, "PSM2_MQ_RNDV_SHM_THRESH", OPAL_INFO_LVL_3,
"shm_eager_limit", "PSM2 shared memory eager to rendezvous threshold (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET},
{MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_recvthread, {.boolval = true}, "PSM2_RCVTHREAD", OPAL_INFO_LVL_3,
"use_receive_thread", "Use PSM2 progress thread (default: true)"},
{MCA_BASE_VAR_TYPE_BOOL, &ompi_mtl_psm2.psm2_shared_contexts, {.boolval = true}, "PSM2_SHAREDCONTEXTS", OPAL_INFO_LVL_6,
"use_shared_contexts", "Share PSM contexts between MPI processes (default: true)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_shared_contexts_max, {.ulval = 8}, "PSM2_SHAREDCONTEXTS_MAX", OPAL_INFO_LVL_9,
"max_shared_contexts", "Maximum number of contexts available on a node (default: 8, max: 8)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_max_contexts_per_job, {.ulval = 0}, "PSM2_MAX_CONTEXTS_PER_JOB", OPAL_INFO_LVL_9,
"max_contexts_per_job", "Maximum number of contexts available on a node (default: unset, let libpsm2 use its default)", MCA_BASE_VAR_FLAG_DEF_UNSET},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_tracemask, {.ulval = 1}, "PSM2_TRACEMASK", OPAL_INFO_LVL_9,
"trace_mask", "PSM2 tracemask value. See PSM2 documentation for accepted values (default: 1)"},
"trace_mask", "PSM2 tracemask value. See PSM2 documentation for accepted values in 0x (default: 1)"},
{MCA_BASE_VAR_TYPE_UNSIGNED_LONG, &ompi_mtl_psm2.psm2_opa_sl, {.ulval = 0}, "HFI_SL", OPAL_INFO_LVL_9,
"opa_service_level", "HFI Service Level (default: unset, let libpsm2 use its defaults)", MCA_BASE_VAR_FLAG_DEF_UNSET},
{-1},
};

Expand All @@ -123,10 +127,27 @@ static void ompi_mtl_psm2_set_shadow_env (struct ompi_mtl_psm2_shadow_variable *
mca_base_var_storage_t *storage = variable->storage;
char *env_value;
int ret = 0;
int var_index = 0;
const mca_base_var_t *mca_base_var;

var_index = mca_base_var_find("ompi", "mtl", "psm2", variable->mca_name);
ret = mca_base_var_get (var_index,&mca_base_var);
/* Something is fundamentally broken if registered variables are
* not found */
if (OPAL_SUCCESS != ret) {
fprintf (stderr, "ERROR setting PSM2 environment variable: %s\n", variable->env_name);
return;
}

/** Skip setting variables for which the default behavior is "unset" */
if ((mca_base_var->mbv_flags & MCA_BASE_VAR_FLAG_DEF_UNSET) &&
(MCA_BASE_VAR_SOURCE_DEFAULT == mca_base_var->mbv_source)){
return ;
}

switch (variable->variable_type) {
case MCA_BASE_VAR_TYPE_BOOL:
ret = asprintf (&env_value, "%s=%s", variable->env_name, storage->boolval ? "YES" : "NO");
ret = asprintf (&env_value, "%s=%d", variable->env_name, storage->boolval ? 1 : 0);
break;
case MCA_BASE_VAR_TYPE_UNSIGNED_LONG:
if (0 == strcmp (variable->env_name, "PSM2_TRACEMASK")) {
Expand Down Expand Up @@ -182,7 +203,7 @@ static void ompi_mtl_psm2_register_shadow_env (struct ompi_mtl_psm2_shadow_varia
}

(void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version, variable->mca_name, variable->description,
variable->variable_type, NULL, 0, 0, variable->info_level, MCA_BASE_VAR_SCOPE_READONLY,
variable->variable_type, NULL, 0, variable->flags, variable->info_level, MCA_BASE_VAR_SCOPE_READONLY,
variable->storage);
}

Expand Down Expand Up @@ -221,8 +242,10 @@ ompi_mtl_psm2_component_register(void)
(void) get_num_total_procs(&num_total_procs);

/* set priority high enough to beat ob1's default (also set higher than psm) */
if (num_local_procs == num_total_procs) {
/* disable hfi if all processes are local */
if ((num_local_procs == num_total_procs) && (1 < num_total_procs)) {
/* Disable hfi if all processes are local. However, if only one process is
* running, assume it is either ompi_info or a job that will most likely
* spawn more processes; in both cases all PSM2 devices are needed */
setenv("PSM2_DEVICES", "self,shm", 0);
/* ob1 is much faster than psm2 with shared memory */
param_priority = 10;
Expand Down
3 changes: 2 additions & 1 deletion ompi/mca/mtl/psm2/mtl_psm2_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ struct mca_mtl_psm2_module_t {
unsigned long psm2_mq_recvreqs_max;
unsigned long psm2_mq_rndv_hfi_threshold;
unsigned long psm2_mq_rndv_shm_threshold;
unsigned long psm2_shared_contexts_max;
unsigned long psm2_max_contexts_per_job;
unsigned long psm2_tracemask;
bool psm2_recvthread;
bool psm2_shared_contexts;
unsigned long psm2_opa_sl;
};

typedef struct mca_mtl_psm2_module_t mca_mtl_psm2_module_t;
Expand Down
8 changes: 8 additions & 0 deletions opal/mca/base/mca_base_var.c
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,14 @@ static int var_value_string (mca_base_var_t *var, char **value_string)

assert (MCA_BASE_VAR_TYPE_MAX > var->mbv_type);

/** Parameters with MCA_BASE_VAR_FLAG_DEF_UNSET flag should be shown
* as "unset" by default. */
if ((var->mbv_flags & MCA_BASE_VAR_FLAG_DEF_UNSET) &&
(MCA_BASE_VAR_SOURCE_DEFAULT == var->mbv_source)){
asprintf (value_string, "%s", "unset");
return OPAL_SUCCESS;
}

ret = mca_base_var_get_value(var->mbv_index, &value, NULL, NULL);
if (OPAL_SUCCESS != ret || NULL == value) {
return ret;
Expand Down
5 changes: 4 additions & 1 deletion opal/mca/base/mca_base_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ typedef enum {
manually when you register a variable with
mca_base_var_register(). Analogous to the
MCA_BASE_PVAR_FLAG_IWG. */
MCA_BASE_VAR_FLAG_DWG = 0x0040
MCA_BASE_VAR_FLAG_DWG = 0x0040,
/** Variable has a default value of "unset", meaning it is only
* set when the user explicitly asks for it */
MCA_BASE_VAR_FLAG_DEF_UNSET = 0x0080,
} mca_base_var_flag_t;


Expand Down

0 comments on commit 2c86b87

Please sign in to comment.