Skip to content

Commit

Permalink
Protect against the envar version of the Slurm custom args param
Browse files Browse the repository at this point in the history
Protect against the envar version of the Slurm
custom args MCA param. This is an unfortunate
hack that hopefully will eventually go away.
See both of the following for detailed
explanations and discussion:

openpmix#1974
open-mpi/ompi#12471

Orgs/users wanting to add custom args to the
internal "srun" command used to spawn the
PRRTE daemons must do so via the default MCA
param files (system or user), or via the
prterun (or its proxy) cmd line

Signed-off-by: Ralph Castain <rhc@pmix.org>
(from upstream commit 28432ed)
  • Loading branch information
rhc54 authored and hppritcha committed Oct 8, 2024
1 parent 13f6975 commit 5c844a7
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 44 deletions.
3 changes: 1 addition & 2 deletions src/mca/plm/slurm/plm_slurm.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@ BEGIN_C_DECLS

/*
 * State for the Slurm PLM component: the base PLM component plus the
 * Slurm-specific settings consulted when selecting the component and
 * when building the internal "srun" command line.
 *
 * NOTE(review): this listing appears to come from a diff view, so it may
 * show fields from both sides of the change - confirm against the header.
 */
struct prte_mca_plm_slurm_component_t {
/* base PLM component data */
prte_plm_base_component_t super;
/* value returned by pmix_mca_base_component_var_register() for the
 * "args" variable; passed to pmix_mca_base_var_get() to look it up */
int custom_args_index;
/* custom arguments to srun (the "args" MCA variable) */
char *custom_args;
/* storage for the "disable_warning" MCA variable: when true, the warning
 * about custom args set in the environment is not shown */
bool slurm_warning_msg;
/* true when the detected srun version is earlier than 23.11; when false,
 * "--external-launcher" is added to the srun command */
bool early;
};
typedef struct prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component_t;

Expand Down
74 changes: 33 additions & 41 deletions src/mca/plm/slurm/plm_slurm_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "src/util/name_fns.h"
#include "src/util/pmix_environ.h"
#include "src/util/pmix_show_help.h"
#include "src/util/pmix_string_copy.h"

#include "plm_slurm.h"
#include "src/mca/plm/base/plm_private.h"
Expand Down Expand Up @@ -84,28 +85,15 @@ prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component = {
here; will be initialized in plm_slurm_open() */
};

static char *custom_args = NULL;
static char *force_args = NULL;

/*
 * Register the MCA variables of the Slurm PLM component.
 *
 * Variables registered here: "args" (custom srun arguments), "force_args"
 * (mandatory custom srun arguments) and "disable_warning" (suppress the
 * warning about custom args set in the environment).
 *
 * Returns PRTE_SUCCESS unconditionally; only the return value of the first
 * "args" registration is kept (as the variable index), the others are
 * discarded.
 *
 * NOTE(review): "args" is registered twice below, with two different
 * storage locations. This listing looks like a diff view with removed and
 * added lines interleaved - confirm which registration is the live one.
 */
static int plm_slurm_register(void)
{
pmix_mca_base_component_t *comp = &prte_mca_plm_slurm_component.super;


/* keep the variable index so the source of the value can be queried later */
prte_mca_plm_slurm_component.custom_args_index =
pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun",
PMIX_MCA_BASE_VAR_TYPE_STRING,
&custom_args);

/* default: no mandatory arguments */
force_args = NULL;
(void) pmix_mca_base_component_var_register(comp, "force_args", "Mandatory custom arguments to srun",
PMIX_MCA_BASE_VAR_TYPE_STRING,
&force_args);

/* default: the warning stays enabled */
prte_mca_plm_slurm_component.slurm_warning_msg = false;
(void) pmix_mca_base_component_var_register(comp, "disable_warning", "Turn off warning message about custom args set in environment",
PMIX_MCA_BASE_VAR_TYPE_BOOL,
&prte_mca_plm_slurm_component.slurm_warning_msg);
/* default: no custom arguments; the value is stored directly in the component */
prte_mca_plm_slurm_component.custom_args = NULL;
pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun",
PMIX_MCA_BASE_VAR_TYPE_STRING,
&prte_mca_plm_slurm_component.custom_args);

return PRTE_SUCCESS;
}
Expand All @@ -117,40 +105,44 @@ static int plm_slurm_open(void)

static int prte_mca_plm_slurm_component_query(pmix_mca_base_module_t **module, int *priority)
{
const pmix_mca_base_var_t *var;
pmix_status_t rc;
FILE *fp;
char version[1024], *ptr;
int major, minor;

/* Are we running under a SLURM job? */

if (NULL != getenv("SLURM_JOBID")) {
*priority = 75;

PMIX_OUTPUT_VERBOSE((1, prte_plm_base_framework.framework_output,
"%s plm:slurm: available for selection",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)));

prte_mca_plm_slurm_component.custom_args = NULL;

// if we are warning about externally set custom args, then
// check to see if that was done
if (!prte_mca_plm_slurm_component.slurm_warning_msg &&
NULL == force_args) {
// check for custom args
rc = pmix_mca_base_var_get(prte_mca_plm_slurm_component.custom_args_index, &var);
if (PMIX_SUCCESS == rc) {
// the variable was set - see who set it
if (PMIX_MCA_BASE_VAR_SOURCE_ENV == var->mbv_source) {
// set in the environment - warn
pmix_show_help("help-plm-slurm.txt", "custom-args-in-env", true,
custom_args);
}
}
// check the version
fp = popen("srun --version", "r");
if (NULL == fp) {
// cannot run srun, so we cannot support this job
*module = NULL;
return PRTE_ERROR;
}

if (NULL != force_args) {
prte_mca_plm_slurm_component.custom_args = force_args;
} else if (NULL != custom_args) {
prte_mca_plm_slurm_component.custom_args = custom_args;
if (NULL == fgets(version, sizeof(version), fp)) {
pclose(fp);
*module = NULL;
return PRTE_ERROR;
}
pclose(fp);
// parse on the dots
major = strtol(&version[6], &ptr, 10);
++ptr;
minor = strtol(ptr, NULL, 10);

if (23 > major) {
prte_mca_plm_slurm_component.early = true;
} else if (23 < major) {
prte_mca_plm_slurm_component.early = false;
} else if (11 > minor) {
prte_mca_plm_slurm_component.early = true;
} else {
prte_mca_plm_slurm_component.early = false;
}

*module = (pmix_mca_base_module_t *) &prte_plm_slurm_module;
Expand Down
7 changes: 6 additions & 1 deletion src/mca/plm/slurm/plm_slurm_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -253,6 +253,11 @@ static void launch_daemons(int fd, short args, void *cbdata)
/* add the srun command */
pmix_argv_append(&argc, &argv, "srun");

// add the external launcher flag if necessary
if (!prte_mca_plm_slurm_component.early) {
pmix_argv_append(&argc, &argv, "--external-launcher");
}

/* start one orted on each node */
pmix_argv_append(&argc, &argv, "--ntasks-per-node=1");

Expand Down
18 changes: 18 additions & 0 deletions src/runtime/prte_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,24 @@ int prte_init_minimum(void)
return PRTE_ERR_SILENT;
}

/* Protect against the envar version of the Slurm
* custom args MCA param. This is an unfortunate
* hack that hopefully will eventually go away.
* See both of the following for detailed
* explanations and discussion:
*
* https://github.com/openpmix/prrte/issues/1974
* https://github.com/open-mpi/ompi/issues/12471
*
* Orgs/users wanting to add custom args to the
* internal "srun" command used to spawn the
* PRRTE daemons must do so via the default MCA
* param files (system or user), or via the
* prterun (or its proxy) cmd line
*/
unsetenv("PRTE_MCA_plm_slurm_args");
unsetenv("OMPI_MCA_plm_slurm_args");

/* carry across the toolname */
pmix_tool_basename = prte_tool_basename;

Expand Down

0 comments on commit 5c844a7

Please sign in to comment.