From 9ff2c7ff62b1d810ef1feebd789fda0275e46b3f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 8 Apr 2022 13:00:48 -0700 Subject: [PATCH] Restore detailed error msg for some codes Provide support for the exe/wdir not found or missing permissions error output. Signed-off-by: Ralph Castain --- src/mca/odls/base/odls_base_default_fns.c | 5 ++--- src/prted/pmix/pmix_server_dyn.c | 10 ++++++++-- src/prted/prte_app_parse.c | 1 + src/runtime/prte_quit.c | 12 +++++++++--- src/tools/prte/prte.c | 4 ++-- src/tools/prun/help-prun.txt | 17 +++++++++++++---- 6 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/mca/odls/base/odls_base_default_fns.c b/src/mca/odls/base/odls_base_default_fns.c index f70d3262ff..5d8b9d0333 100644 --- a/src/mca/odls/base/odls_base_default_fns.c +++ b/src/mca/odls/base/odls_base_default_fns.c @@ -888,9 +888,9 @@ static int setup_path(prte_app_context_t *app, char **wdir) if (prte_get_attribute(&app->attributes, PRTE_APP_USER_CWD, NULL, PMIX_BOOL)) { usercwd = true; } - if (PMIX_SUCCESS != (rc = pmix_util_check_context_cwd(&app->cwd, true, usercwd))) { + rc = pmix_util_check_context_cwd(&app->cwd, true, usercwd); + if (PMIX_SUCCESS != rc) { /* do not ERROR_LOG - it will be reported elsewhere */ - rc = prte_pmix_convert_status(rc); goto CLEANUP; } @@ -1351,7 +1351,6 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata) rc = pmix_util_check_context_app(&app->app, app->cwd, app->env); /* do not ERROR_LOG - it will be reported elsewhere */ if (PMIX_SUCCESS != rc) { - rc = PRTE_ERR_EXE_NOT_FOUND; /* cycle through children to find those for this jobid */ for (idx = 0; idx < prte_local_children->size; idx++) { child = (prte_proc_t *) pmix_pointer_array_get_item(prte_local_children, idx); diff --git a/src/prted/pmix/pmix_server_dyn.c b/src/prted/pmix/pmix_server_dyn.c index 4359b9eccb..c91708aeba 100644 --- a/src/prted/pmix/pmix_server_dyn.c +++ b/src/prted/pmix/pmix_server_dyn.c @@ -274,9 +274,15 @@ static void interim(int sd, short args, void *cbdata) /* construct the absolute path */ app->cwd = pmix_os_path(false, cwd, info->value.data.string, NULL); } - } else if (PMIX_CHECK_KEY(info, PMIX_PRELOAD_BIN)) { +#ifdef PMIX_WDIR_USER_SPECIFIED + } else if (PMIX_CHECK_KEY(info, PMIX_WDIR_USER_SPECIFIED)) { flag = PMIX_INFO_TRUE(info); - prte_set_attribute(&app->attributes, PRTE_APP_PRELOAD_BIN, PRTE_ATTR_GLOBAL, + prte_set_attribute(&app->attributes, PRTE_APP_USER_CWD, PRTE_ATTR_GLOBAL, + &flag, PMIX_BOOL); +#endif + } else if (PMIX_CHECK_KEY(info, PMIX_SET_SESSION_CWD)) { + flag = PMIX_INFO_TRUE(info); + prte_set_attribute(&app->attributes, PRTE_APP_SSNDIR_CWD, PRTE_ATTR_GLOBAL, &flag, PMIX_BOOL); } else if (PMIX_CHECK_KEY(info, PMIX_PRELOAD_FILES)) { prte_set_attribute(&app->attributes, PRTE_APP_PRELOAD_FILES, PRTE_ATTR_GLOBAL, diff --git a/src/prted/prte_app_parse.c b/src/prted/prte_app_parse.c index 4c9fe1da95..1691c2f975 100644 --- a/src/prted/prte_app_parse.c +++ b/src/prted/prte_app_parse.c @@ -159,6 +159,7 @@ static int create_app(prte_schizo_base_module_t *schizo, char **argv, pmix_list_ /* construct the absolute path */ app->app.cwd = pmix_os_path(false, cwd, param, NULL); } + PMIX_INFO_LIST_ADD(rc, app->info, PMIX_WDIR_USER_SPECIFIED, NULL, PMIX_BOOL); } else if (pmix_cmd_line_is_taken(&results, "set-cwd-to-session-dir")) { PMIX_INFO_LIST_ADD(rc, app->info, PMIX_SET_SESSION_CWD, NULL, PMIX_BOOL); } else { diff --git a/src/runtime/prte_quit.c b/src/runtime/prte_quit.c index 1ef2bfdec2..154dd492fc 100644 --- a/src/runtime/prte_quit.c +++ b/src/runtime/prte_quit.c @@ -108,6 +108,7 @@ static char *print_aborted_job(prte_job_t *job, prte_app_context_t *approc, prte if (PRTE_PROC_STATE_FAILED_TO_START == proc->state || PRTE_PROC_STATE_FAILED_TO_LAUNCH == proc->state) { switch (proc->exit_code) { + case PMIX_ERR_SILENT: case PRTE_ERR_SILENT: /* say nothing - it was already reported */ break; @@ -131,18 +132,23 @@ static char *print_aborted_job(prte_job_t *job, prte_app_context_t *approc, prte prte_tool_basename, node->name, (unsigned long) proc->name.rank); break; - case PRTE_ERR_WDIR_NOT_FOUND: + case PMIX_ERR_JOB_WDIR_NOT_FOUND: output = prte_show_help_string("help-prun.txt", "prun:wdir-not-found", true, prte_tool_basename, approc->cwd, node->name, (unsigned long) proc->name.rank); break; - case PRTE_ERR_EXE_NOT_FOUND: + case PMIX_ERR_JOB_WDIR_NOT_ACCESSIBLE: + output = prte_show_help_string("help-prun.txt", "prun:wdir-not-accessible", true, + prte_tool_basename, approc->cwd, node->name, + (unsigned long) proc->name.rank); + break; + case PMIX_ERR_JOB_EXE_NOT_FOUND: output = prte_show_help_string("help-prun.txt", "prun:exe-not-found", true, prte_tool_basename, (unsigned long) proc->name.rank, prte_tool_basename, prte_tool_basename, node->name, approc->app); break; - case PRTE_ERR_EXE_NOT_ACCESSIBLE: + case PMIX_ERR_EXE_NOT_ACCESSIBLE: output = prte_show_help_string("help-prun.txt", "prun:exe-not-accessible", true, prte_tool_basename, approc->app, node->name, (unsigned long) proc->name.rank); diff --git a/src/tools/prte/prte.c b/src/tools/prte/prte.c index 1831bd3065..b1941e98d8 100644 --- a/src/tools/prte/prte.c +++ b/src/tools/prte/prte.c @@ -332,8 +332,8 @@ int main(int argc, char *argv[]) /* Set both ends of this pipe to be close-on-exec so that no children inherit it */ - if (pmix_fd_set_cloexec(term_pipe[0]) != PRTE_SUCCESS - || pmix_fd_set_cloexec(term_pipe[1]) != PRTE_SUCCESS) { + if (pmix_fd_set_cloexec(term_pipe[0]) != PRTE_SUCCESS || + pmix_fd_set_cloexec(term_pipe[1]) != PRTE_SUCCESS) { fprintf(stderr, "unable to set the pipe to CLOEXEC\n"); prte_progress_thread_finalize(NULL); exit(1); diff --git a/src/tools/prun/help-prun.txt b/src/tools/prun/help-prun.txt index bd7db6495f..0fbbeab7f6 100644 --- a/src/tools/prun/help-prun.txt +++ b/src/tools/prun/help-prun.txt @@ -13,7 +13,7 @@ # Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2017-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2021 Nanook Consulting. All rights reserved. +# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -236,7 +236,16 @@ while attempting to start process rank %lu. # [prun:wdir-not-found] %s was unable to launch the specified application as it could not -change to the specified working directory: +find the specified working directory: + +Working directory: %s +Node: %s + +while attempting to start process rank %lu. +# +[prun:wdir-not-accessible] +%s was unable to launch the specified application as it lacks +permissions to change to the specified working directory: Working directory: %s Node: %s @@ -256,8 +265,8 @@ Node: %s Executable: %s # [prun:exe-not-accessible] -%s was unable to launch the specified application as it could not access -or execute an executable: +%s was unable to launch the specified application as it lacked +permissions to execute an executable: Executable: %s Node: %s