Skip to content

Commit

Permalink
Restore detailed error messages for some error codes
Browse files Browse the repository at this point in the history
Provide support for the error output reported when the executable or
working directory is not found or is not accessible due to missing permissions.

Signed-off-by: Ralph Castain <rhc@pmix.org>
  • Loading branch information
rhc54 committed Apr 8, 2022
1 parent 577145f commit 9ff2c7f
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 14 deletions.
5 changes: 2 additions & 3 deletions src/mca/odls/base/odls_base_default_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -888,9 +888,9 @@ static int setup_path(prte_app_context_t *app, char **wdir)
if (prte_get_attribute(&app->attributes, PRTE_APP_USER_CWD, NULL, PMIX_BOOL)) {
usercwd = true;
}
if (PMIX_SUCCESS != (rc = pmix_util_check_context_cwd(&app->cwd, true, usercwd))) {
rc = pmix_util_check_context_cwd(&app->cwd, true, usercwd);
if (PMIX_SUCCESS != rc) {
/* do not ERROR_LOG - it will be reported elsewhere */
rc = prte_pmix_convert_status(rc);
goto CLEANUP;
}

Expand Down Expand Up @@ -1351,7 +1351,6 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
rc = pmix_util_check_context_app(&app->app, app->cwd, app->env);
/* do not ERROR_LOG - it will be reported elsewhere */
if (PMIX_SUCCESS != rc) {
rc = PRTE_ERR_EXE_NOT_FOUND;
/* cycle through children to find those for this jobid */
for (idx = 0; idx < prte_local_children->size; idx++) {
child = (prte_proc_t *) pmix_pointer_array_get_item(prte_local_children, idx);
Expand Down
10 changes: 8 additions & 2 deletions src/prted/pmix/pmix_server_dyn.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,15 @@ static void interim(int sd, short args, void *cbdata)
/* construct the absolute path */
app->cwd = pmix_os_path(false, cwd, info->value.data.string, NULL);
}
} else if (PMIX_CHECK_KEY(info, PMIX_PRELOAD_BIN)) {
#ifdef PMIX_WDIR_USER_SPECIFIED
} else if (PMIX_CHECK_KEY(info, PMIX_WDIR_USER_SPECIFIED)) {
flag = PMIX_INFO_TRUE(info);
prte_set_attribute(&app->attributes, PRTE_APP_PRELOAD_BIN, PRTE_ATTR_GLOBAL,
prte_set_attribute(&app->attributes, PRTE_APP_USER_CWD, PRTE_ATTR_GLOBAL,
&flag, PMIX_BOOL);
#endif
} else if (PMIX_CHECK_KEY(info, PMIX_SET_SESSION_CWD)) {
flag = PMIX_INFO_TRUE(info);
prte_set_attribute(&app->attributes, PRTE_APP_SSNDIR_CWD, PRTE_ATTR_GLOBAL,
&flag, PMIX_BOOL);
} else if (PMIX_CHECK_KEY(info, PMIX_PRELOAD_FILES)) {
prte_set_attribute(&app->attributes, PRTE_APP_PRELOAD_FILES, PRTE_ATTR_GLOBAL,
Expand Down
1 change: 1 addition & 0 deletions src/prted/prte_app_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ static int create_app(prte_schizo_base_module_t *schizo, char **argv, pmix_list_
/* construct the absolute path */
app->app.cwd = pmix_os_path(false, cwd, param, NULL);
}
PMIX_INFO_LIST_ADD(rc, app->info, PMIX_WDIR_USER_SPECIFIED, NULL, PMIX_BOOL);
} else if (pmix_cmd_line_is_taken(&results, "set-cwd-to-session-dir")) {
PMIX_INFO_LIST_ADD(rc, app->info, PMIX_SET_SESSION_CWD, NULL, PMIX_BOOL);
} else {
Expand Down
12 changes: 9 additions & 3 deletions src/runtime/prte_quit.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ static char *print_aborted_job(prte_job_t *job, prte_app_context_t *approc, prte
if (PRTE_PROC_STATE_FAILED_TO_START == proc->state
|| PRTE_PROC_STATE_FAILED_TO_LAUNCH == proc->state) {
switch (proc->exit_code) {
case PMIX_ERR_SILENT:
case PRTE_ERR_SILENT:
/* say nothing - it was already reported */
break;
Expand All @@ -131,18 +132,23 @@ static char *print_aborted_job(prte_job_t *job, prte_app_context_t *approc, prte
prte_tool_basename, node->name,
(unsigned long) proc->name.rank);
break;
case PRTE_ERR_WDIR_NOT_FOUND:
case PMIX_ERR_JOB_WDIR_NOT_FOUND:
output = prte_show_help_string("help-prun.txt", "prun:wdir-not-found", true,
prte_tool_basename, approc->cwd, node->name,
(unsigned long) proc->name.rank);
break;
case PRTE_ERR_EXE_NOT_FOUND:
case PMIX_ERR_JOB_WDIR_NOT_ACCESSIBLE:
output = prte_show_help_string("help-prun.txt", "prun:wdir-not-accessible", true,
prte_tool_basename, approc->cwd, node->name,
(unsigned long) proc->name.rank);
break;
case PMIX_ERR_JOB_EXE_NOT_FOUND:
output = prte_show_help_string("help-prun.txt", "prun:exe-not-found", true,
prte_tool_basename, (unsigned long) proc->name.rank,
prte_tool_basename, prte_tool_basename, node->name,
approc->app);
break;
case PRTE_ERR_EXE_NOT_ACCESSIBLE:
case PMIX_ERR_EXE_NOT_ACCESSIBLE:
output = prte_show_help_string("help-prun.txt", "prun:exe-not-accessible", true,
prte_tool_basename, approc->app, node->name,
(unsigned long) proc->name.rank);
Expand Down
4 changes: 2 additions & 2 deletions src/tools/prte/prte.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,8 @@ int main(int argc, char *argv[])

/* Set both ends of this pipe to be close-on-exec so that no
children inherit it */
if (pmix_fd_set_cloexec(term_pipe[0]) != PRTE_SUCCESS
|| pmix_fd_set_cloexec(term_pipe[1]) != PRTE_SUCCESS) {
if (pmix_fd_set_cloexec(term_pipe[0]) != PRTE_SUCCESS ||
pmix_fd_set_cloexec(term_pipe[1]) != PRTE_SUCCESS) {
fprintf(stderr, "unable to set the pipe to CLOEXEC\n");
prte_progress_thread_finalize(NULL);
exit(1);
Expand Down
17 changes: 13 additions & 4 deletions src/tools/prun/help-prun.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
# Copyright (c) 2017-2020 Intel, Inc. All rights reserved.
# Copyright (c) 2021 Nanook Consulting. All rights reserved.
# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down Expand Up @@ -236,7 +236,16 @@ while attempting to start process rank %lu.
#
[prun:wdir-not-found]
%s was unable to launch the specified application as it could not
change to the specified working directory:
find the specified working directory:

Working directory: %s
Node: %s

while attempting to start process rank %lu.
#
[prun:wdir-not-accessible]
%s was unable to launch the specified application as it lacks
permissions to change to the specified working directory:

Working directory: %s
Node: %s
Expand All @@ -256,8 +265,8 @@ Node: %s
Executable: %s
#
[prun:exe-not-accessible]
%s was unable to launch the specified application as it could not access
or execute an executable:
%s was unable to launch the specified application as it lacks
permissions to execute the specified executable:

Executable: %s
Node: %s
Expand Down

0 comments on commit 9ff2c7f

Please sign in to comment.