diff --git a/.gitignore b/.gitignore index 7d1352714d3..b10869595dc 100644 --- a/.gitignore +++ b/.gitignore @@ -418,6 +418,8 @@ opal/mca/pmix/pmix*/openpmix/test/run_tests12.pl opal/mca/pmix/pmix*/openpmix/test/run_tests13.pl opal/mca/pmix/pmix*/openpmix/test/run_tests14.pl opal/mca/pmix/pmix*/openpmix/test/run_tests15.pl +opal/mca/pmix/pmix*/openpmix/src/tools/wrapper/pmix.pc +opal/mca/pmix/pmix*/openpmix/src/tools/wrapper/pmixcc-wrapper-data.txt opal/mca/pmix/ext4x/ext4x.c diff --git a/orte/mca/iof/base/Makefile.am b/orte/mca/iof/base/Makefile.am index 0d08004c774..c4cdce552a3 100644 --- a/orte/mca/iof/base/Makefile.am +++ b/orte/mca/iof/base/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -17,6 +18,8 @@ # $HEADER$ # +dist_ortedata_DATA = base/help-iof-base.txt + headers += \ base/base.h \ base/iof_base_setup.h diff --git a/orte/mca/iof/base/help-iof-base.txt b/orte/mca/iof/base/help-iof-base.txt new file mode 100644 index 00000000000..8ad812959ee --- /dev/null +++ b/orte/mca/iof/base/help-iof-base.txt @@ -0,0 +1,31 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. 
+# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# +[unrecognized-directive] +The following directive was given to "--%s": + + Directive: %s + +This directive is not recognized. Please check your spelling +and/or use the "--help" option to find the supported values. +# diff --git a/orte/mca/iof/base/iof_base_setup.c b/orte/mca/iof/base/iof_base_setup.c index 1c3616078f2..a03fc03728c 100644 --- a/orte/mca/iof/base/iof_base_setup.c +++ b/orte/mca/iof/base/iof_base_setup.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -63,11 +63,13 @@ #include "opal/util/opal_environ.h" #include "opal/util/os_dirpath.h" #include "opal/util/output.h" +#include "opal/util/basename.h" #include "opal/util/argv.h" #include "opal/util/printf.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/iof/iof.h" @@ -291,9 +293,9 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name, char *p, **s; bool usejobid = true; - /* see if we are to output to a file */ + /* see if we are to output to a directory */ dirname = NULL; - if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_FILE, (void**)&dirname, OPAL_STRING) && + if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, (void**)&dirname, OPAL_STRING) && NULL != dirname) { np = jobdat->num_procs / 10; /* determine the number of digits required for max vpid */ @@ -313,6 +315,12 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name, usejobid = false; } else if (0 == strcasecmp(s[i], "nocopy")) { proct->copy = false; + } else { + orte_show_help("help-iof-base", + "unrecognized-directive", + true, "output-directory", s[i]); + opal_argv_free(s); + return ORTE_ERROR; } } } @@ -379,7 +387,80 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name, proct->revstddiag->sink = proct->revstderr->sink; } #endif + return ORTE_SUCCESS; } + /* see if we are to output to a file */ + dirname = NULL; + if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_FILE, (void**)&dirname, OPAL_STRING) && + NULL != dirname) { + np = jobdat->num_procs / 10; + /* determine the number of digits required for max vpid */ + numdigs = 1; + while (np > 0) { + numdigs++; + np = np / 10; + } + /* check for a conditional in the directory name */ + if (NULL != (p = strchr(dirname, ':'))) { + *p = '\0'; + ++p; + /* could be more than one directive */ + s = opal_argv_split(p, ','); + for
(i=0; NULL != s[i]; i++) { + if (0 == strcasecmp(s[i], "nocopy")) { + proct->copy = false; + } else { + orte_show_help("help-iof-base", + "unrecognized-directive", + true, "output-filename", s[i]); + opal_argv_free(s); + return ORTE_ERROR; + } + } + } + + /* construct the directory where the output files will go */ + outdir = opal_dirname(dirname); + + /* ensure the directory exists */ + if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(outdir, S_IRWXU|S_IRGRP|S_IXGRP))) { + ORTE_ERROR_LOG(rc); + free(outdir); + return rc; + } + if (NULL != proct->revstdout && NULL == proct->revstdout->sink) { + /* setup the stdout sink */ + opal_asprintf(&outfile, "%s.%d.%0*lu", dirname, + (int)ORTE_LOCAL_JOBID(proct->name.jobid), + numdigs, (unsigned long)proct->name.vpid); + fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644); + free(outfile); + if (fdout < 0) { + /* couldn't be opened */ + ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE); + return ORTE_ERR_FILE_OPEN_FAILURE; + } + /* define a sink to that file descriptor */ + ORTE_IOF_SINK_DEFINE(&proct->revstdout->sink, dst_name, + fdout, ORTE_IOF_STDOUTALL, + orte_iof_base_write_handler); + } + + if (NULL != proct->revstderr && NULL == proct->revstderr->sink) { + /* we only create one file - all output goes there */ + OBJ_RETAIN(proct->revstdout->sink); + proct->revstdout->sink->tag = ORTE_IOF_STDMERGE; // show that it is merged + proct->revstderr->sink = proct->revstdout->sink; + } +#if OPAL_PMIX_V1 + if (NULL != proct->revstddiag && NULL == proct->revstddiag->sink) { + /* always tie the sink for stddiag to stderr */ + OBJ_RETAIN(proct->revstderr->sink); + proct->revstddiag->sink = proct->revstderr->sink; + } +#endif + return ORTE_SUCCESS; + } return ORTE_SUCCESS; } diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 27a7420603a..af0a5049e76 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -14,7 +14,7 @@ * Copyright (c) 2009-2018 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2011-2017 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2017 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. @@ -138,9 +138,13 @@ static opal_cmd_line_init_t cmd_line_init[] = { { "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0, &orte_cmd_options.timestamp_output, OPAL_CMD_LINE_TYPE_BOOL, "Timestamp all application process output", OPAL_CMD_LINE_OTYPE_OUTPUT }, + { "orte_output_directory", '\0', "output-directory", "output-directory", 1, + &orte_cmd_options.output_directory, OPAL_CMD_LINE_TYPE_STRING, + "Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path. The directory name may include a colon followed by a comma-delimited list of optional case-insensitive directives. Supported directives currently include NOJOBID (do not include a job-id directory level) and NOCOPY (do not copy the output to the stdout/err streams)", + OPAL_CMD_LINE_OTYPE_OUTPUT }, { "orte_output_filename", '\0', "output-filename", "output-filename", 1, &orte_cmd_options.output_filename, OPAL_CMD_LINE_TYPE_STRING, - "Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path", + "Redirect output from application processes into filename.rank. A relative path value will be converted to an absolute path. The directory name may include a colon followed by a comma-delimited list of optional case-insensitive directives. 
Supported directives currently include NOCOPY (do not copy the output to the stdout/err streams)", OPAL_CMD_LINE_OTYPE_OUTPUT }, { NULL, '\0', "merge-stderr-to-stdout", "merge-stderr-to-stdout", 0, &orte_cmd_options.merge, OPAL_CMD_LINE_TYPE_BOOL, diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index 2f390d067a2..cba04c92494 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -107,3 +107,12 @@ mechanisms. You may wish to contact your debugger vendor to inquire about support for PMIx-based debugger attachment mechanisms. Meantime, you can disable this warning by setting the OMPI_MPIR_DO_NOT_WARN envar to 1. +# +[both-file-and-dir-set] +Both the "output-directory" and "output-filename" options have been +set: + + Directory: %s + Filename: %s + +Only one of these can be set - please fix the options and try again. diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index e30fc1b09af..2d0551ab56f 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -209,6 +209,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = { &orted_globals.singleton_died_pipe, OPAL_CMD_LINE_TYPE_INT, "Watch on indicated pipe for singleton termination"}, + { "orte_output_directory", '\0', "output-directory", "output-directory", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Redirect output from application processes into filename/job/rank/std[out,err,diag]." 
}, + { "orte_output_filename", '\0', "output-filename", "output-filename", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, "Redirect output from application processes into filename.rank" }, diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 8a700240c01..b0e34546400 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -176,7 +176,7 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata); ORTE_DECLSPEC void __opal_attribute_optnone__ MPIR_Breakpoint(void); -/* +/* * Attempt to prevent the compiler from optimizing out * MPIR_Breakpoint(). * @@ -199,7 +199,7 @@ volatile void* volatile orte_noop_mpir_breakpoint_ptr = NULL; */ void MPIR_Breakpoint(void) { - /* + /* * Actually do something with this pointer to make * sure the compiler does not optimize out this function. * The compiler should be forced to keep this @@ -852,9 +852,16 @@ int orte_submit_job(char *argv[], int *index, if (orte_cmd_options.timestamp_output) { orte_set_attribute(&jdata->attributes, ORTE_JOB_TIMESTAMP_OUTPUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); } - /* if we were asked to output to files, pass it along */ - if (NULL != orte_cmd_options.output_filename) { - /* if the given filename isn't an absolute path, then + /* cannot have both files and directory set for output */ + if (NULL != orte_cmd_options.output_filename && + NULL != orte_cmd_options.output_directory) { + orte_show_help("help-orted.txt", "both-file-and-dir-set", true, + orte_cmd_options.output_directory, + orte_cmd_options.output_filename); + return ORTE_ERR_FATAL; + } else if (NULL != orte_cmd_options.output_filename) { + /* if we were asked to output to files, pass it along. 
+ * If the given filename isn't an absolute path, then * convert it to one so the name will be relative to * the directory where prun was given as that is what * the user will have seen */ @@ -867,6 +874,21 @@ int orte_submit_job(char *argv[], int *index, } else { orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_FILE, ORTE_ATTR_GLOBAL, orte_cmd_options.output_filename, OPAL_STRING); } + } else if (NULL != orte_cmd_options.output_directory) { + /* if we were asked to output to a directory, pass it along. + * If the given filename isn't an absolute path, then + * convert it to one so the name will be relative to + * the directory where prun was given as that is what + * the user will have seen */ + if (!opal_path_is_absolute(orte_cmd_options.output_directory)) { + char cwd[OPAL_PATH_MAX], *path; + getcwd(cwd, sizeof(cwd)); + path = opal_os_path(false, cwd, orte_cmd_options.output_directory, NULL); + orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, ORTE_ATTR_GLOBAL, path, OPAL_STRING); + free(path); + } else { + orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, ORTE_ATTR_GLOBAL, orte_cmd_options.output_directory, OPAL_STRING); + } } /* if we were asked to merge stderr to stdout, mark it so */ if (orte_cmd_options.merge) { diff --git a/orte/orted/orted_submit.h b/orte/orted/orted_submit.h index 873747697a4..79ce9202fb9 100644 --- a/orte/orted/orted_submit.h +++ b/orte/orted/orted_submit.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -90,6 +90,7 @@ struct orte_cmd_options_t { bool debug; bool tag_output; bool timestamp_output; + char *output_directory; char *output_filename; bool merge; bool continuous; diff --git a/orte/tools/orterun/orterun.1in b/orte/tools/orterun/orterun.1in index 1a6c6699ee4..e9987697dfa 100644 --- a/orte/tools/orterun/orterun.1in +++ b/orte/tools/orterun/orterun.1in @@ -1,7 +1,7 @@ .\" -*- nroff -*- .\" Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved. .\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved. +.\" Copyright (c) 2017-2019 Intel, Inc. All rights reserved. .\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights .\" reserved. .\" $COPYRIGHT$ @@ -418,10 +418,25 @@ Redirect the stdout, stderr, and stddiag of all processes to a process-unique ve the specified filename. Any directories in the filename will automatically be created. Each output file will consist of filename.id, where the id will be the processes' rank in MPI_COMM_WORLD, left-filled with -zero's for correct ordering in listings. A relative path value will be converted to an +zeros for correct ordering in listings. Both stdout and stderr will be redirected to the file. +A relative path value will be converted to an absolute path based on the cwd where mpirun is executed. Note that this \fIwill not\fP work on environments where the file system on compute nodes differs from that where mpirun -is executed. +is executed. This option accepts one case-insensitive directive, specified after a colon: NOCOPY +indicates that the output is not to be echoed to the terminal. +. +. +.TP +.B -output-directory\fR,\fP --output-directory \fR<path>\fP +Redirect the stdout, stderr, and stddiag of all processes to a process-unique location +consisting of "<path>/<jobid>/rank.id/stdout[err]", where the id will be the +processes' rank in MPI_COMM_WORLD, left-filled with +zeros for correct ordering in listings.
Any directories in the path will automatically be created. +A relative path value will be converted to an +absolute path based on the cwd where mpirun is executed. Note that this \fIwill not\fP work +on environments where the file system on compute nodes differs from that where mpirun +is executed. This option also supports two case-insensitive directives, specified in comma-delimited form after a colon: NOJOBID (omit the jobid directory layer) and NOCOPY (do +not copy the output to the terminal). . . .TP diff --git a/orte/util/attr.c b/orte/util/attr.c index 0e42a90f8c9..cca47ffa3c9 100644 --- a/orte/util/attr.c +++ b/orte/util/attr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved @@ -384,6 +384,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key) return "ORTE_APP_ADD_ENVAR"; case ORTE_JOB_APP_SETUP_DATA: return "ORTE_JOB_APP_SETUP_DATA"; + case ORTE_JOB_OUTPUT_TO_DIRECTORY: + return "ORTE_JOB_OUTPUT_TO_DIRECTORY"; case ORTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; diff --git a/orte/util/attr.h b/orte/util/attr.h index c8d2219993e..9e87211b0ca 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ @@ -142,7 +142,7 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_FIXED_DVM (ORTE_JOB_START_KEY + 42) // bool - do not change the size of the DVM for this job #define ORTE_JOB_DVM_JOB (ORTE_JOB_START_KEY + 43) // bool - job is using a DVM #define ORTE_JOB_CANCELLED (ORTE_JOB_START_KEY + 44) // bool - job was cancelled -#define ORTE_JOB_OUTPUT_TO_FILE (ORTE_JOB_START_KEY + 45) // string - name of directory to which stdout/err is to be directed +#define ORTE_JOB_OUTPUT_TO_FILE (ORTE_JOB_START_KEY + 45) // string - path to use as basename of files to which stdout/err is to be directed #define ORTE_JOB_MERGE_STDERR_STDOUT (ORTE_JOB_START_KEY + 46) // bool - merge stderr into stdout stream #define ORTE_JOB_TAG_OUTPUT (ORTE_JOB_START_KEY + 47) // bool - tag stdout/stderr #define ORTE_JOB_TIMESTAMP_OUTPUT (ORTE_JOB_START_KEY + 48) // bool - timestamp stdout/stderr @@ -159,6 +159,7 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_APPEND_ENVAR (ORTE_JOB_START_KEY + 58) // opal_envar_t - append the specified value to the given envar #define ORTE_JOB_ADD_ENVAR (ORTE_JOB_START_KEY + 59) // opal_envar_t - add envar, do not override pre-existing one #define ORTE_JOB_APP_SETUP_DATA (ORTE_JOB_START_KEY + 60) // opal_byte_object_t - blob containing app setup data +#define ORTE_JOB_OUTPUT_TO_DIRECTORY (ORTE_JOB_START_KEY + 61) // string - path of directory to which stdout/err is to be directed #define ORTE_JOB_MAX_KEY 300