Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update IOF redirection options #7162

Merged
merged 1 commit into from
Nov 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,8 @@ opal/mca/pmix/pmix*/openpmix/test/run_tests12.pl
opal/mca/pmix/pmix*/openpmix/test/run_tests13.pl
opal/mca/pmix/pmix*/openpmix/test/run_tests14.pl
opal/mca/pmix/pmix*/openpmix/test/run_tests15.pl
opal/mca/pmix/pmix*/openpmix/src/tools/wrapper/pmix.pc
opal/mca/pmix/pmix*/openpmix/src/tools/wrapper/pmixcc-wrapper-data.txt


opal/mca/pmix/ext4x/ext4x.c
Expand Down
3 changes: 3 additions & 0 deletions orte/mca/iof/base/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

dist_ortedata_DATA = base/help-iof-base.txt

headers += \
base/base.h \
base/iof_base_setup.h
Expand Down
31 changes: 31 additions & 0 deletions orte/mca/iof/base/help-iof-base.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
#
[unrecognized-directive]
The following directive was given to "--%s":

Directive: %s

This directive is not recognized. Please check your spelling
and/or use the "--help" option to find the supported values.
#
87 changes: 84 additions & 3 deletions orte/mca/iof/base/iof_base_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -63,11 +63,13 @@
#include "opal/util/opal_environ.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/output.h"
#include "opal/util/basename.h"
#include "opal/util/argv.h"
#include "opal/util/printf.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"

#include "orte/mca/iof/iof.h"
Expand Down Expand Up @@ -291,9 +293,9 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
char *p, **s;
bool usejobid = true;

/* see if we are to output to a file */
/* see if we are to output to a directory */
dirname = NULL;
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_FILE, (void**)&dirname, OPAL_STRING) &&
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, (void**)&dirname, OPAL_STRING) &&
NULL != dirname) {
np = jobdat->num_procs / 10;
/* determine the number of digits required for max vpid */
Expand All @@ -313,6 +315,12 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
usejobid = false;
} else if (0 == strcasecmp(s[i], "nocopy")) {
proct->copy = false;
} else {
orte_show_help("help-iof-base",
"unrecognized-directive",
true, "output-directory", s[i]);
opal_argv_free(s);
return ORTE_ERROR;
}
}
}
Expand Down Expand Up @@ -379,7 +387,80 @@ int orte_iof_base_setup_output_files(const orte_process_name_t* dst_name,
proct->revstddiag->sink = proct->revstderr->sink;
}
#endif
return ORTE_SUCCESS;
}

/* see if we are to output to a file */
dirname = NULL;
if (orte_get_attribute(&jobdat->attributes, ORTE_JOB_OUTPUT_TO_FILE, (void**)&dirname, OPAL_STRING) &&
NULL != dirname) {
np = jobdat->num_procs / 10;
/* determine the number of digits required for max vpid */
numdigs = 1;
while (np > 0) {
numdigs++;
np = np / 10;
}
/* check for a conditional in the directory name */
if (NULL != (p = strchr(dirname, ':'))) {
*p = '\0';
++p;
/* could be more than one directive */
s = opal_argv_split(p, ',');
for (i=0; NULL != s[i]; i++) {
if (0 == strcasecmp(s[i], "nocopy")) {
proct->copy = false;
} else {
orte_show_help("help-iof-base",
"unrecognized-directive",
true, "output-filename", s[i]);
opal_argv_free(s);
return ORTE_ERROR;
}
}
}

/* construct the directory where the output files will go */
outdir = opal_dirname(dirname);

/* ensure the directory exists */
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(outdir, S_IRWXU|S_IRGRP|S_IXGRP))) {
ORTE_ERROR_LOG(rc);
free(outdir);
return rc;
}
if (NULL != proct->revstdout && NULL == proct->revstdout->sink) {
/* setup the stdout sink */
opal_asprintf(&outfile, "%s.%d.%0*lu", dirname,
(int)ORTE_LOCAL_JOBID(proct->name.jobid),
numdigs, (unsigned long)proct->name.vpid);
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
free(outfile);
if (fdout < 0) {
/* couldn't be opened */
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
return ORTE_ERR_FILE_OPEN_FAILURE;
}
/* define a sink to that file descriptor */
ORTE_IOF_SINK_DEFINE(&proct->revstdout->sink, dst_name,
fdout, ORTE_IOF_STDOUTALL,
orte_iof_base_write_handler);
}

if (NULL != proct->revstderr && NULL == proct->revstderr->sink) {
/* we only create one file - all output goes there */
OBJ_RETAIN(proct->revstdout->sink);
proct->revstdout->sink->tag = ORTE_IOF_STDMERGE; // show that it is merged
proct->revstderr->sink = proct->revstdout->sink;
}
#if OPAL_PMIX_V1
if (NULL != proct->revstddiag && NULL == proct->revstddiag->sink) {
/* always tie the sink for stddiag to stderr */
OBJ_RETAIN(proct->revstderr->sink);
proct->revstddiag->sink = proct->revstderr->sink;
}
#endif
return ORTE_SUCCESS;
}
return ORTE_SUCCESS;
}
8 changes: 6 additions & 2 deletions orte/mca/schizo/ompi/schizo_ompi.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2011-2017 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2017 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 IBM Corporation. All rights reserved.
Expand Down Expand Up @@ -138,9 +138,13 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ "orte_timestamp_output", '\0', "timestamp-output", "timestamp-output", 0,
&orte_cmd_options.timestamp_output, OPAL_CMD_LINE_TYPE_BOOL,
"Timestamp all application process output", OPAL_CMD_LINE_OTYPE_OUTPUT },
{ "orte_output_directory", '\0', "output-directory", "output-directory", 1,
&orte_cmd_options.output_directory, OPAL_CMD_LINE_TYPE_STRING,
"Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path. The directory name may include a colon followed by a comma-delimited list of optional case-insensitive directives. Supported directives currently include NOJOBID (do not include a job-id directory level) and NOCOPY (do not copy the output to the stdout/err streams)",
OPAL_CMD_LINE_OTYPE_OUTPUT },
{ "orte_output_filename", '\0', "output-filename", "output-filename", 1,
&orte_cmd_options.output_filename, OPAL_CMD_LINE_TYPE_STRING,
"Redirect output from application processes into filename/job/rank/std[out,err,diag]. A relative path value will be converted to an absolute path",
"Redirect output from application processes into filename.rank. A relative path value will be converted to an absolute path. The directory name may include a colon followed by a comma-delimited list of optional case-insensitive directives. Supported directives currently include NOCOPY (do not copy the output to the stdout/err streams)",
OPAL_CMD_LINE_OTYPE_OUTPUT },
{ NULL, '\0', "merge-stderr-to-stdout", "merge-stderr-to-stdout", 0,
&orte_cmd_options.merge, OPAL_CMD_LINE_TYPE_BOOL,
Expand Down
11 changes: 10 additions & 1 deletion orte/orted/help-orted.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down Expand Up @@ -107,3 +107,12 @@ mechanisms.
You may wish to contact your debugger vendor to inquire about support
for PMIx-based debugger attachment mechanisms. Meantime, you can
disable this warning by setting the OMPI_MPIR_DO_NOT_WARN envar to 1.
#
[both-file-and-dir-set]
Both the "output-directory" and "output-filename" options have been
set:

Directory: %s
Filename: %s

Only one of these can be set - please fix the options and try again.
4 changes: 4 additions & 0 deletions orte/orted/orted_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
&orted_globals.singleton_died_pipe, OPAL_CMD_LINE_TYPE_INT,
"Watch on indicated pipe for singleton termination"},

{ "orte_output_directory", '\0', "output-directory", "output-directory", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Redirect output from application processes into filename/job/rank/std[out,err,diag]." },

{ "orte_output_filename", '\0', "output-filename", "output-filename", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Redirect output from application processes into filename.rank" },
Expand Down
32 changes: 27 additions & 5 deletions orte/orted/orted_submit.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata);

ORTE_DECLSPEC void __opal_attribute_optnone__ MPIR_Breakpoint(void);

/*
/*
* Attempt to prevent the compiler from optimizing out
* MPIR_Breakpoint().
*
Expand All @@ -199,7 +199,7 @@ volatile void* volatile orte_noop_mpir_breakpoint_ptr = NULL;
*/
void MPIR_Breakpoint(void)
{
/*
/*
* Actually do something with this pointer to make
* sure the compiler does not optimize out this function.
* The compiler should be forced to keep this
Expand Down Expand Up @@ -852,9 +852,16 @@ int orte_submit_job(char *argv[], int *index,
if (orte_cmd_options.timestamp_output) {
orte_set_attribute(&jdata->attributes, ORTE_JOB_TIMESTAMP_OUTPUT, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
}
/* if we were asked to output to files, pass it along */
if (NULL != orte_cmd_options.output_filename) {
/* if the given filename isn't an absolute path, then
/* cannot have both files and directory set for output */
if (NULL != orte_cmd_options.output_filename &&
NULL != orte_cmd_options.output_directory) {
orte_show_help("help-orted.txt", "both-file-and-dir-set", true,
orte_cmd_options.output_directory,
orte_cmd_options.output_filename);
return ORTE_ERR_FATAL;
} else if (NULL != orte_cmd_options.output_filename) {
/* if we were asked to output to files, pass it along.
* If the given filename isn't an absolute path, then
* convert it to one so the name will be relative to
* the directory where prun was given as that is what
* the user will have seen */
Expand All @@ -867,6 +874,21 @@ int orte_submit_job(char *argv[], int *index,
} else {
orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_FILE, ORTE_ATTR_GLOBAL, orte_cmd_options.output_filename, OPAL_STRING);
}
} else if (NULL != orte_cmd_options.output_directory) {
/* if we were asked to output to a directory, pass it along.
* If the given directory isn't an absolute path, then
* convert it to one so the name will be relative to
* the directory where prun was given as that is what
* the user will have seen */
if (!opal_path_is_absolute(orte_cmd_options.output_directory)) {
char cwd[OPAL_PATH_MAX], *path;
getcwd(cwd, sizeof(cwd));
path = opal_os_path(false, cwd, orte_cmd_options.output_directory, NULL);
orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, ORTE_ATTR_GLOBAL, path, OPAL_STRING);
free(path);
} else {
orte_set_attribute(&jdata->attributes, ORTE_JOB_OUTPUT_TO_DIRECTORY, ORTE_ATTR_GLOBAL, orte_cmd_options.output_directory, OPAL_STRING);
}
}
/* if we were asked to merge stderr to stdout, mark it so */
if (orte_cmd_options.merge) {
Expand Down
3 changes: 2 additions & 1 deletion orte/orted/orted_submit.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
Expand Down Expand Up @@ -90,6 +90,7 @@ struct orte_cmd_options_t {
bool debug;
bool tag_output;
bool timestamp_output;
char *output_directory;
char *output_filename;
bool merge;
bool continuous;
Expand Down
21 changes: 18 additions & 3 deletions orte/tools/orterun/orterun.1in
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.\" -*- nroff -*-
.\" Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved.
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
.\" Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
.\" Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
.\" Copyright (c) 2017 Los Alamos National Security, LLC. All rights
.\" reserved.
.\" $COPYRIGHT$
Expand Down Expand Up @@ -418,10 +418,25 @@ Redirect the stdout, stderr, and stddiag of all processes to a process-unique ve
the specified filename. Any directories in the filename will automatically be created.
Each output file will be named filename.id, where the id will be the
process's rank in MPI_COMM_WORLD, left-filled with
zeros for correct ordering in listings. A relative path value will be converted to an
zeros for correct ordering in listings. Both stdout and stderr will be redirected to the file.
A relative path value will be converted to an
absolute path based on the cwd where mpirun is executed. Note that this \fIwill not\fP work
on environments where the file system on compute nodes differs from that where mpirun
is executed.
is executed. This option accepts one case-insensitive directive, specified after a colon: NOCOPY
indicates that the output is not to be echoed to the terminal.
.
.
.TP
.B -output-directory\fR,\fP --output-directory \fR<path>\fP
Redirect the stdout, stderr, and stddiag of all processes to a process-unique location
consisting of "<path>/<jobid>/rank.id/stdout[err]", where the id will be the
process's rank in MPI_COMM_WORLD, left-filled with
zeros for correct ordering in listings. Any directories in the path will automatically be created.
A relative path value will be converted to an
absolute path based on the cwd where mpirun is executed. Note that this \fIwill not\fP work
on environments where the file system on compute nodes differs from that where mpirun
is executed. This option also supports two case-insensitive directives, specified in comma-delimited form after a colon: NOJOBID (omit the jobid directory layer) and NOCOPY (do
not copy the output to the terminal).
.
.
.TP
Expand Down
4 changes: 3 additions & 1 deletion orte/util/attr.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
Expand Down Expand Up @@ -384,6 +384,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
return "ORTE_APP_ADD_ENVAR";
case ORTE_JOB_APP_SETUP_DATA:
return "ORTE_JOB_APP_SETUP_DATA";
case ORTE_JOB_OUTPUT_TO_DIRECTORY:
return "ORTE_JOB_OUTPUT_TO_DIRECTORY";

case ORTE_PROC_NOBARRIER:
return "PROC-NOBARRIER";
Expand Down
5 changes: 3 additions & 2 deletions orte/util/attr.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -142,7 +142,7 @@ typedef uint16_t orte_job_flags_t;
#define ORTE_JOB_FIXED_DVM (ORTE_JOB_START_KEY + 42) // bool - do not change the size of the DVM for this job
#define ORTE_JOB_DVM_JOB (ORTE_JOB_START_KEY + 43) // bool - job is using a DVM
#define ORTE_JOB_CANCELLED (ORTE_JOB_START_KEY + 44) // bool - job was cancelled
#define ORTE_JOB_OUTPUT_TO_FILE (ORTE_JOB_START_KEY + 45) // string - name of directory to which stdout/err is to be directed
#define ORTE_JOB_OUTPUT_TO_FILE (ORTE_JOB_START_KEY + 45) // string - path to use as basename of files to which stdout/err is to be directed
#define ORTE_JOB_MERGE_STDERR_STDOUT (ORTE_JOB_START_KEY + 46) // bool - merge stderr into stdout stream
#define ORTE_JOB_TAG_OUTPUT (ORTE_JOB_START_KEY + 47) // bool - tag stdout/stderr
#define ORTE_JOB_TIMESTAMP_OUTPUT (ORTE_JOB_START_KEY + 48) // bool - timestamp stdout/stderr
Expand All @@ -159,6 +159,7 @@ typedef uint16_t orte_job_flags_t;
#define ORTE_JOB_APPEND_ENVAR (ORTE_JOB_START_KEY + 58) // opal_envar_t - append the specified value to the given envar
#define ORTE_JOB_ADD_ENVAR (ORTE_JOB_START_KEY + 59) // opal_envar_t - add envar, do not override pre-existing one
#define ORTE_JOB_APP_SETUP_DATA (ORTE_JOB_START_KEY + 60) // opal_byte_object_t - blob containing app setup data
#define ORTE_JOB_OUTPUT_TO_DIRECTORY (ORTE_JOB_START_KEY + 61) // string - path of directory to which stdout/err is to be directed

#define ORTE_JOB_MAX_KEY 300

Expand Down