Let track authors decide on the default challenge

With this commit we add a new challenge property `default` that allows track authors to define the default challenge of a track instead of relying on the magic name "append-no-conflicts". Closes #199
elastic · Jan 16, 2017 · 9efd5ab · 9efd5ab
1 parent 24061a7
commit 9efd5ab
Show file tree

Hide file tree

Showing 11 changed files with 290 additions and 26 deletions.
diff --git a/docs/adding_tracks.rst b/docs/adding_tracks.rst
@@ -107,8 +107,9 @@ The track repository is managed by git, so ensure that you are on the ``master``
       ],
       "challenges": [
         {
-          "name": "append-no-conflicts",
+          "name": "append-only",
           "description": "",
+          "default": true,
           "index-settings": {
             "index.number_of_replicas": 0
           },
@@ -138,7 +139,7 @@ Finally, you need to commit your changes: ``git commit -a -m "Add geonames track
 
 A few things to note:
 
-* Rally assumes that the challenge that should be run by default is called "append-no-conflicts". If you want to run a different challenge, provide the command line option ``--challenge=YOUR_CHALLENGE_NAME``.
+* If you define multiple challenges, Rally will run the challenge where ``default`` is set to ``true``. If you want to run a different challenge, provide the command line option ``--challenge=YOUR_CHALLENGE_NAME``.
 * You can add as many queries as you want. We use the `official Python Elasticsearch client <http://elasticsearch-py.readthedocs.org/>`_ to issue queries.
 * The numbers below the ``types`` property are needed to verify integrity and provide progress reports.
 
@@ -158,11 +159,11 @@ When you invoke ``esrally list tracks``, the new track should now appear::
                     /____/
     Available tracks:
     
-    Name        Description                                               Challenges
-    ----------  --------------------------------------------------------  -------------------
-    geonames    Standard benchmark in Rally (8.6M POIs from Geonames)     append-no-conflicts
+    Name        Description                                                           Default Challenge    All Challenges
+    ----------  --------------------------------------------------------------------  -------------------  --------------
+    geonames    Standard benchmark in Rally (8.6M POIs from Geonames)                 append-only          append-only
 
-Congratulations, you have created your first track! You can test it with ``esrally --track=geonames --offline`` (or whatever the name of your track is) and run specific challenges with ``esrally --track=geonames --challenge=append-fast-with-conflicts --offline``.
+Congratulations, you have created your first track! You can test it with ``esrally --track=geonames --offline`` (or whatever the name of your track is) and run specific challenges with ``esrally --track=geonames --challenge=append-only --offline``.
 
 If you want to share your track with the community, please read on.
 

diff --git a/docs/command_line_reference.rst b/docs/command_line_reference.rst
@@ -55,7 +55,7 @@ Selects the track that Rally should run. By default the ``geonames`` track is ru
 ``challenge``
 ~~~~~~~~~~~~~
 
-A track consists of one or more challenges. With this flag you can specify which challenge should be run.
+A track consists of one or more challenges. With this flag you can specify which challenge should be run. If you don't specify a challenge, Rally runs the track's default challenge. To see the default challenge of a track, run ``esrally list tracks``.
 
 ``car``
 ~~~~~~~

diff --git a/docs/track.rst b/docs/track.rst
@@ -190,6 +190,7 @@ Each challenge consists of the following properties:
 
 * ``name`` (mandatory): A descriptive name of the challenge. Should not contain spaces in order to simplify handling on the command line for users.
 * ``description`` (mandatory): A human readable description of the challenge.
+* ``default`` (optional): If true, Rally selects this challenge by default if the user did not specify a challenge on the command line. If your track only defines one challenge, it is implicitly selected as default, otherwise you need to define ``"default": true`` on exactly one challenge.
 * ``index-settings`` (optional): Defines the index settings of the benchmark candidate when an index is created. Note that these settings are only applied if the index is auto-managed.
 * ``schedule`` (mandatory): Defines the concrete execution order of operations. It is described in more detail below.
 

diff --git a/esrally/mechanic/mechanic.py b/esrally/mechanic/mechanic.py
@@ -46,7 +46,9 @@ def __init__(self, cfg, supply, p, l):
         # TODO dm: Check whether we can remove this completely
         # ensure we don't mix ES installs
         track_name = self._config.opts("benchmarks", "track")
-        challenge_name = self._config.opts("benchmarks", "challenge")
+        # if the user does not specify, we just use a default ourselves (it's just about unique names). For backwards compatibility we will
+        # choose the only possible name where this option can be empty ("append-no-conflicts"). With Rally 0.5.0, this should all be gone.
+        challenge_name = self._config.opts("benchmarks", "challenge", default_value="append-no-conflicts")
         race_paths = paths.Paths(self._config)
         self._config.add(config.Scope.challenge, "system", "challenge.root.dir",
                          race_paths.challenge_root(track_name, challenge_name))

diff --git a/esrally/rally.py b/esrally/rally.py
@@ -280,9 +280,7 @@ def positive_number(v):
             default="geonames")
         p.add_argument(
             "--challenge",
-            help="define the challenge to use. List possible challenges for tracks with `%s list tracks`"
-                 " (default: append-no-conflicts)." % PROGRAM_NAME,
-            default="append-no-conflicts")  # optimized for local usage
+            help="define the challenge to use. List possible challenges for tracks with `%s list tracks`" % PROGRAM_NAME)
         p.add_argument(
             "--car",
             help="define the car to use. List possible cars with `%s list cars` (default: defaults)." % PROGRAM_NAME,
@@ -462,7 +460,7 @@ def dispatch_sub_command(cfg, sub_command):
         return True
     except exceptions.RallyError as e:
         logging.exception("Cannot run subcommand [%s]." % sub_command)
-        console.error("Cannot %s. %s." % (sub_command, e))
+        console.error("Cannot %s. %s" % (sub_command, e))
         console.println("")
         print_help_on_errors(cfg)
         return False

diff --git a/esrally/reporter.py b/esrally/reporter.py
@@ -192,7 +192,7 @@ def report(self, t):
             print_header("--------------------------------------------------")
             print_internal("")
 
-        selected_challenge = self._config.opts("benchmarks", "challenge")
+        selected_challenge = t.find_challenge_or_default(self._config.opts("benchmarks", "challenge"))
         for challenge in t.challenges:
             if challenge.name == selected_challenge:
                 stats = Stats(self._metrics_store, challenge, self._lap)

diff --git a/esrally/resources/track-schema.json b/esrally/resources/track-schema.json
@@ -213,6 +213,10 @@
             "type": "string",
             "description": "A descriptive name of the challenge. Should not contain spaces in order to simplify handling on the command line for users."
           },
+          "default": {
+            "type": "boolean",
+            "description": "If true, Rally should select this challenge as the default challenge if the user does not specify one on the command line."
+          },
           "meta": {
             "type": "object",
             "description": "Meta-information which will be added to each metrics-record of this challenge."

diff --git a/esrally/track/loader.py b/esrally/track/loader.py
@@ -47,8 +47,8 @@ def tracks(cfg):
 def list_tracks(cfg):
     console.println("Available tracks:\n")
     console.println(tabulate.tabulate(
-        tabular_data=[[t.name, t.short_description, ",".join(map(str, t.challenges))] for t in tracks(cfg)],
-        headers=["Name", "Description", "Challenges"]))
+        tabular_data=[[t.name, t.short_description, t.default_challenge, ",".join(map(str, t.challenges))] for t in tracks(cfg)],
+        headers=["Name", "Description", "Default Challenge", "All Challenges"]))
 
 
 def load_track(cfg):
@@ -513,12 +513,24 @@ def _create_type(self, type_spec, mapping_dir, data_dir):
     def _create_challenges(self, track_spec):
         ops = self.parse_operations(self._r(track_spec, "operations"))
         challenges = []
+        known_challenge_names = set()
+        default_challenge = None
+        number_of_challenges = len(self._r(track_spec, "challenges"))
         for challenge in self._r(track_spec, "challenges"):
             name = self._r(challenge, "name", error_ctx="challenges")
             description = self._r(challenge, "description", error_ctx=name)
             meta_data = self._r(challenge, "meta", error_ctx=name, mandatory=False)
+            # if we only have one challenge it is treated as default challenge, no matter what the user has specified
+            default = number_of_challenges == 1 or self._r(challenge, "default", error_ctx=name, mandatory=False)
             index_settings = self._r(challenge, "index-settings", error_ctx=name, mandatory=False)
 
+            if default and default_challenge is not None:
+                self._error("Both '%s' and '%s' are defined as default challenges. Please define only one of them as default."
+                            % (default_challenge.name, name))
+            if name in known_challenge_names:
+                self._error("Duplicate challenge with name '%s'." % name)
+            known_challenge_names.add(name)
+
             schedule = []
 
             for op in self._r(challenge, "schedule", error_ctx=name):
@@ -528,11 +540,20 @@ def _create_challenges(self, track_spec):
                     task = self.parse_task(op, ops, name)
                 schedule.append(task)
 
-            challenges.append(track.Challenge(name=name,
-                                              meta_data=meta_data,
-                                              description=description,
-                                              index_settings=index_settings,
-                                              schedule=schedule))
+            new_challenge = track.Challenge(name=name,
+                                            meta_data=meta_data,
+                                            description=description,
+                                            index_settings=index_settings,
+                                            default=default,
+                                            schedule=schedule)
+            if default:
+                default_challenge = new_challenge
+
+            challenges.append(new_challenge)
+
+        if challenges and default_challenge is None:
+            self._error("No default challenge specified. Please edit the track and add \"default\": true to one of the challenges %s."
+                        % ", ".join([c.name for c in challenges]))
         return challenges
 
     def parse_parallel(self, ops_spec, ops, challenge_name):
@@ -584,6 +605,8 @@ def parse_operations(self, ops_specs):
                 logger.info("Using user-provided operation type [%s] for operation [%s]." % (op_type_name, op_name))
                 op_type = op_type_name
             param_source = self._r(op_spec, "param-source", error_ctx="operations", mandatory=False)
+            if op_name in ops:
+                self._error("Duplicate operation with name '%s'." % op_name)
             try:
                 ops[op_name] = track.Operation(name=op_name, meta_data=meta_data, operation_type=op_type, params=op_spec,
                                                param_source=param_source)

diff --git a/esrally/track/track.py b/esrally/track/track.py
@@ -105,7 +105,8 @@ class Track:
     A track defines the data set that is used. It corresponds loosely to a use case (e.g. logging, event processing, analytics, ...)
     """
 
-    def __init__(self, name, short_description, description, source_root_url, meta_data=None, challenges=None, indices=None, templates=None):
+    def __init__(self, name, short_description, description, source_root_url=None, meta_data=None, challenges=None, indices=None,
+                 templates=None):
         """
 
         Creates a new track.
@@ -116,7 +117,8 @@ def __init__(self, name, short_description, description, source_root_url, meta_d
         :param source_root_url: The publicly reachable http URL of the root folder for this track (without a trailing slash). Directly
         below this URL the benchmark document files have to be located.
         :param meta_data: An optional dict of meta-data elements to attach to each metrics record. Default: {}.
-        :param challenges: A list of one or more challenges to use.
+        :param challenges: A list of one or more challenges to use. Precondition: If the list is non-empty it contains exactly one element
+        with its ``default`` property set to ``True``.
         :param indices: A list of indices for this track. May be None. One of `indices` or `templates` must be set though.
         :param templates: A list of index templates for this track. May be None. One of `indices` or `templates` must be set though.
         """
@@ -129,6 +131,20 @@ def __init__(self, name, short_description, description, source_root_url, meta_d
         self.indices = indices
         self.templates = templates
 
+    @property
+    def default_challenge(self):
+        for challenge in self.challenges:
+            if challenge.default:
+                return challenge
+        # This should only happen if we don't have any challenges
+        return None
+
+    def find_challenge_or_default(self, name):
+        for challenge in self.challenges:
+            if challenge.name == name:
+                return challenge
+        return self.default_challenge
+
     @property
     def number_of_documents(self):
         num_docs = 0
@@ -149,15 +165,15 @@ class Challenge:
     def __init__(self,
                  name,
                  description,
-                 index_settings,
+                 index_settings=None,
+                 default=False,
                  meta_data=None,
                  schedule=None):
-        if schedule is None:
-            schedule = []
         self.name = name
         self.meta_data = meta_data if meta_data else {}
         self.description = description
-        self.index_settings = index_settings
+        self.index_settings = index_settings if index_settings else {}
+        self.default = default
         self.schedule = schedule if schedule else []
 
     def __str__(self):